pax_global_header00006660000000000000000000000064143003147570014516gustar00rootroot0000000000000052 comment=5c2d0dc7c371db883d807b3e572a02891751e3a2 public-inbox-1.9.0/000077500000000000000000000000001430031475700141205ustar00rootroot00000000000000public-inbox-1.9.0/.editorconfig000066400000000000000000000003501430031475700165730ustar00rootroot00000000000000# EditorConfig configuration for public-inbox # https://EditorConfig.org # Top-most EditorConfig file root = true [*] end_of_line = lf insert_final_newline = true trim_trailing_whitespace = true charset = utf-8 indent_style = tab public-inbox-1.9.0/.gitattributes000066400000000000000000000001051430031475700170070ustar00rootroot00000000000000# Email signatures start with "-- \n" *.eml whitespace=-blank-at-eol public-inbox-1.9.0/.gitignore000066400000000000000000000003301430031475700161040ustar00rootroot00000000000000/.prove /.proverc /config.mak /MANIFEST.gen /Makefile.old /pm_to_blib /MYMETA.* /Makefile /blib /cover_db *.1 *.5 *.7 *.8 *.html *.gz .*.cols .*.check .*.lexgrog /NEWS.html /NEWS.atom /NEWS *.log /lib/PublicInbox.pm public-inbox-1.9.0/AUTHORS000066400000000000000000000003701430031475700151700ustar00rootroot00000000000000This list only includes major contributors. See history in git (via `git clone https://public-inbox.org/public-inbox.git') for a full history of the project. * Eric Wong (BDFL: Bozo Doofus For Life) * The Linux Foundation (v2 work) public-inbox-1.9.0/COPYING000066400000000000000000001033301430031475700151530ustar00rootroot00000000000000 GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU Affero General Public License is a free, copyleft license for software and other kinds of works, specifically designed to ensure cooperation with the community in the case of network server software. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, our General Public Licenses are intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. Developers that use our General Public Licenses protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License which gives you legal permission to copy, distribute and/or modify the software. A secondary benefit of defending all users' freedom is that improvements made in alternate versions of the program, if they receive widespread use, become available for other developers to incorporate. Many developers of free software are heartened and encouraged by the resulting cooperation. However, in the case of software used on network servers, this result may fail to come about. The GNU General Public License permits making a modified version and letting the public access it on a server without ever releasing its source code to the public. The GNU Affero General Public License is designed specifically to ensure that, in such cases, the modified source code becomes available to the community. It requires the operator of a network server to provide the source code of the modified version running there to the users of that server. Therefore, public use of a modified version, on a publicly accessible server, gives the public access to the source code of the modified version. An older license, called the Affero General Public License and published by Affero, was designed to accomplish similar goals. This is a different license, not a version of the Affero GPL, but Affero has released a new version of the Affero GPL which permits relicensing under this license. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU Affero General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Remote Network Interaction; Use with the GNU General Public License. Notwithstanding any other provision of this License, if you modify the Program, your modified version must prominently offer all users interacting with it remotely through a computer network (if your version supports such interaction) an opportunity to receive the Corresponding Source of your version by providing access to the Corresponding Source from a network server at no charge, through some standard or customary means of facilitating copying of software. This Corresponding Source shall include the Corresponding Source for any work covered by version 3 of the GNU General Public License that is incorporated pursuant to the following paragraph. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the work with which it is combined will remain governed by version 3 of the GNU General Public License. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU Affero General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU Affero General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU Affero General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU Affero General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If your software can interact with users remotely through a computer network, you should also make sure that it provides a way for users to get its source. For example, if your program is a web application, its interface could display a "Source" link that leads users to an archive of the code. There are many ways you could offer source, and different solutions will be better for different programs; see section 13 for the specific requirements. You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU AGPL, see . public-inbox-1.9.0/Documentation/000077500000000000000000000000001430031475700167315ustar00rootroot00000000000000public-inbox-1.9.0/Documentation/.gitignore000066400000000000000000000001131430031475700207140ustar00rootroot00000000000000/lei*.txt /public-inbox-*.txt /public-inbox.cgi.txt /standards.txt /.*.txt public-inbox-1.9.0/Documentation/RelNotes/000077500000000000000000000000001430031475700204645ustar00rootroot00000000000000public-inbox-1.9.0/Documentation/RelNotes/v1.0.0.eml000066400000000000000000000013351430031475700220070ustar00rootroot00000000000000From e@80x24.org Thu Feb 8 02:33:57 2018 Date: Thu, 8 Feb 2018 02:33:57 +0000 From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.0.0 Message-ID: <20180208023357.GA32591@80x24.org> After some 3.5 odd years of working on this, I suppose now is as good a time as any to tar this up and call it 1.0.0. The TODO list is still very long and there'll be some new development in coming weeks :> So, here you have a release: https://public-inbox.org/releases/public-inbox-1.0.0.tar.gz Checksums, mainly as a safeguard against accidental file corruption: SHA-256 4a08569f3d99310f713bb32bec0aa4819d6b41871e0421ec4eec0657a5582216 (in other words, don't trust me; instead read the code :>) public-inbox-1.9.0/Documentation/RelNotes/v1.1.0-pre1.eml000066400000000000000000000347621430031475700226670ustar00rootroot00000000000000From e@80x24.org Wed May 9 20:23:03 2018 Date: Wed, 9 May 2018 20:23:03 +0000 From: Eric Wong To: meta@public-inbox.org Cc: Konstantin Ryabitsev Subject: [ANNOUNCE] public-inbox 1.1.0-pre1 Message-ID: <20180509202303.GA15156@dcvr> Pre-release for v2 repository support. Thanks to The Linux Foundation for supporting this work! https://public-inbox.org/releases/public-inbox-1.1.0-pre1.tar.gz SHA-256: d0023770a63ca109e6fe2c58b04c58987d4f81572ac69d18f95d6af0915fa009 (only intended to guard against accidental file corruption) shortlog below: Eric Wong (27): nntp: improve fairness during XOVER and similar commands nntp: do not drain rbuf if there is a command pending extmsg: use news.gmane.org for Message-ID lookups searchview: fix non-numeric comparison mbox: do not barf on queries which return no results nntp: allow and ignore empty commands ensure SQLite and Xapian files respect core.sharedRepository TODO: a few more updates filter/rubylang: do not set altid on spam training import: cleanup git cat-file processes when ->done disallow "\t" and "\n" in OVER headers searchidx: release lock again during v1 batch callback searchidx: remove leftover debugging code convert: copy description and git config from v1 repo view: untangle loop when showing message headers view: wrap To: and Cc: headers in HTML display view: drop redundant References: display code TODO: add EPOLLEXCLUSIVE item searchview: do not blindly append "l" parameter to URL search: avoid repeated mbox results from search msgmap: add limit to response for NNTP thread: prevent hidden threads in /$INBOX/ landing page thread: sort incoming messages by Date searchidx: preserve umask when starting/committing transactions scripts/import_slrnspool: support v2 repos scripts/import_slrnspool: cleanup progress messages public-inbox 1.1.0-pre1 Eric Wong (Contractor, The Linux Foundation) (239): AUTHORS: add The Linux Foundation watch_maildir: allow '-' in mail filename scripts/import_vger_from_mbox: relax From_ line match slightly import: stop writing legacy ssoma.index by default import: begin supporting this without ssoma.lock import: initial handling for v2 t/import: test for last_object_id insertion content_id: add test case searchmsg: add mid_mime import for _extract_mid scripts/import_vger_from_mbox: support --dry-run option import: APIs to support v2 use search: free up 'Q' prefix for a real unique identifier searchidx: fix comment around next_thread_id address: extract more characters from email addresses import: pass "raw" dates to git-fast-import(1) scripts/import_vger_from_mbox: use v2 layout for import import: quiet down warnings from bogus From: lines import: allow the epoch (0s) as a valid time extmsg: fix broken Xapian MID lookup search: stop assuming Message-ID is unique www: stop assuming mainrepo == git_dir v2writable: initial cut for repo-rotation git: reload alternates file on missing blob v2: support Xapian + SQLite indexing import_vger_from_inbox: allow "-V" option import_vger_from_mbox: use PublicInbox::MIME and avoid clobbering v2: parallelize Xapian indexing v2writable: round-robin to partitions based on article number searchidxpart: increase pipe size for partitions v2writable: warn on duplicate Message-IDs searchidx: do not modify Xapian DB while iterating v2/ui: some hacky things to get the PSGI UI to show up v2/ui: retry DB reopens in a few more places v2writable: cleanup unused pipes in partitions searchidxpart: binmode use PublicInbox::MIME consistently searchidxpart: chomp line before splitting searchidx*: name child subprocesses searchidx: get rid of pointless index_blob wrapper view: remove X-PI-TS reference searchidxthread: load doc data for references searchidxpart: force integers into add_message search: reopen skeleton DB as well searchidx: index values in the threader search: use different Enquire object for skeleton queries rename SearchIdxThread to SearchIdxSkeleton v2writable: commit to skeleton via remote partitions searchidxskeleton: extra error checking searchidx: do not modify Xapian DB while iterating search: query_xover uses skeleton DB iff available v2/ui: get nntpd and init tests running on v2 v2writable: delete ::Import obj when ->done search: remove informational "warning" message searchidx: add PID to error message when die-ing content_id: special treatment for Message-Id headers evcleanup: disable outside of daemon v2writable: deduplicate detection on add evcleanup: do not create event loop if nothing was registered mid: add `mids' and `references' methods for extraction content_id: use `mids' and `references' for MID extraction searchidx: use new `references' method for parsing References content_id: no need to be human-friendly v2writable: inject new Message-IDs on true duplicates search: revert to using 'Q' as a uniQue id per-Xapian conventions searchidx: support indexing multiple MIDs mid: be strict with References, but loose on Message-Id searchidx: avoid excessive XNQ indexing with diffs searchidxskeleton: add a note about locking v2writable: generated Message-ID goes first searchidx: use add_boolean_term for internal terms searchidx: add NNTP article number as a searchable term mid: truncate excessively long MIDs early nntp: use NNTP article numbers for lookups nntp: fix NEWNEWS command searchidx: store the primary MID in doc data for NNTP import: consolidate object info for v2 imports v2: avoid redundant/repeated configs for git partition repos INSTALL: document more optional dependencies search: favor skeleton DB for lookup_mail search: each_smsg_by_mid uses skeleton if available v2writable: remove unnecessary skeleton commit favor Received: date over Date: header globally import: fall back to Sender for extracting name and email scripts/import_vger_from_mbox: perform mboxrd or mboxo escaping v2writable: detect and use previous partition count extmsg: rework partial MID matching to favor current inbox extmsg: rework partial MID matching to favor current inbox content_id: use Sender header if From is not available v2writable: support "barrier" operation to avoid reforking use string ref for Email::Simple->new v2writable: remove unnecessary idx_init call searchidx: do not delete documents while iterating search: allow ->reopen to be chainable v2writable: implement remove correctly skeleton: barrier init requires a lock import: (v2) delete writes the blob into history in subdir import: (v2): write deletes to a separate '_' subdirectory import: implement barrier operation for v1 repos mid: mid_mime uses v2-compatible mids function watchmaildir: use content_digest to generate Message-Id import: force Message-ID generation for v1 here import: switch to URL-safe Base64 for Message-IDs v2writable: test for idempotent removals import: enable locking under v2 index: s/GIT_DIR/REPO_DIR/ Lock: new base class for writable lockers t/watch_maildir: note the reason for FIFO creation v2writable: ensure ->done is idempotent watchmaildir: support v2 repositories searchidxpart: s/barrier/remote_barrier/ v2writable: allow disabling parallelization scripts/import_vger_from_mbox: filter out same headers as MDA v2writable: add DEBUG_DIFF env support v2writable: remove "resent" message for duplicate Message-IDs content_id: do not take Message-Id into account introduce InboxWritable class import: discard all the same headers as MDA InboxWritable: add mbox/maildir parsing + import logic use both Date: and Received: times msgmap: add tmp_clone to create an anonymous copy fix syntax warnings v2writable: support reindexing Xapian t/altid.t: extra tests for mid_set v2writable: add NNTP article number regeneration support v2writable: clarify header cleanups v2writable: DEBUG_DIFF respects $TMPDIR feed: $INBOX/new.atom endpoint supports v2 inboxes import: consolidate mid prepend logic, here www: $MESSAGE_ID/raw endpoint supports "duplicates" search: reopen DB if each_smsg_by_mid fails t/psgi_v2: minimal test for Atom feed and t.mbox.gz feed: fix new.html for v2 view: permalink (per-message) view shows multiple messages searchidx: warn about vivifying multiple ghosts v2writable: warn on unseen deleted files www: get rid of unnecessary 'inbox' name reference searchview: remove unnecessary imports from MID module view: depend on SearchMsg for Message-ID http: fix modification of read-only value githttpbackend: avoid infinite loop on generic PSGI servers www: support cloning individual v2 git partitions http: fix modification of read-only value githttpbackend: avoid infinite loop on generic PSGI servers www: remove unnecessary ghost checks v2writable: append, instead of prepending generated Message-ID lookup by Message-ID favors the "primary" one www: fix attachment downloads for conflicted Message-IDs searchmsg: document why we store To: and Cc: for NNTP public-inbox-convert: tool for converting old to new inboxes v2writable: support purging messages from git entirely search: cleanup uniqueness checking search: get rid of most lookup_* subroutines search: move find_doc_ids to searchidx v2writable: cleanup: get rid of unused fields mbox: avoid extracting Message-ID for linkification www: cleanup expensive fallback for legacy URLs view: get rid of some unnecessary imports search: retry_reopen on first_smsg_by_mid import: run_die supports redirects as spawn does v2writable: initializing an existing inbox is idempotent public-inbox-compact: new tool for driving xapian-compact mda: support v2 inboxes search: warn on reopens and die on total failure v2writable: allow gaps in git partitions v2writable: convert some fatal reindex errors to warnings wwwstream: flesh out clone instructions for v2 v2writable: go backwards through alternate Message-IDs view: speed up homepage loading time with date clamp view: drop load_results feed: optimize query for feeds, too msgtime: parse 3-digit years properly convert: avoid redundant "done\n" statement for fast-import search: move permissions handling to InboxWritable t/v2writable: use simplify permissions reading v2: respect core.sharedRepository in git configs searchidx: correct warning for over-vivification v2: one file, really v2writable: fix parallel termination truncate Message-IDs and References consistently scripts/import_vger_from_mbox: set address properly search: reduce columns stored in Xapian replace Xapian skeleton with SQLite overview DB v2writable: simplify barrier vs checkpoints t/over: test empty Subject: line matching www: rework query responses to avoid COUNT in SQLite over: speedup get_thread by avoiding JOIN nntp: fix NEWNEWS command t/thread-all.t: modernize test to support modern inboxes rename+rewrite test using Benchmark module nntp: make XOVER, XHDR, OVER, HDR and NEWNEWS faster view: avoid offset during pagination mbox: remove remaining OFFSET usage in SQLite msgmap: replace id_batch with ids_after nntp: simplify the long_response API searchidx: ensure duplicated Message-IDs can be linked together init: s/GIT_DIR/REPO_DIR/ in usage import: rewrite less history during purge v2: support incremental indexing + purge v2writable: do not modify DBs while iterating for ->remove v2writable: recount partitions after acquiring lock searchmsg: remove unused `tid' and `path' methods search: remove unnecessary OP_AND of query mbox: do not sort search results searchview: minor cleanup support altid mechanism for v2 compact: better handling of over.sqlite3* files v2writable: remove redundant remove from Over DB v2writable: allow tracking parallel versions v2writable: refer to git each repository as "epoch" over: use only supported and safe SQLite APIs search: index and allow searching by date-time altid: fix miscopied field name nntp: set Xref across multiple inboxes www: favor reading more from SQLite, and less from Xapian ensure Xapian and SQLite are still optional for v1 tests psgi: ensure /$INBOX/$MESSAGE_ID/T/ endpoint is chronological over: avoid excessive SELECT over: remove forked subprocess v2writable: reduce barriers index: allow specifying --jobs=0 to disable multiprocess convert: support converting with altid defined store less data in the Xapian document msgmap: speed up minmax with separate queries feed: respect feedmax, again v1: remove articles from overview DB compact: do not merge v2 repos by default v2writable: reduce partititions by one search: preserve References in Xapian smsg for x=t view v2: generate better Message-IDs for duplicates v2: improve deduplication checks import: cat_blob drops leading 'From ' lines like Inbox searchidx: regenerate and avoid article number gaps on full index extmsg: remove expensive git path checks use %H consistently to disable abbreviations searchidx: increase term positions for all text terms searchidx: revert default BATCH_BYTES to 1_000_000 Merge remote-tracking branch 'origin/master' into v2 fix tests to run without Xapian installed extmsg: use Xapian only for partial matches Jonathan Corbet (3): Don't use LIMIT in UPDATE statements Update the installation instructions with Fedora package names Allow specification of the number of search results to return -- git clone https://public-inbox.org/ public-inbox (working on a homepage... sorta :) public-inbox-1.9.0/Documentation/RelNotes/v1.2.0.eml000066400000000000000000000065771430031475700220260ustar00rootroot00000000000000From e@80x24.org Sun Nov 3 03:12:41 2019 Date: Sun, 3 Nov 2019 03:12:41 +0000 From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.2.0 Content-Type: text/plain; charset=utf-8 Message-ID: <20191103030500.public-inbox-1.2.0@released> * first non-pre/rc release with v2 format support for scalability. See public-inbox-v2-format(5) manpage for more details. * new admin tools for v2 inboxes: - public-inbox-convert - converts v1 to v2 repo formats - public-inbox-compact - v2 convenience wrapper for xapian-compact(1) - public-inbox-purge - purges entire messages out of v2 history - public-inbox-edit - edits sensitive data out messages from v2 history - public-inbox-xcpdb - copydatabase(1) wrapper to upgrade Xapian formats (e.g. from "chert" to "glass") and resharding of v2 inboxes * SQLite3 support decoupled from Xapian support, and Xapian DBs may be configured without phrase support to save space. See "indexlevel" in public-inbox-config(5) manpage for more info. * codebase now uses Perl 5.10.1+ features (e.g. "//") * public-inbox-nntpd - support STARTTLS and NNTPS - support COMPRESS extension - fix several RFC3977 compliance bugs - improved interopability with picky clients such as leafnode and Alpine * public-inbox-watch - support multiple spam training directories - support mapping multiple inboxes per Maildir - List-ID header support (see "listid" in public-inbox-config(5)) * public-inbox-mda - List-ID header support (see above) * PublicInbox::WWW - grokmirror-compatible manifest.js.gz endpoint generation - user-configurable color support in $INBOX_URL/_/text/color/ - BOFHs may set default colors via "publicinbox.css" (see public-inbox-config(5)) - ability to map git code repositories and run cgit (see "coderepo" and "cgitrc" in public-inbox-config(5)) - able to recreate blobs with coderepo associations and Xapian - search results may be reversed - reduce memory usage when rendering large threads - syntax highlighting for patches and blobs * public-inbox-httpd / public-inbox-nntpd: - lower memory usage in C10K scenarios - buffers slow clients to filesystem (TMPDIR) instead of RAM - improved FreeBSD support - Danga::Socket is no longer a runtime dependency * many documentation updates, new manpages for: - PublicInbox::SaPlugin::ListMirror - public-inbox-init - public-inbox-learn * workaround memory leaks on Perl 5.16.3 (on CentOS/RHEL 7.x) Thanks to Ali Alnubani, Alyssa Ross, Amitai Schleier, Dave Taht, Dmitry Alexandrov, Eric W. Biederman, Jan Kiszka, Jonathan Corbet, Kyle Meyer, Leah Neukirchen, Mateusz Łoskot, Nicolás Ojeda Bär, SZEDER Gábor, Urs Janßen, Wang Kang, and edef for all their help, bug reports, patches and suggestions. Special thanks to Konstantin Ryabitsev and The Linux Foundation for their sponsorship and support over the past two years. https://public-inbox.org/releases/public-inbox-1.2.0.tar.gz SHA256: dabc735a5cfe396f457ac721559de26ae38abbaaa74612eb786e9e2e1ca94269 Chances are: You don't know me and never will. Everybody else can verify the tarball and sign a reply saying they've verified it, instead. The more who do this, the better, but don't trust the BOFH :P See archives at https://public-inbox.org/meta/ for all history. See https://public-inbox.org/TODO for what the future holds. public-inbox-1.9.0/Documentation/RelNotes/v1.3.0.eml000066400000000000000000000063571430031475700220230ustar00rootroot00000000000000From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.3.0 Date: Mon, 10 Feb 2020 05:52:41 +0000 Message-Id: <20200210055200.public-inbox-1.3.0-rele@sed> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Many internal improvements to improve the developer experience, long-term maintainability, ease-of-installation and compatibility. There are also several bugfixes. Some of the internal improvements involve avoiding Perl startup time in tests. "make check" now runs about 50% faster than before, and the new "make check-run" can be around 30% faster than "make check" after being primed by "make check". Most closures (anonymous subroutines) are purged from the -nntpd, -httpd and WWW code paths to make checking for memory leaks easier. * documentation now builds on BSD make * Date::Parse (TimeDate CPAN distribution) is now optional, allowing installation from OpenBSD systems via "pkg". * the work-in-progress Xapian.pm SWIG bindings are now supported in addition to the traditional Search::Xapian XS bindings. Only the SWIG bindings are packaged for OpenBSD. * Plack is optional for users who wish to avoid web-related components * Filesys::Notify::Simple is optional for non-watch users (but Plack will still pull it in) * improved internal error checking and reporting in numerous places * fixed Perl 5.10.1 compatibility (tested with Devel::PatchPerl) * IPC::Run and XML::Feed are no longer used in tests, though XML::TreePP becomes an optional test dependency. * Email::Address::XS used if available (newer Email::MIME requires it), it should handle more corner cases. * PublicInbox::WWW: - "nested" search results page now shows relevancy percentages - many solver bugs fixed - solver works on "-U0" patches using "git apply --unidiff-zero" - solver now compatible with git < v1.8.5 (but >= v1.8.0) - raw HTML no longer shown inline in multipart/alternative messages (v1.2.0 regression) - reduced memory usage for displaying multipart messages - static file responses support Last-Modified/If-Modified-Since - avoid trailing underlines in diffstat linkification - more consistent handling of messages without Subjects * public-inbox-httpd / public-inbox-nntpd: - MSG_MORE used consistently in long responses - fixed IO::KQueue usage on *BSDs - listen sockets are closed immediately on graceful shutdown - missed signals avoided with signalfd or EVFILT_SIGNAL - Linux x32 ABI support * public-inbox-nntpd: - Y2020 workaround for Time::Local * public-inbox-watch: - avoid memory leak from cyclic reference on SIGHUP - fix documentation of publicinboxwatch.watchspam * public-inbox-convert: - avoid article number jumps when converting indexed v1 inboxes * public-inbox-compact / public-inbox-xcpdb: - concurrent invocations of -compact and -xcpdb commands, not just -mda, -watch, -learn, -purge * examples/unsubscribe.milter: - support unique mailto: unsubscribe Release tarball available for download at: https://public-inbox.org/public-inbox.git/snapshot/public-inbox-1.3.0.tar.gz Please report bugs via plain-text mail to: meta@public-inbox.org See archives at https://public-inbox.org/meta/ for all history. See https://public-inbox.org/TODO for what the future holds. public-inbox-1.9.0/Documentation/RelNotes/v1.4.0.eml000066400000000000000000000060371430031475700220170ustar00rootroot00000000000000Date: Fri, 17 Apr 2020 08:48:59 +0000 From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.4.0 Message-ID: <20200417084800.public-inbox-1.4.0-rele@sed> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline This release focuses on reproducibility improvements and bugfixes for corner-cases. Busy instances of PublicInbox::WWW may also notice memory usage reductions. For rare messages lacking Date and/or Received headers, mirrors now fall back to using the git author/commit times to reindex them. This ensures search and filtering queries behave identically on mirrors as they do on the original machine. "altid" SQLite dumps are now accessible to all over the WWW interface via `POST /$INBOX/$ALTID.sql.gz'. Busy instances of PublicInbox::WWW (whether via public-inbox-httpd or another PSGI server) may notice significant memory usage reductions from the single message "permalink" lifetime optimization. There also ongoing work to improve memory lifetime management to reduce the potential for memory fragmentation in daemons. * general changes: - `include.*' directives in the public-inbox-config(5) file are now honored as documented in git-config(1), thanks to Andreas Rottmann. - `+0000' is assumed for dates missing TZ offsets; thanks to Leah Neukirchen for spotting this regression from v1.2.0. - `<' and `>' characters are dropped to avoid errors in git in addresses for git, thanks again to Leah for noticing this long-standing bug. * PublicInbox::WWW: - memory reductions for message display and rendering - code preload improved to reduce memory fragmentation - remove redundant "a=" parameter in links to solver - escape '&' in hrefs properly - fix optional address obfuscation in search results - `POST /$INBOX/$ALTID.sql.gz' endpoint to retrieve SQLite dumps * public-inbox-httpd + public-inbox-nntpd: - fix SIGUSR2 upgrade in worker-less instances (-W0) * public-inbox-httpd: - fix RFC 7230 conformance when Content-Length and "chunked" are both specified * public-inbox-index: - reproduce original date and time stamps in mirrors for messages lacking Date: and/or Received: headers - new `--compact' (or `-c') switch to perform the equivalent of public-inbox-compact(1) after indexing each inbox * documentation: - add Documentation/technical/data_structures.txt for new hackers * scripts/import_vger_from_mbox: (not really a production-level script) - fix ">From" unescaping thanks to a bug report from Kyle Meyer Thanks to Andreas Rottmann, Leah Neukirchen and Kyle Meyer for their contributions to this release. Release tarball available for download over HTTPS or Tor .onion: https://yhbt.net/public-inbox.git/snapshot/public-inbox-1.4.0.tar.gz http://ou63pmih66umazou.onion/public-inbox.git/snapshot/public-inbox-1.4.0.tar.gz Please report bugs via plain-text mail to: meta@public-inbox.org See archives at https://public-inbox.org/meta/ for all history. See https://public-inbox.org/TODO for what the future holds. public-inbox-1.9.0/Documentation/RelNotes/v1.5.0.eml000066400000000000000000000040251430031475700220130ustar00rootroot00000000000000From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.5.0 Date: Sun, 10 May 2020 07:04:00 +0000 Message-ID: <20200510.public-inbox-1.5.0-rele@sed> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline This release introduces a new pure-Perl lazy email parser, PublicInbox::Eml, which uses roughly 10% less memory and is up to 2x faster than Email::MIME. This is a major internal change Limits commonly enforced by MTAs are also enforced in the new parser, as messages may bypass MTA transports. Email::MIME and other Email::* modules are no longer dependencies nor used at all outside of maintainer validation tests. * public-inbox-index - `--max-size=SIZE' CLI switch and `publicinbox.indexMaxSize' config file option added to prevent indexing of overly large messages. - List-Id headers are indexed in new messages, old messages can be found after `--reindex'. * public-inbox-watch - multiple values of `publicinbox..watchheader' are now supported, thanks to Kyle Meyer - List-Id headers are matched case-insensitively as specified by RFC 2919 * PublicInbox::WWW - $INBOX_DIR/description and $INBOX_DIR/cloneurl are not memoized if missing - improved display of threads, thanks to Kyle Meyer - search for List-Id is available via `l:' prefix if indexed - all encodings are preloaded at startup to reduce fragmentation - diffstat linkification and highlighting are stricter and less likely to linkify tables in cover letters - fix hunk header links to solver which were off-by-one line, thanks again to Kyle Meyer Release tarball available for download over HTTPS or Tor .onion: https://yhbt.net/public-inbox.git/snapshot/public-inbox-1.5.0.tar.gz http://ou63pmih66umazou.onion/public-inbox.git/snapshot/public-inbox-1.5.0.tar.gz Please report bugs via plain-text mail to: meta@public-inbox.org See archives at https://public-inbox.org/meta/ for all history. See https://public-inbox.org/TODO for what the future holds. public-inbox-1.9.0/Documentation/RelNotes/v1.6.0.eml000066400000000000000000000133221430031475700220140ustar00rootroot00000000000000From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.6.0 Date: Wed, 16 Sep 2020 20:03:09 +0000 Message-ID: <20200916200309.public-inbox-1.6.0-rele@sed> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline A big release containing several performance optimizations, a new anonymous IMAP server, and more. It represents an incremental improvement over 1.5 in several areas with more to come in 1.7. The read-only httpd and nntpd daemons no longer block the event loop when retrieving blobs from git, making better use of SMP systems while accomodating slow storage. Indexing can be now be tuned to give somewhat usable performance on HDD storage, though we can't defy the laws of physics, either. * General changes: - ~/.cache/public-inbox/inline-c is automatically used for Inline::C if it exists. PERL_INLINE_DIRECTORY in env remains supported and prioritized to support `nobody'-type users without HOME. - msgmap.sqlite3 uses journal_mode=TRUNCATE, matching over.sqlite3 behavior for a minor reduction in VFS traffic - public-inbox-tuning(7) - new manpage containing pointers to various tuning options and tips for certain HW and OS setups. - Copy-on-write is disabled on BTRFS for new indices to avoid fragmentation. See the new public-inbox-tuning(7) manpage. - message/{rfc822,news,global} attachments are decoded recursively and indexed for search. Reindexing (see below) is required to ensure these attachments are indexed in old messages. - inbox.lock (v2) and ssoma.lock (v1) files are written to on message delivery (or spam removal) to wake up read-only daemons via inotify or kqueue. - `--help' switch supported by command-line tools * Upgrading for new features in 1.6 The ordering of these steps is only necessary if you intend to use some new features in 1.6. Future releases may have different instructions (or be entirely transparent). 0. install (use your OS package manager, or "make install") 1. restart public-inbox-watch instances if you have any 2. Optional: remove Plack::Middleware::Deflater if you're using a custom .psgi file for PublicInbox::WWW. This only saves some memory and CPU cycles, and you may also skip this step if you expect to roll back to 1.5.0 for any reason. Steps 3a and 3b may happen in any order, 3b is optional and is only required to use new WWW and IMAP features. 3a. restart existing read-only daemons if you have them (public-inbox-nntpd, public-inbox-httpd) 3b. run "public-inbox-index -c --reindex --rethread --all" to reindex all configured inboxes 4. configure and start the new public-inbox-imapd. This requires reindexing in 3b, but there's no obligation to run an IMAP server, either. * public-inbox-index There are several new options to improve usability on slow, rotational storage. - `--batch-size=BYTES' or publicinbox.indexBatchSize parameter to reduce frequency of random writes on HDDs - `--sequential-shard' or publicInbox.sequentialShard parameter to improve OS page cache utilization on HDDs. - `--no-fsync' when combined with Xapian 1.4+ can be used to speed up indexing on SSDs and small (default) `--batch-size' - `--rethread' option to go with `--reindex' (use sparringly, see manpage) - parallelize v2 updates by default, `--sequential-shard' and `-j0' is (once again) allowed to disable parallelization - (re-)indexing parallelizes blob reads from git - `--all' may be specified to index all configured inboxes * public-inbox-learn - `rm' supports `--all' to remove from all configured inboxes * public-inbox-imapd - new read-only IMAP daemon similar to public-inbox-nntpd `AUTH=ANONYMOUS' is supported, but any username and password for clients without `AUTH=ANONYMOUS' support. * public-inbox-nntpd - blob reads from git are handled asynchronously * public-inbox-httpd - Plack::Middleware::Deflater is no longer loaded by default when no .psgi file is specified; PublicInbox::WWW can rely on gzip for buffering (see below) * PublicInbox::WWW - use consistent blank line around attachment links - Attachments in message/{rfc822,news,global} messages can be individually downloaded. Downloading the entire message/rfc822 file in full remains supported - $INBOX_DIR/description is treated as UTF-8 - HTML, Atom, and text/plain responses are gzipped without relying on Plack::Middleware::Deflater - Multi-message endpoints (/t.mbox.gz, /T/, /t/, etc) are ~10% faster when running under public-inbox-httpd with asynchronous blob retrieval - mbox search results may now include all messages pertaining to that thread. Needs `--reindex' mentioned above in `Upgrading for new features in 1.6'. - fix mbox.gz search results downloads for lynx users - small navigation tweaks, more prominent mirroring instructions * public-inbox-watch - Linux::Inotify2 or IO::KQueue is used directly, Filesys::Notify::Simple is no longer required - NNTP groups and IMAP mailboxes may be watched in addition to Maildirs (lightly tested). * Ongoing internal changes - reduce event loop hogging for many-inbox support - use more Perl v5.10-isms, future-proof against Perl 8 - more consistent variable and field naming, improve internal documentation and comments - start supporting >=40 char git identifiers for SHA-256 - test -httpd-specific code paths via Plack::Test::ExternalServer in addition to generic PSGI paths. Please report bugs via plain-text mail to: meta@public-inbox.org See archives at https://public-inbox.org/meta/ for all history. See https://public-inbox.org/TODO for what the future holds. public-inbox-1.9.0/Documentation/RelNotes/v1.6.1.eml000066400000000000000000000042621430031475700220200ustar00rootroot00000000000000From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.6.1 Date: Thu, 31 Dec 2020 23:45:56 +0000 Message-ID: <20201231234556.public-inbox-1.6.1-rele@sed> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline A small, bugfix release on top of 1.6.0 from September 2020. Bug fixes: * MIME header decoding no longer warns on undefined variables, with Perl <5.28. Thanks to a bug report by Ali Alnubani. https://public-inbox.org/meta/DM6PR12MB49106F8E3BD697B63B943A22DADB0@DM6PR12MB4910.namprd12.prod.outlook.com/ * Fixed a message threading bug thanks to a report from Kyle Meyer. "public-inbox-index --rethread --reindex" will be necessary in case of certain messages arrive out-of-order. Link: https://public-inbox.org/meta/87360nlc44.fsf@kyleam.com/ * WWW: per-inbox grokmirror manifests no longer return info for all inboxes, only the root /manifest.js.gz includes all inboxes. This regression appeared in 1.6. * public-inbox-mda matches List-Id headers insensitively, matching public-inbox-watch behavior. Similarly, List-Id is always indexed lower-cased for boolean matches to avoid matching an incorrect term. * Newsgroup and Path NNTP headers are now emitted in conformance with RFC 5536 3.1.[45]. Thanks to Andrey Melnikov for the report: https://public-inbox.org/meta/CA+PODjpUN5Q4gBFQhAzUNuMasVEdmp9f=8Uo0Ej0mFumdSwi4w@mail.gmail.com/ * Inotify fixes for public-inbox-imapd users relying on SIGHUP reloads and thousands of watches. * Read-only daemon fixes around TLS and Linux <4.5 systems Bugfixes with minor behavior changes: * The X-Status mbox header is now excluded from imports, just like the Status: header has been for many years. They have no place in public archives and can be privacy concern for people sharing archives. * WWW prevents deep-linking to attachments to limit abuse vectors. Noticed by Leah Neukirchen: https://public-inbox.org/meta/87imagyap9.fsf@vuxu.org/ There are also several ocumentation fixes from Uwe Kleine-König and Kyle Meyer. Please report bugs via plain-text mail to: meta@public-inbox.org See archives at https://public-inbox.org/meta/ for all history. public-inbox-1.9.0/Documentation/RelNotes/v1.7.0.eml000066400000000000000000000064031430031475700220170ustar00rootroot00000000000000From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.7.0 Date: Thu, 04 Nov 2021 07:52:00 +0000 MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Message-ID: <2021-11-04-public-inbox-1.7.0-finally-rele@sed> Content-Disposition: inline Another big release focused on multi-inbox search and scalability. Special thanks to Konstantin Ryabitsev and Kyle Meyer for numerous bug reports and documentation help. * general changes - config file parsing is 2x faster with 50K inboxes - deduplication ignores whitespace differences within address fields - "PRAGMA optimize" is now issued on commits for SQLite 3.18+ * public-inbox-extindex A new Xapian + SQLite index able to search across several inboxes. This may be configured to replace per-inbox Xapian DBs, (but not per-inbox SQLite indices) and speed up manifest.js.gz generation. See public-inbox-extindex-format(5) and public-inbox-extindex(1) manpages for more details. Using it with "--all" speeds up various multi-inbox operations in PublicInbox::WWW, public-inbox-nntpd, and public-inbox-imapd. * read-only public-inbox-daemon (-httpd, -nntpd, -imapd): libgit2 may be used via Inline::C to avoid hitting system pipe and process limits. See public-inbox-tuning(7) manpage for more details. * various memory usage reductions and workarounds for leaks in Encode <3.15, these mainly affect PublicInbox::WWW * public-inbox-nntpd - startup is 6x faster with 50K inboxes if using -extindex * PublicInbox::WWW - mboxrd search results are returned in reverse Xapian docid order, so more recent results are more likely to show up first - d: and dt: search prefixes allow "approxidate" formats supported by "git log --since=" - manifest.js.gz generation is ~25x faster with -extindex - minor navigation improvements in search results HTML page * lei - local email interface An experimental, subject-to-change, likely-to-eat-your-mail tool for personal mail as well as interacting with public-inboxes on the local filesystem or over HTTP(S). See lei(1), lei-overview(7), and other lei-* manpages for details. This is only ready-to-use w.r.t. external public-inbox instances, but mail synchronization for personal mail remains clunky. * public-inbox-index - non-strict (Subject-based) threading supports non-ASCII characters, reindexing is necessary for old messages with non-ASCII subjects. - --batch-size is now 8M on 64-bit systems for throughput improvements, higher values are still advised for more powerful hardware. * public-inbox-watch - IMAP and NNTP code shared with lei, fixing an off-by-one error in IMAP synchronization for single-message IMAP folders. - \Deleted and \Draft messages ignored for IMAP, as they are for Maildir. - IMAP and NNTP connection establishment (including git-credential prompts) ordering is now tied to config file order. Compatibility: * Rollbacks all the way to public-inbox 1.2.0 remain supported Internal changes * public-inbox-index switched to new internal IPC code shared with lei Please report bugs via plain-text mail to: meta@public-inbox.org See archives at https://public-inbox.org/meta/ for all history. See https://public-inbox.org/TODO for what the future holds. public-inbox-1.9.0/Documentation/RelNotes/v1.8.0.eml000066400000000000000000000031411430031475700220140ustar00rootroot00000000000000From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.8.0 Date: Sat, 23 Apr 2022 08:22:59 +0000 MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline Message-ID: <2022-04-23-public-inbox-1.8.0-released@finally> A minor release focused on bugfixes and minor improvements. Upgrades should happen transparently, but downgrading back to 1.7.0 will likely cause problems for lei users (and only lei users). lei users may experience duplicate messages in Maildirs if attempting to downgrade from 1.8.0 to 1.7.x. public-inbox-* tools are unaffected and may downgrade freely. Bugfixes: Numerous test fixes thanks to NixOS developers. Long-running daemons are more robust in case of corrupt blobs or crashes of git-cat-file processes PublicInbox::WWW: all CR are removed before LF, fixing display of CR-CR-LF messages. Solver supports SHA-256 code repositories (inbox and lei store support is still pending). Internal updates: Reduced dependencies on Inline::C for Linux users; Linux users may now use lei with neither Inline::C nor Socket::MsgHdr installed. New features: The --dangerous flag is now supported in public-inbox-index and public-inbox-extindex to use the Xapian::DB_DANGEROUS flag for initial indexes. This may reduce SSD/HDD wear at the expense of disallowing concurrency and data integrity in case of an unexpected shutdown. Please report bugs via plain-text mail to: meta@public-inbox.org See archives at https://public-inbox.org/meta/ for all history. See https://public-inbox.org/TODO for what the future holds. public-inbox-1.9.0/Documentation/RelNotes/v1.9.0.eml000066400000000000000000000043571430031475700220270ustar00rootroot00000000000000From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.9.0 Date: Sun, 21 Aug 2022 02:36:59 +0000 MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline Message-ID: <2022-08-21T023659Z-public-inbox-1.9.0-rele@sed> Upgrading: lei users need to "lei daemon-kill" after installation to load new code. Normal daemons (read-only, and public-inbox-watch) will also need restarts, of course, but there's no backwards-incompatible data format changes so rolling back to older versions is harmless. Major bugfixes: * lei no longer freezes from inotify/EVFILT_VNODE handling, user interrupts (Ctrl-C), nor excessive errors/warnings * IMAP server fairness improved to avoid excessive blob prefetch New features: * POP3 server support added, use either public-inbox-pop3d or the new public-inbox-netd superserver * public-inbox-netd superserver supporting any combination of HTTP, IMAP, POP3, and NNTP services; simplifying management and allowing more sharing of memory used for various data structures. * public-inbox-httpd and -netd support per-listener .psgi files * SIGHUP reloads TLS certs and keys in addition to config and .psgi files * "lei reindex" command for lei users to update personal index in ~/.local/share/lei/store for search improvements below: Search improvements: These will require --reindex with public-inbox-index and/or public-inbox-extindex for public inboxes. * patchid: prefix search support added to WWW and lei for "git patch-id --stable" support * text inside base-85 binary patches are no longer indexed to avoid false positives * for lei users, "lei reindex" now exists and is required to take advantage of aforementioned indexing changes Performance improvements: * IMAP server startup is faster with many mailboxes when using "public-inbox-extindex --all" * NNTP group listings are also faster with many inboxes when using "public-inbox-extindex --all" * various small opcode and memory usage reductions Please report bugs via plain-text mail to: meta@public-inbox.org See archives at https://public-inbox.org/meta/ for all history. See https://public-inbox.org/TODO for what the future holds. public-inbox-1.9.0/Documentation/clients.txt000066400000000000000000000027701430031475700211410ustar00rootroot00000000000000clients and tools related to public-inbox ----------------------------------------- While public-inbox exposes NNTP and gzipped mboxrd over HTTP, some public-inbox-specific/aware tools have sprung up. Below is a non-exhaustive list of them. Feel free to send additions, corrections and discussions to meta@public-inbox.org Discussions will be visible from our own public-inbox instance: https://public-inbox.org/meta/ Disclaimer: public-inbox itself comes with no warranty or guarantees; so don't treat any of these links as endorsements, either. * l2md - Maildir and procmail importer using C + libgit2 https://git.kernel.org/pub/scm/linux/kernel/git/dborkman/l2md.git * b4 - helper utility for patch-based workflows https://git.kernel.org/pub/scm/utils/b4/b4.git * impibe - Perl script to import v1 or v2 to Maildir https://leahneukirchen.org/dotfiles/bin/impibe discussion: https://public-inbox.org/meta/87v9m0l8t1.fsf@vuxu.org/ * kernel.org helpers - various scripts used by *.kernel.org https://git.kernel.org/pub/scm/linux/kernel/git/mricon/korg-helpers.git * grokmirror - git mirroring tool (not public-inbox-specific) https://git.kernel.org/pub/scm/utils/grokmirror/grokmirror.git * ssoma - v1 only, abandoned in favor of NNTP https://80x24.org/ssoma.git * piem - Emacs tools for working with public-index (and b4) https://git.kyleam.com/piem/about/ There's also a bunch of random scripts in the scripts/ directory of our source tree at: git clone https://public-inbox.org/public-inbox.git public-inbox-1.9.0/Documentation/common.perl000077500000000000000000000035531430031475700211160ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use Fcntl qw(SEEK_SET); my $have_search = eval { require PublicInbox::Search; 1 }; my $addr = 'meta@public-inbox.org'; for my $pod (@ARGV) { open my $fh, '+<', $pod or die "open($pod): $!"; my $s = do { local $/; <$fh> } // die "read $!"; my $orig = $s; $s =~ s!^=head1 COPYRIGHT\n.+?^=head1([^\n]+)\n!=head1 COPYRIGHT Copyright all contributors L License: AGPL-3.0+ L =head1$1 !ms; $s =~ s!^=head1 CONTACT\n.+?^=head1([^\n]+)\n!=head1 CONTACT Feedback welcome via plain-text mail to L The mail archives are hosted at L and L =head1$1 !ms; $have_search and $s =~ s!^=for\scomment\n ^AUTO-GENERATED-SEARCH-TERMS-BEGIN\n .+? ^=for\scomment\n ^AUTO-GENERATED-SEARCH-TERMS-END\n !search_terms()!emsx; $s =~ s/[ \t]+$//sgm; if ($s eq $orig) { my $t = time; utime($t, $t, $fh); } else { seek($fh, 0, SEEK_SET) or die "seek: $!"; truncate($fh, 0) or die "truncate: $!"; print $fh $s or die "print: $!"; close $fh or die "close: $!"; } } sub search_terms { my $help = eval('\@PublicInbox::Search::HELP'); my $s = ''; my $pad = 0; my $i; for ($i = 0; $i < @$help; $i += 2) { my $pfx = $help->[$i]; my $n = length($pfx); $pad = $n if $n > $pad; $s .= $pfx . "\0"; $s .= $help->[$i + 1]; $s .= "\f\n"; } $pad += 2; my $padding = ' ' x ($pad + 4); $s =~ s/^/$padding/gms; $s =~ s/^$padding(\S+)\0/" $1".(' ' x ($pad - length($1)))/egms; $s =~ s/\f\n/\n/gs; $s =~ s/^ //gms; substr($s, 0, 0, "=for comment\nAUTO-GENERATED-SEARCH-TERMS-BEGIN\n\n"); $s .= "\n=for comment\nAUTO-GENERATED-SEARCH-TERMS-END\n"; } public-inbox-1.9.0/Documentation/dc-dlvr-spam-flow.txt000066400000000000000000000040431430031475700227310ustar00rootroot00000000000000dc-dlvr spam/ham training system flow ------------------------------------- An overview of the Maildir + inotify-based spam training system Eric uses on his mail server. This idea may be implemented for kqueue-based systems, too. The idea is to use inotify (via incron) to watch for new files appearing in Maildirs. We only want to train seen messages as ham, and old (but not necessarily seen) messages as spam. The overall goal of this is to allow a user to train their filters without leaving his favorite mail user agent. Every message written to Maildir involves a rename, so we only have incron watch for IN_MOVED_TO events. The generic flow is as follows, all for a single Unix user account: incron -> report-spam +-> sendmail -> MTA -> dc-dlvr -> spamc -> spamd | V ... For public-inbox, Eric uses a separate Unix account ("pi") to add a layer of protection from fat-fingering something. So his report-spam script delivers to a second recipient for training, the "pi" user: ... | +-> sendmail -> MTA -> dc-dlvr | V ~pi/.dc-dlvr.pre | V public-inbox-learn public-inbox-learn will then internally handle the "spamc -> spamd" delivery path as well as removing the message from the git tree. * incron - run commands based on filesystem events: http://incron.aiken.cz/ * sendmail / MTA - we use and recommend use postfix, which includes a sendmail-compatible wrapper: http://www.postfix.org/ * spamc / spamd - SpamAssassin: http://spamassassin.apache.org/ * report-spam / dc-dlvr - distributed with public-inbox in the scripts/ directory: git clone https://public-inbox.org/public-inbox.git public-inbox-1.9.0/Documentation/design_notes.txt000066400000000000000000000132011430031475700221500ustar00rootroot00000000000000public-inbox design notes ------------------------- Challenges to running normal mailing lists ------------------------------------------ 1) spam 2) bounce processing of invalid/bad email addresses 3) processing subscribe/unsubscribe requests Issues 2) and 3) are side-stepped entirely by moving reader subscriptions to git repository synchronization and Atom feeds. There's no chance of faked subscription requests and no need to deal with confused users who cannot unsubscribe. Use existing infrastructure --------------------------- * public-inbox can coexist with existing mailing lists, any subscriber to the existing mailing list can begin delivering messages to public-inbox-mda(1) or public-inbox-watch(1) * public-inbox uses SMTP for posting. Posting a message to a public-inbox instance is no different than sending a message to any _open_ mailing list. * Existing spam filtering on an SMTP server is also effective on public-inbox. * Readers may continue using use their choice of NNTP and mail clients. * Atom is a reasonable feed format for casual readers and is supported by a variety of feed readers. Why email? ---------- * Freedom from proprietary services, tools and APIs. Communicating with developers and users of Free Software should not rely on proprietary tools or services. * Existing infrastructure, tools, and user familiarity. There is already a large variety of tools, clients, and email providers available. There are also many resources for users to run their own SMTP server on a domain they control. * All public discussion mediums are affected by spam and advertising. There exist several good Free Software anti-spam tools for email. * Privacy is not an issue for public discussion. Public mailing list archives are common and accepted by Free Software communities. There is no need to ask the NSA for backups of your mail archives :) * git, one of the most widely-used version control systems, includes many tools for for email, including: git-format-patch(1), git-send-email(1), git-am(1), git-imap-send(1). Furthermore, the development of git itself is based on the git mailing list: https://public-inbox.org/git/ (or http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/git/ for Tor users) * Email is already the de-facto form of communication in many Free Software communities.. * Fallback/transition to private email and other lists, in case the public-inbox host becomes unavailable, users may still directly email each other (or Cc: lists for related/dependent projects). Why git? -------- * git is distributed and robust while being both fast and space-efficient with text data. NNTP was considered, but does not support delta-compression and places no guarantees on data/transport integrity. However, read-only IMAP and NNTP gateways are implemented. * As of 2016, git is widely used and known to nearly all Free Software developers. For non-developers it is packaged for all major GNU/Linux and *BSD distributions. NNTP is not as widely-used nowadays, and most IMAP clients do not have good support for read-only mailboxes. Why perl 5? ----------- * Perl 5 is widely available on modern *nix systems with good a history of backwards and forward compatibility. * git and SpamAssassin both use it, so it should be one less thing for admins to install and waste disk space with. * Distributing compiled binaries has higher costs for storage/cache space is required for each architecture. Having a runnable, source-only distribution means any user already has access to all of our source. Laziness -------- * Stick to dependencies available in Debian main, this should make it easier for potential users to install, and easier for distro maintainers to pick up. * A list server being turned into an SMTP spam relay and being blacklisted while an admin is asleep is scary. Sidestep that entirely by having clients pull. * Eric has a great Maildir+inotify-based Bayes training setup going back many years. Document, integrate and publicize it for public-inbox usage, encouraging other admins to use it (it works as long as admins read their public-inbox). * Custom, difficult-for-Bayes requires custom anti-spam rules. We may steal rules from the Debian listmasters: svn://anonscm.debian.org/pkg-listmaster * Full archives are easily distributable with git, so somebody else can take over the list if we give up. Anybody may also run an SMTP notifier/delivery service based on the archives. * Avoids bikeshedding about web UI decisions, GUI-lovers can write their own GUI-friendly interfaces (HTML or native) based on public archives. Web notes --------- * Getting users to install/run any new tool is difficult. The web views must be easily read/cache/mirror-able. * There may also be a significant number of webmail users without an MUA or feed reader; so a web view is necessary. * Expose Message-ID in web views to encourage replies from drive-by contributors. * Raw text endpoint allows users to write client-side endpoints without hosting the data themselves (or on a different server). What sucks about public-inbox ----------------------------- * Lack of push notification. On the other hand, feeds seem popular. * some (mostly GUI) mail clients cannot set In-Reply-To headers properly without the original message. * marketing - as it should: Scalability notes ----------------- See the public-inbox-v2-format(5) manpage for all the scalability problems solved. Copyright --------- Copyright all contributors License: AGPL-3.0+ public-inbox-1.9.0/Documentation/design_www.txt000066400000000000000000000130661430031475700216550ustar00rootroot00000000000000PublicInbox::WWW (PSGI interface) design notes URL and anchor naming --------------------- ### Unstable endpoints /$INBOX/?r=$GIT_COMMIT -> HTML only /$INBOX/new.atom -> Atom feed #### Optional, relies on Search::Xapian (or Xapian SWIG binding) /$INBOX/$MESSAGE_ID/t/ -> HTML content of thread (nested) /$INBOX/$MESSAGE_ID/T/ -> HTML content of thread (flat) anchors: #u location of $MESSAGE_ID in URL #m per-message links, where is of the Message-ID of each message (stable) #s relative numeric position of message in thread (unstable) #i<...> diffstat location for patch emails #Z?<...> per-file diff header location for patch emails /$INBOX/$MESSAGE_ID/t.atom -> Atom feed for thread /$INBOX/$MESSAGE_ID/t.mbox.gz -> gzipped mbox of thread /$INBOX/$GIT_OID/s/ -> "git show" (via "git apply") This endpoint requires "coderepo" entries configured for a given inbox. It can recreate ("solve") blobs from patch emails using Xapian and git-apply(1). It can also display non-blob content, but that remains a work-in-progress. /$INBOX/$GIT_OID/s/$FILENAME -> "git show", raw output As above, but shows the raw (usually text/plain) output. ### Stable endpoints /$INBOX/$MESSAGE_ID/ -> HTML content anchors: #r location of the current message in thread skeleton (requires Xapian search) #b start of the message body (linked from thread skeleton) /$INBOX/$MESSAGE_ID -> 301 to /$INBOX/$MESSAGE_ID/ /$INBOX/$MESSAGE_ID/raw -> raw mbox /$INBOX/$MESSAGE_ID/#R -> HTML reply instructions # Covering up a pre-1.0 design mistake: /$INBOX/$MESSAGE_ID/f/ -> 301 to /$INBOX/$MESSAGE_ID/ ### Legacy endpoints (may be ambiguous given Message-IDs with similar suffixes) /$INBOX/m/$MESSAGE_ID/ -> 301 to /$INBOX/$MESSAGE_ID/ /$INBOX/m/$MESSAGE_ID.html -> 301 to /$INBOX/$MESSAGE_ID/ /$INBOX/m/$MESSAGE_ID.txt -> 301 to /$INBOX/$MESSAGE_ID/raw /$INBOX/f/$MESSAGE_ID.html -> 301 to /$INBOX/$MESSAGE_ID/ /$INBOX/f/$MESSAGE_ID.txt [1] -> 301 to /$INBOX/$MESSAGE_ID/raw /$INBOX/atom.xml [2] -> identical to /$INBOX/new.atom Additionally, we support git clone/fetch over HTTP (dumb and smart): git clone --mirror http://$HOSTNAME/$INBOX FIXME: we must refactor/cleanup/add tests for most of our CGI before adding more endpoints and features. [1] These URLs were never linked, but only exist as a convenience to folks who edit existing URLs [2] Do not make this into a 301 since feed readers may not follow them as well as normal browsers do. Encoding notes -------------- Raw HTML and XML should only contain us-ascii characters which render to UTF-8. We must not rely on users having the necessary fonts installed to render uncommon characters. Plain text (raw message) endpoints display in the original encoding(s) of the original email. Offline friendly ---------------- The "/t/", "/T/", "t.mbox.gz" endpoints are designed to be useful for reading long threads for users with intermittent connections or saved for offline viewing. Date displays are always absolute, not the "X hours ago" pattern commonly seen because readers may be reading a previously-saved or cached copy. HTML URLs end with '/' or "$FILENAME.html". The reason many URLs end with the '/' character is so it can trivially be saved to a directory via wget or similar tools as "index.html", making it easy to mirror all files ending in ".html" using any static web server. Guidelines for using limited HTML --------------------------------- We mainly use HTML for linking pages together with . We also set to make window management easier. We favor <pre>-formatted text since public-inbox is intended as a place to share and discuss patches and code. Unfortunately, long paragraphs tends to be less readable with fixed-width serif fonts which GUI browsers default to. * No graphics, images, or icons at all. We tolerate, but do not encourage the use of GUIs. * No setting font sizes, power to users to decide those. We will include and document <span class=?> to support colors for user-supplied CSS. * Only one font type: fixed. This is for accessibility, we must not blow certain elements out-of-proportion with different fonts on the page when a reader increases font size. * Bold and underline elements are OK since they should render fine regardless of chosen font and gracefully degrade if a display does not support them. Italics and strike-through elements must be avoided as they do not render well with some displays or user-chosen fonts. * No JavaScript. JS is historically too buggy and insecure, and we will never expect our readers to do either of the following: a) read and audit all our code for on every single page load b) trust us and and run code without reading it * We only use CSS for one reason: wrapping pre-formatted text This is necessary because unfortunate GUI browsers tend to be prone to layout widening from unwrapped mailers. Do not expect CSS to be enabled, especially with scary things like: https://thejh.net/misc/website-terminal-copy-paste However, we will try to make it easy for users to supply their own colors via user-side CSS. CSS classes (for user-supplied CSS) ----------------------------------- See examples in contrib/css/ and lib/PublicInbox/WwwText.pm (or https://public-inbox.org/meta/_/text/color/ soon) ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/flow.ge������������������������������������������������������������0000664�0000000�0000000�00000001336�14300314757�0020220�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������# public-inbox data flow # # Note: choose either "delivery tools" OR "git mirroring tools" # for a given inboxdir. Combining them for the SAME inboxdir # will cause conflicts. Of course, different inboxdirs may # choose different means of getting mail into them. graph { flow: down } [delivery tools:\n public-inbox-mda\n public-inbox-watch\n public-inbox-learn] -> [inboxdir] [git mirroring tools:\n grok-pull,\n various scripts ] -- git (clone|fetch) &&\n public-inbox-index --> [inboxdir] [inboxdir] -> [read-only daemons:\n public-inbox-httpd\n public-inbox-imapd\n public-inbox-nntpd] # Copyright 2020-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/flow.txt�����������������������������������������������������������0000664�0000000�0000000�00000003621�14300314757�0020443�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������# public-inbox data flow # # Note: choose either "delivery tools" OR "git mirroring tools" # for a given inboxdir. Combining them for the SAME inboxdir # will cause conflicts. Of course, different inboxdirs may # choose different means of getting mail into them. +--------------------+ | delivery tools: | | public-inbox-mda | | public-inbox-watch | | public-inbox-learn | +--------------------+ | | v +----------------------+ +--------------------+ | git mirroring tools: | git (clone|fetch) && | | | grok-pull, | public-inbox-index | inboxdir | | various scripts | ----------------------> | | +----------------------+ +--------------------+ | | v +--------------------+ | read-only daemons: | | public-inbox-httpd | | public-inbox-imapd | | public-inbox-nntpd | +--------------------+ # Copyright 2020-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> ���������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/hosted.txt���������������������������������������������������������0000664�0000000�0000000�00000002720�14300314757�0020761�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������unofficially hosted mirrors at public-inbox.org In addition to eating our own dogfood at <https://public-inbox.org/meta/>, public-inbox.org hosts unofficial archives for several other projects to further test our own software. These mirrors are NOT to be considered reliable or permanent. Interested parties are strongly encouraged to host their own mirrors. The presence of these archives does not imply these projects endorse public-inbox or public-inbox.org in any way. * https://public-inbox.org/bug-gnulib/ bug-gnulib@gnu.org Discussion for Gnulib portability/common source project https://lists.gnu.org/mailman/listinfo/bug-gnulib * https://public-inbox.org/git/ git@vger.kernel.org Mailing list for the git version control system http://vger.kernel.org/majordomo-info.html * https://public-inbox.org/libc-alpha/ libc-alpha@sourceware.org Mailing list for GNU C library development https://www.gnu.org/software/libc/involved.html * https://public-inbox.org/rack-devel/ rack-devel@googlegroups.com Development list for the Ruby webserver interface https://groups.google.com/group/rack-devel * https://public-inbox.org/sox-users/ sox-users@lists.sourceforge.net Users' list for the SoX sound processing tool https://lists.sourceforge.net/lists/listinfo/sox-users * https://public-inbox.org/sox-devel/ sox-devel@lists.sourceforge.net Developers' list for the SoX sound processing tool https://lists.sourceforge.net/lists/listinfo/sox-devel ������������������������������������������������public-inbox-1.9.0/Documentation/include.mk���������������������������������������������������������0000664�0000000�0000000�00000006776�14300314757�0020725�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������# Copyright (C) 2013-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> all:: RSYNC = rsync RSYNC_DEST = public-inbox.org:/srv/public-inbox/ AWK = awk MAN = man LEXGROG = lexgrog # this is "xml" on FreeBSD and maybe some other distros: XMLSTARLET = xmlstarlet # libgraph-easy-perl from Debian, Graph::Easy from CPAN GRAPH_EASY = graph-easy INSTALL = install PODMAN = pod2man PODMAN_OPTS = -v --stderr -d 1993-10-02 -c 'public-inbox user manual' PODMAN_OPTS += -r public-inbox.git podman = $(PODMAN) $(PODMAN_OPTS) man2text = COLUMNS=80 MANWIDTH=80 TERM=dumb MANOPT='--nj --nh' man all:: man manpages = $(man1) $(man5) $(man7) $(man8) man: $(manpages) prefix ?= $(PREFIX) prefix ?= $(HOME) mandir ?= $(INSTALLMAN1DIR)/.. man5dir = $(mandir)/man5 man7dir = $(mandir)/man7 man8dir = $(mandir)/man8 install-man: man $(INSTALL) -d -m 755 $(DESTDIR)$(INSTALLMAN1DIR) $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir) $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir) $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir) $(INSTALL) -m 644 $(man1) $(DESTDIR)$(INSTALLMAN1DIR) $(INSTALL) -m 644 $(man5) $(DESTDIR)$(man5dir) $(INSTALL) -m 644 $(man7) $(DESTDIR)$(man7dir) $(INSTALL) -m 644 $(man8) $(DESTDIR)$(man8dir) doc_install :: install-man check :: check-man check_man = $(AWK) \ '{gsub(/\b./,"")}$$0 !~ /\.onion/&&length>80{print;e=1}END{exit(e)}' \ >&2 check-man :: $(check_80) check-lexgrog :: $(check_lexgrog) all :: $(docs) txt2pre = $(PERL) -I lib ./Documentation/txt2pre >$@ Documentation/standards.txt : Documentation/standards.perl $(PERL) -w Documentation/standards.perl >$@+ touch -r Documentation/standards.perl $@+ mv $@+ $@ # flow.txt is checked into git since Graph::Easy isn't in many distros Documentation/flow.txt : Documentation/flow.ge (sed -ne '1,/^$$/p' <Documentation/flow.ge; \ $(GRAPH_EASY) Documentation/flow.ge || \ cat Documentation/flow.txt; \ echo; \ sed -ne '/^# Copyright/,$$p' <Documentation/flow.ge \ ) >$@+ touch -r Documentation/flow.ge $@+ mv $@+ $@ Documentation/lei-q.pod : lib/PublicInbox/Search.pm Documentation/common.perl $(PERL) -I lib -w Documentation/common.perl $@ NEWS NEWS.atom NEWS.html : $(news_deps) $(PERL) -I lib -w Documentation/mknews.perl $@ $(RELEASES) # check for internal API changes: check :: NEWS .NEWS.atom.check NEWS.html .NEWS.atom.check: NEWS.atom $(XMLSTARLET) val NEWS.atom || \ { e=$$?; test $$e -eq 0 || test $$e -eq 127; } >$@ html: $(docs_html) doc: $(docs) %.gz: % gzip -9 --rsyncable <$< >$@+ touch -r $< $@+ mv $@+ $@ gz-doc: $(gz_docs) rsync-doc: NEWS.atom.gz # /usr/share/doc/rsync/scripts/git-set-file-times{.gz} on Debian systems # It is also at: https://yhbt.net/git-set-file-times -git set-file-times $(docs) $(txt) $(MAKE) gz-doc $(RSYNC) --chmod=Fugo=r -av $(rsync_docs) $(RSYNC_DEST) clean-doc: $(RM_F) $(man1) $(man5) $(man7) $(man8) $(gz_docs) $(docs_html) \ $(mantxt) \ NEWS NEWS.atom NEWS.html Documentation/standards.txt \ Documentation/flow.html Documentation/flow.html.gz \ Documentation/flow.txt.gz clean :: clean-doc # No camel-cased tarballs or pathnames which MakeMaker creates, # this may not always be a Perl project. This should match what # cgit generate, since git maintainers ensure git-archive has # stable tar output DIST_TREE = HEAD^{tree} DIST_VER = git-dist : ver=$$(git describe $(DIST_VER) | sed -ne s/v//p); \ pkgpfx=public-inbox-$$ver; \ git archive --prefix=$$pkgpfx/ --format=tar $(DIST_TREE) \ | gzip -n >$$pkgpfx.tar.gz; \ echo $$pkgpfx.tar.gz created ��public-inbox-1.9.0/Documentation/lei-add-external.pod�����������������������������������������������0000664�0000000�0000000�00000005074�14300314757�0022562�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-add-external - add inbox or external index =head1 SYNOPSIS lei add-external [OPTIONS] LOCATION =head1 DESCRIPTION Configure lei to search against an external (an inbox or external index). When C<LOCATION> is an existing local path, it should point to a directory that is a C<publicinbox.$NAME.inboxdir> or C<extindex.$NAME.topdir> value in ~/.public-inbox/config. =head1 OPTIONS =for comment TODO: mention curl options? =over =item --boost=NUMBER Set priority of a new or existing location. Default: 0 =item --mirror=URL Create C<LOCATION> by mirroring the public-inbox at C<URL>. C<LOCATION> will have a Makefile with a C<make update> target to update the external. =item --epoch=RANGE Restrict clones of L<public-inbox-v2-format(5)> inboxes to the given range of epochs. The range may be a single non-negative integer or a (possibly open-ended) C<LOW..HIGH> range of non-negative integers. C<~> may be prefixed to either (or both) integer values to represent the offset from the maximum possible value. For example, C<--epoch=~0> alone clones only the latest epoch, C<--epoch=~2..> clones the three latest epochs. Default: C<0..~0> or C<0..> or C<..~0> (all epochs, all three examples are equivalent) =item -v =item --verbose Provide more feedback on stderr. =item -q =item --quiet Suppress feedback messages. =back =head2 MIRRORING =over =item --torsocks=auto|no|yes =item --no-torsocks Whether to wrap L<git(1)> and L<curl(1)> commands with L<torsocks(1)>. Default: C<auto> =item --inbox-version=NUM Force a public-inbox version (must be C<1> or C<2>). =back The following options are passed to L<public-inbox-init(1)>: =over =item -j JOBS, --jobs=JOBS =item -L LEVEL, --indexlevel=LEVEL =back The following options are passed to L<public-inbox-index(1)>: =over =item --batch-size=SIZE =item --compact =item -j JOBS, --jobs=JOBS =item --max-size=SIZE =item --sequential-shard =item --skip-docdata =back =head1 FILES The configuration for lei resides at C<$XDG_CONFIG_HOME/lei/config>. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-forget-external(1)>, L<lei-ls-external(1)>, L<lei-import(1)>, L<public-inbox-index(1)>, L<public-inbox-extindex(1)>, L<public-inbox-extindex-format(5)> ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-add-watch.pod��������������������������������������������������0000664�0000000�0000000�00000001755�14300314757�0022050�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-add-watch - watch for new messages and flag changes =head1 SYNOPSIS lei add-watch [OPTIONS] LOCATION [LOCATION...] =head1 DESCRIPTION Tell lei to watch C<LOCATION> for new messages and flag changes. Currently only Maildir locations are supported. WARNING: watches are not always reliable, occasional use of L<lei-index(1)> and L<lei-refresh-mail-sync(1)> is recommended if L<lei-daemon(8)> crashes or needs to be restarted. This will be improved in the future. =for comment TODO: Document --state? Believe valid values are pause, import-ro, =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-ls-watch(1)>, L<lei-rm-watch(1)> �������������������public-inbox-1.9.0/Documentation/lei-blob.pod�������������������������������������������������������0000664�0000000�0000000�00000004036�14300314757�0021125�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-blob - display a git blob, reconstructing from mail if necessary =head1 SYNOPSIS lei blob [OPTIONS] OID =head1 DESCRIPTION Display a git blob. The blob may correspond to a message from the local store, any local external, or blobs associated with a project git repository (if run from a git (working) directory). For blobs which do not exist, it will attempt to recreate the blob using patch emails. =head1 OPTIONS =over =item --git-dir=DIR Specify an additional .git/ directory to scan. This option may be given multiple times. Default: the output of C<git rev-parse --git-dir> =item --no-cwd Do not look in the git repository of the current working directory. =item --no-mail Do not look in mail storage for C<OID>. This is implied by C<--oid-a>, C<--path-a>, and C<--path-b>. =item -A OID-A =item --oid-a=OID-A Provide pre-image object ID as a hint for reconstructing C<OID>. =item -a PATH-A =item --path-a=PATH-A Provide pre-image pathname as a hint for reconstructing C<OID>. =item -b PATH-B =item --path-b=PATH-B Provide post-image pathname as a hint for reconstructing C<OID>. =item -v =item --verbose Provide more feedback on stderr. =back The following options are also supported and are described in L<lei-q(1)>. =over =item --remote Remote externals only get queried when the blob needs to be reconstructed from patch emails. =item --no-local =item --no-external =item -I LOCATION, --include=LOCATION =item --exclude=LOCATION =item --only=LOCATION =item --no-import-remote =item --torsocks=auto|no|yes =item --no-torsocks =item --proxy=PROTOCOL://HOST[:PORT] =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-add-external(1)>, L<lei-q(1)> ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-config.pod�����������������������������������������������������0000664�0000000�0000000�00000005324�14300314757�0021455�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-config - git-config wrapper for lei configuration file =head1 SYNOPSIS lei config [OPTIONS] =head1 DESCRIPTION Call L<git-config(1)> with C<$XDG_CONFIG_HOME/lei/config> as the configuration file. All C<OPTIONS> are passed through, but those that override the configuration file are not permitted. All C<imap> and C<nntp> options may be specified per-host or (if using git 2.26+) with wildcards: [imap "imap://*.onion"] proxy = socks5h://127.0.0.1:9050 [nntp "nntp://example.com"] proxy = socks5h://127.0.0.1:1080 =head2 VARIABLES =over 8 =item external.* Managed by L<lei-add-external(1)> and L<lei-forget-external(1)> =item imap.proxy =item nntp.proxy The C<socks5h://> proxy address. Older versions of SOCKS may be supported if there is user demand. =item imap.starttls =item nntp.starttls Enable or disable STARTTLS on non-imaps:// and non-nntps:// hosts. By default, STARTTLS is enabled if available unless connecting to a Tor .onion or localhost. =item imap.compress =item nntp.compress Enable protocol-level compression. This may be incompatible or broken with some servers. Note: L<Net::NNTP> compression support is pending: L<https://rt.cpan.org/Ticket/Display.html?id=129967> =item imap.debug =item nntp.debug Enable debugging output of underlying IMAP and NNTP libraries, currently L<Mail::IMAPClient> and L<Net::NNTP>, respectively. If using L<imap.proxy> or L<nntp.proxy> point to a SOCKS proxy, debugging output for L<IO::Socket::Socks> will be enabled, as well. Disabling L<imap.compress> may be required for readability. =item imap.timeout =item nntp.timeout The read timeout for responses. Default: 600 seconds (IMAP); 120 seconds (NNTP) =item imap.fetchBatchSize Number of full messages to fetch at once. Larger values reduce network round trips at the cost of higher memory use, especially when retrieving large messages. Small responses for IMAP flags are fetched at 10000 times this value. Default: 1 =item color.SLOT C<quoted>, C<hdrdefault>, C<status>, C<attachment> color slots are supported for the C<-f text> and C<-f reply> output formats of L<lei-lcat(1)> and L<lei-q(1)>. Any per-project .git/config, and global ~/.gitconfig files will also be parsed for diff coloring. git diff color slots (C<color.diff.SLOT>) supported are C<new>, C<old>, C<meta>, C<frag>, C<func>, and C<context>. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-convert.pod����������������������������������������������������0000664�0000000�0000000�00000002752�14300314757�0021672�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-convert - one-time conversion from one mail format to another =head1 SYNOPSIS lei convert -o OUTPUT [OPTIONS] LOCATION lei convert -o OUTPUT [OPTIONS] (--stdin|-) =head1 DESCRIPTION Convert messages to another format. C<LOCATION> is a source of messages: a directory (Maildir), a file (various mbox), or a URL (C<imap://>, C<imaps://>, C<nntp://>, or C<nntps://>). URLs requiring authentication use L<git-credential(1)> to fill in the username and password. For a regular file, the location must have a C<E<lt>formatE<gt>:> prefix specifying one of the following formats: C<mboxrd>, C<mboxcl2>, C<mboxcl>, or C<mboxo>. =head1 OPTIONS =over =item -F MAIL_FORMAT =item --in-format=MAIL_FORMAT Message input format. Unless messages are given on stdin, using a format prefix with C<LOCATION> is preferred. =back The following options are also supported and are described in L<lei-q(1)>. =over =item -o MFOLDER, --output=MFOLDER =item --lock METHOD =item --no-kw =item --torsocks=auto|no|yes =item --no-torsocks =item --proxy=PROTOCOL://HOST[:PORT] =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-mail-formats(5)> ����������������������public-inbox-1.9.0/Documentation/lei-daemon-kill.pod������������������������������������������������0000664�0000000�0000000�00000002307�14300314757�0022402�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-daemon-kill - signal the lei-daemon =head1 SYNOPSIS lei daemon-kill [-SIGNAL | -s SIGNAL | --signal SIGNAL] =head1 DESCRIPTION Send a signal to the L<lei-daemon(8)>. C<SIGNAL> defaults to C<TERM>. This command should be run after updating the code of lei. =head1 SIGNALS =over 8 =item SIGTERM Send a graceful termination signal. L<lei-daemon(8)> will exit when all currently running lei commands are done. The listen socket will be released as soon as the signal is processed so another L<lei-daemon(8)> process can take its place. =item SIGKILL Kills L<lei-daemon(8)> immediately. Some worker processes may remain running after a short while after this takes effect. =back =for comment SIGQUIT and SIGINT currently do what SIGTERM do, may change... =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-daemon-pid(1)>, L<lei-daemon(8)> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-daemon-pid.pod�������������������������������������������������0000664�0000000�0000000�00000001076�14300314757�0022225�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-daemon-pid - show the PID of the lei-daemon =head1 SYNOPSIS lei daemon-pid =head1 DESCRIPTION Show the PID of the lei-daemon. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-daemon-kill(1)> ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-daemon.pod�����������������������������������������������������0000664�0000000�0000000�00000003726�14300314757�0021457�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-daemon - technical information for local email interface daemon =head1 DESCRIPTION This documentation is a high-level overview for developers and administrators interested in how lei works. lei-daemon is a background daemon which powers the L<lei(1)> command-line tool. It may support virtual users and read-write IMAP+JMAP APIs in the future. It is designed to optimize shell completion by avoiding module loading costs, monitor Maildirs (and in the near future, IMAP folders) for changes. =head2 worker processes Most commands cause lei-daemon to L<fork(2)> new worker processes to isolate and parallelize work. lei-daemon is significantly more aggressive than read-only L<public-inbox-daemon(8)> processes with regards to resource use since it's not designed to support C10K/C100K scenarios. =head2 file descriptor passing FD passing is used to reduce IPC costs for bulk I/O when importing large mboxes from stdin and dumping large mboxes to stdout. =head2 SOCK_SEQPACKET SOCK_SEQPACKET sockets are used for both communicating with L<lei(1)> and to internal workers. SOCK_SEQPACKET guarantees reliability (unlike SOCK_DGRAM), allows easy load distribution, and saves developers the trouble of maintaining stream parsers. =head2 file monitoring Inotify or EVFILT_VNODE is used depending on the platform to monitor Maildirs for changes and track keyword changes. The listen socket (default: C<$XDG_RUNTIME_DIR/lei/5.seq.sock>) is also monitored, and the daemon will automatically shutdown if it is unlinked. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-overview(7)>, L<lei-daemon-kill(1)>, L<lei-daemon-pid(1)> ������������������������������������������public-inbox-1.9.0/Documentation/lei-edit-search.pod������������������������������������������������0000664�0000000�0000000�00000001231�14300314757�0022371�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-edit-search - edit saved search =head1 SYNOPSIS lei edit-search [OPTIONS] OUTPUT =head1 DESCRIPTION Invoke C<git config --edit> to edit the saved search at C<OUTPUT>. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-ls-search(1)>, L<lei-forget-search(1)>, L<lei-up(1)>, L<lei-q(1)> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-export-kw.pod��������������������������������������������������0000664�0000000�0000000�00000002307�14300314757�0022146�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-export-kw - export keywords (flags) to Maildir and IMAP folders =head1 SYNOPSIS lei export-kw --all=[<remote|local>] lei export-kw MFOLDER [MFOLDER...] =head1 DESCRIPTION C<lei export-kw> propagates keywords (e.g. C<seen>, C<answered>, C<flagged>, etc.) from lei/store to IMAP folders and/or Maildirs. It only works for messages lei knows about (e.g. was used as a C<lei q --output>, or imported via L<lei-import(1)>, or indexed via L<lei-index(1)>). It does not delete, write, nor modify messages themselves; it only sets metadata on Maildirs and IMAP folders. =head1 OPTIONS =over =item --all Export to all local Maildirs and remote IMAP folders =item --all=local Export all local Maildirs =item --all=remote Export all remote IMAP folders =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-refresh-mail-sync(1)>, L<lei-tag(1)> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-forget-external.pod��������������������������������������������0000664�0000000�0000000�00000001507�14300314757�0023315�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-forget-external - forget external locations =head1 SYNOPSIS lei forget-external [OPTIONS] LOCATION [LOCATION...] =head1 DESCRIPTION Forget the specified externals by removing their entries from C<$XDG_CONFIG_HOME/lei/config>. This excludes the locations from future search results. =head1 OPTIONS =over =item -q =item --quiet Suppress feedback messages. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-add-external(1)>, L<lei-ls-external(1)> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-forget-mail-sync.pod�������������������������������������������0000664�0000000�0000000�00000001543�14300314757�0023367�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-forget-mail-sync - forget sync information for a mail folder =head1 SYNOPSIS lei forget-mail-sync [OPTIONS] LOCATION [LOCATION...] =head1 DESCRIPTION Forget synchronization information for C<LOCATION>, an IMAP or Maildir folder. Note that this won't delete any messages on the filesystem. Users using L<lei-index(1)> without L<lei-import(1)> will be left with dangling references in search results. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-ls-mail-sync(1)>, L<lei-index(1)> �������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-forget-search.pod����������������������������������������������0000664�0000000�0000000�00000001614�14300314757�0022737�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-forget-search - forget saved search =head1 SYNOPSIS lei forget-search [OPTIONS] OUTPUT =head1 DESCRIPTION Forget a saved search at C<OUTPUT>. =head1 OPTIONS =over =item --prune[=<local|remote>] C<--prune> will forget saved searches if the C<OUTPUT> no longer exists. C<--prune=local> only prunes local mailboxes, C<--prune=remote> only prunes remote mailboxes (currently C<imap://> and C<imaps://>). =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-edit-search(1)>, L<lei-ls-search(1)>, L<lei-up(1)>, L<lei-q(1)> ��������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-import.pod�����������������������������������������������������0000664�0000000�0000000�00000004613�14300314757�0021522�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-import - one-time import of messages into local store =head1 SYNOPSIS lei import [OPTIONS] LOCATION [LOCATION...] [+L:LABEL] lei import [OPTIONS] (--stdin|-) =head1 DESCRIPTION Import messages into the local storage of L<lei(1)>. C<LOCATION> is a source of messages: a directory (Maildir), a file, or a URL (C<imap://>, C<imaps://>, C<nntp://>, or C<nntps://>). URLs requiring authentication use L<git-credential(1)> to fill in the username and password. For a regular file, the C<LOCATION> must have a C<E<lt>formatE<gt>:> prefix specifying one of the following formats: C<mboxrd>, C<mboxcl2>, C<mboxcl>, or C<mboxo>. =head1 OPTIONS =over =item -F MAIL_FORMAT =item --in-format=MAIL_FORMAT Message input format. Unless messages are given on stdin, using a format prefix with C<LOCATION> is preferred. =item --stdin Read messages from stdin. =item --lock L<mbox(5)> locking method(s) to use: C<dotlock>, C<fcntl>, C<flock> or C<none>. Default: fcntl,dotlock =item +L:LABEL Add the given C<LABEL> to all messages imported, where C<LABEL> is an arbitrary user-defined value consisting of lowercase and digits. See L<lei-tag(1)> for more info on labels. For example, specifying C<+L:inbox> applies the C<inbox> label to all messages being imported. May be specified multiple times to apply multiple labels. Default: none =item +kw:KEYWORD Apply C<KEYWORD> to all messages being imported in addition to any per-message keywords from the store (unless C<--no-kw> is specified). See L<lei-tag(1)> for more info on keywords. May be specified multiple times to apply multiple keywords. Default: none =item --no-kw Don't import message keywords (or "flags" in IMAP terminology). =item --no-incremental Import already seen IMAP and NNTP articles. =item --torsocks=auto|no|yes =item --no-torsocks Whether to wrap L<git(1)> and L<curl(1)> commands with L<torsocks(1)>. Default: C<auto> =item --proxy=PROTOCOL://HOST[:PORT] Use the specified proxy (e.g., C<socks5h://0:9050>). =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-index(1)> ���������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-index.pod������������������������������������������������������0000664�0000000�0000000�00000003077�14300314757�0021322�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-index - index messages without importing them into lei/store =head1 SYNOPSIS lei index [OPTIONS] FOLDER =head1 DESCRIPTION Similar to L<lei-import(1)>, but does not store a copy of messages into C<lei/store>. This command only makes sense for messages stored in Maildir folders. Other folder types may be supported in the future (they can all be indexed, but the message isn't automatically retrieved by L<lei-q(1)> or L<lei-lcat(1)>). Combined with L<lei-q(1)>, C<lei index> allows Maildir users to have similar functionality to L<mairix(1)> by not duplicating messages into C<lei/store>. Occasional invocations of C<lei-refresh-mail-sync --all=local> are recommended to keep indexed messages retrievable. =head1 OPTIONS =over =item -F MAIL_FORMAT =item --in-format=MAIL_FORMAT There is currently no need for this option. It will support C<mh>, eventually. For now, the default (and only supported) format is C<maildir>. When IMAP and NNTP support are fleshed out, those formats will be inferred from their URLs. Default: C<maildir> =item -q =item --quiet Suppress feedback messages. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-refresh-mail-sync(1)>, L<lei-store-format(5)>, L<lei-import(1)> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-init.pod�������������������������������������������������������0000664�0000000�0000000�00000001500�14300314757�0021143�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-init - initialize storage =head1 SYNOPSIS lei init [OPTIONS] [DIRNAME] =head1 DESCRIPTION Initialize local writable storage for L<lei(1)>. If C<DIRNAME> is unspecified, the storage is created at C<$XDG_DATA_HOME/lei/store>. C<leistore.dir> in C<$XDG_CONFIG_HOME/lei/config> records this location. =head1 OPTIONS =over =item -q =item --quiet Suppress feedback messages. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-add-external(1)> ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-inspect.pod����������������������������������������������������0000664�0000000�0000000�00000002111�14300314757�0021644�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-inspect - general purpose inspector =head1 SYNOPSIS lei inspect [OPTIONS] ITEM [ITEM...] lei inspect [OPTIONS] (--stdin|-) =head1 DESCRIPTION This is a diagnostic command that provides a general purpose inspector of various things, including blobs, message IDs, Xapian document IDs, and mail sync sources. =head1 OPTIONS =over =item -d DIR =item --dir=DIR An inboxdir, extindex topdir, or Xapian shard =item --pretty Pretty print output. If stdout is opened to a tty, C<--pretty> is enabled by default. =item - =item --stdin Read message from stdin. This is implicit if no arguments are given and stdin is a pipe or regular file. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-mail-diff(1)> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-lcat.pod�������������������������������������������������������0000664�0000000�0000000�00000004106�14300314757�0021130�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-lcat - display local copy of messages(s) =head1 SYNOPSIS lei lcat [OPTIONS] MSGID_OR_URL [MSGID_OR_URL...] lei lcat [OPTIONS] (--stdin|-) =head1 DESCRIPTION lcat (local cat) is a wrapper around L<lei-q(1)> that displays local messages by Message-ID. It is able to extract Message-IDs from URLs as well as from common formats such as C<E<lt>$MSGIDE<gt>> and C<id:$MSGID>. When reading from stdin, input that isn't understood is discarded, so the caller doesn't have to bother extracting the Message-ID or link from surrounding text (e.g., a "Link: $URL" line). =head1 OPTIONS The following options, described in L<lei-q(1)>, are supported. One deviation from L<lei-q(1)> is the default output format is C<-f text> when writing to stdout. =over =item --format=FORMAT =item -f FORMAT Most commonly C<text> (the default) or C<reply> to display the message(s) in a format suitable for trimming and sending as a email reply. =item --stdin =item - C<lei lcat> implicitly reads from stdin if it is a L<pipe(7)> or regular file. This is handy for invoking C<lei lcat> from inside an C<$EDITOR> session (assuming you use an C<$EDITOR> which lets you pipe arbitrary lines to arbitrary commands). =item --[no-]remote =item --no-local =item --no-external =item --no-import-remote =item --torsocks=auto|no|yes, --no-torsocks =item --proxy=PROTOCOL://HOST[:PORT] =item -o MFOLDER, --output=MFOLDER =item -d STRATEGY, --dedupe=STRATEGY =item -t, --threads =item -s KEY, --sort=KEY =item -r, --reverse =item --offset=NUMBER =item -g, --globoff =item -a, --augment =item --lock=METHOD =item --alert=COMMAND =item --mua=COMMAND =item --no-color =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-q(1)>, L<lei-blob(1)> ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-ls-external.pod������������������������������������������������0000664�0000000�0000000�00000001773�14300314757�0022452�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-ls-external - list inbox and external index locations =head1 SYNOPSIS lei ls-external [OPTIONS] [FILTER] =head1 DESCRIPTION List configured externals. If C<FILTER> is given, restrict the output to matching entries. =head1 OPTIONS =over =item -g =item --globoff Do not match C<FILTER> using C<*?> wildcards and C<[]> ranges. =item --local Limit operations to the local filesystem. =item --remote Limit operations to those requiring network access. =item -z =item -0 Use C<\0> (NUL) instead of newline (CR) to delimit lines. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-add-external(1)>, L<lei-forget-external(1)> �����public-inbox-1.9.0/Documentation/lei-ls-label.pod���������������������������������������������������0000664�0000000�0000000�00000001457�14300314757�0021706�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-ls-label - list labels =head1 SYNOPSIS lei ls-label [OPTIONS] =head1 DESCRIPTION List all known message labels ("mailboxes" in JMAP terminology). This is handy for writing L<lei-import(1)> invocations. =head1 OPTIONS =over =item -z =item -0 Use C<\0> (NUL) instead of newline (CR) to delimit lines. =item -q =item --quiet Suppress feedback messages. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-add-external(1)> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-ls-mail-source.pod���������������������������������������������0000664�0000000�0000000�00000002146�14300314757�0023043�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-ls-mail-source - list IMAP or NNTP mail source folders =head1 SYNOPSIS lei ls-mail-source [OPTIONS] URL =head1 DESCRIPTION List information about the IMAP or NNTP mail source at C<URL>. This command populates the cache used for Bash shell completion and is handy for writing L<lei-import(1)> invocations. =head1 OPTIONS =over =item -z =item -0 Use C<\0> (NUL) instead of newline (CR) to delimit lines. =item -l Format output as JSON and include more information. =item --pretty Pretty print JSON output. If stdout is opened to a tty, C<--pretty> is enabled by default. =item --ascii Escape non-ASCII characters. =item --url Show full URL of newsgroup or IMAP folder. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-import(1)> ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-ls-mail-sync.pod�����������������������������������������������0000664�0000000�0000000�00000001745�14300314757�0022523�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-ls-mail-sync - list mail sync folders =head1 SYNOPSIS lei mail-sync [OPTIONS] [FILTER] =head1 DESCRIPTION List mail sync folders. If C<FILTER> is given, restrict the output to matching entries. =head1 OPTIONS =over =item -g =item --globoff Do not match C<FILTER> using C<*?> wildcards and C<[]> ranges. =item --local Limit operations to the local filesystem. =item --remote Limit operations to those requiring network access. =item -z =item -0 Use C<\0> (NUL) instead of newline (CR) to delimit lines. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-refresh-mail-sync(1)>, L<lei-export-kw(1)> ���������������������������public-inbox-1.9.0/Documentation/lei-ls-search.pod��������������������������������������������������0000664�0000000�0000000�00000002515�14300314757�0022070�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-ls-search - list saved search queries =head1 SYNOPSIS lei ls-search [OPTIONS] [PREFIX] =head1 DESCRIPTION List saved search queries. If C<PREFIX> is given, restrict the output to entries that start with the specified value. =head1 OPTIONS =over =item -f FORMAT =item --format=FORMAT Display JSON output rather than default short output that includes only the saved search location. Possible values are C<json>, C<jsonl>, or C<concatjson>. =item --pretty Pretty print C<json> or C<concatjson> output. If stdout is opened to a tty and used as the C<--output> destination, C<--pretty> is enabled by default. =item -l Long listing format (shortcut for C<--format=json>). =item --ascii Escape non-ASCII characters. =item -z =item -0 Use C<\0> (NUL) instead of newline (CR) to delimit lines. This option is incompatible with C<--format>. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-q(1)>, L<lei-up(1)>, L<lei-edit-search(1)>, L<lei-forget-search(1)> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-ls-watch.pod���������������������������������������������������0000664�0000000�0000000�00000001233�14300314757�0021725�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-ls-watch - list active watches =head1 SYNOPSIS lei ls-watch =head1 DESCRIPTION List locations that lei is configured to watch. This command is incomplete, mail-sync locations are implicitly watched. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-add-watch(1)>, L<lei-rm-watch(1)> ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-mail-diff.pod��������������������������������������������������0000664�0000000�0000000�00000001232�14300314757�0022032�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-mail-diff - diff the contents of emails =head1 SYNOPSIS lei mail-diff [OPTIONS] LOCATION lei mail-diff [OPTIONS] (--stdin|-) =head1 DESCRIPTION This is a diagnostic command that's useful for finding deduplication bugs. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-inspect(1)> ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-mail-formats.pod�����������������������������������������������0000664�0000000�0000000�00000010375�14300314757�0022605�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-mail-formats - description of mail formats supported by lei =head1 DESCRIPTION L<lei-q(1)> supports writing to several existing mail formats for interoperability with existing mail user agents (MUA); below is an overview of them to help users choose. =head1 Maildir The default output format when given a filesystem path, it supports parallel read-write access. Performance is acceptable for smaller directories, but degrades as mailboxes get larger. Speed and scalability are limited by kernel and filesystem performance due to the use of small files and large number of syscalls. See also: L<https://cr.yp.to/proto/maildir.html> and L<https://wiki2.dovecot.org/MailboxFormat/Maildir> =head1 Mbox family The mbox family consists of several incompatible formats. Locking for parallel access is supported, but may not be compatible across tools. With compression (e.g. L<gzip(1)>), they require the least amount of space while offering good read-only performance. Keyword updates (C<Status:> and/or C<X-Status:> headers) generally require rewriting the entire mbox. See also: L<https://www.loc.gov/preservation/digital/formats/fdd/fdd000383.shtml>, L<mbox(5)> =head2 mboxo The traditional BSD format. It quotes C<From > to C<E<gt>From >, but lines already beginning with C<E<gt>From > do not get quoted, thus automatic reversibility is not guaranteed. MUAs which favor L</mboxcl> or L</mboxcl2> may convert these automatically to their preferred format. Truncation is undetectable unless compressed with gzip or similar. =head2 mboxrd An evolution of L</mboxo>, but quotes C<From > lines prefixed with any number of C<E<gt>> characters and is thus fully reversible. This format is emitted by L<PublicInbox::WWW(3pm)> with gzip. Since git 2.10, C<git am --patch-format=mboxrd> reads this format. C<git log> and C<git format-patch --stdout> can also generate this format with the C<--pretty=mboxrd> switch. As with uncompressed L</mboxo>, uncompressed mboxrd are vulnerable to undetectable truncation. It gracefully degrades to being treated as L</mboxo> by MUAs unaware of the format as excessive C<E<gt>From > quoting is recognizable to humans. =head2 mboxcl L</mboxo> with a C<Content-Length:> header, C<From > lines remain quoted to retain readability with L</mboxo> and L</mboxrd> MUAs. However, it is easy to corrupt these files when using tools which are not aware of C<Content-Length:> and write out updates as L</mboxo>. L<mutt(1)> will convert L</mboxo> and L</mboxrd> to mboxcl upon opening. See also: L<https://www.jwz.org/doc/content-length.html> =head2 mboxcl2 Like L</mboxcl>, but without C<From > any quoting. It is wholly incompatible with MUAs which only handle L</mboxo> and/or L</mboxrd>. This is format is generated by L<mutt(1)> when writing to a new mbox. =head1 MH Not yet supported, locking semantics (or lack thereof) appear to make it unsuitable for parallel access. It is widely-supported by a variety of MUAs and mailing list managers, however. =head1 MMDF Not yet supported, and it's unclear if current usage/support makes it worth supporting. =head1 IMAP Depending on the IMAP server software and configuration, IMAP servers may use any (or combination) of the aforementioned formats or a non-standard database backend. Currently, lei uses L<Mail::IMAPClient> which has acceptable performance over low-latency links. Performance over high-latency links is currently poor. =head1 eml A single raw message file. C<eml> is not an output format for lei, but accepted by as an C<--input-format> (C<-F>) for read-only commands such as L<lei-tag(1)> and L<lei-import(1)>. Since C<eml> is the suffix for the C<message/rfc822> MIME type (according to the C<mime.types> file), lei will infer the type based on the C<.eml> suffix if C<--input-format> is unspecified C<.patch>-suffixed files generated by L<git-format-patch(1)> (without C<--stdout>) are C<eml> files with the addition of an mbox C<From > header. L<lei(1)> removes C<From > lines to treat them as C<eml> when reading these for compatibility with C<git-am(1)> and similar tools. =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<http://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei(1)>, L<lei-q(1)>, L<lei-convert(1)>, L<lei-overview(7)> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-mail-sync-overview.pod�����������������������������������������0000664�0000000�0000000�00000003250�14300314757�0023744�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei - an overview of lei mail synchronization =head1 DESCRIPTION L<lei(1)> provides several plumbing-level commands to synchronize mail and keywords (flags) between lei/store and existing IMAP and Maildir stores. Nothing documented in this manpage is required for day-to-day use against externals. Mail and keyword synchronization is currently a clunky process. Future work will be done to improve it and add IMAP IDLE support. =head1 TYPICAL WORKFLOW # import mail from a user's IMAP inbox and give it the "inbox" label: lei import +L:inbox imaps://user@example.com/INBOX # dump "inbox" labeled files from the past week to a Maildir lei q L:inbox rt:last.week.. -o /tmp/results # open /tmp/results in your favorite mail agent. If inotify or kevent # works, keyword changes (e.g. marking messages as `seen') are # synchronized automatically. # If the inotify queue overflows, or if lei-daemon crashes, # "lei index" will tell lei about keyword changes: lei index /tmp/results # Optional: cleanup stale entries from mail_sync.sqlite3 lei refresh-mail-sync /tmp/results # to export keyword changes back to IMAP lei export-kw imaps://user@example.com/INBOX =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-import(1)>, L<lei-q(1)>, L<lei-index(1)>, L<lei-refresh-mail-sync(1)>, L<lei-export-kw(1)> ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-overview.pod���������������������������������������������������0000664�0000000�0000000�00000011513�14300314757�0022053�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei - an overview of lei =head1 DESCRIPTION L<lei(1)> is a local email interface for public-inbox and personal mail. This document provides some basic examples. =head1 LEI STORE lei has writable local storage based on L<public-inbox-v2-format(5)>. Commands will automatically initialize the store behind the scenes if needed, but you can call L<lei-init(1)> directly if you want to use a store location other than the default C<$XDG_DATA_HOME/lei/store>. The L<lei-import(1)> command provides the primary interface for importing messages into the local storage. In addition, other commands, such as L<lei-q(1)> and L<lei-blob(1)>, use the local store to memoize messages from remotes. =head2 EXAMPLES =over =item $ lei import mboxrd:t.mbox.gz Import the messages from a gzipped mboxrd into the local storage. =item $ lei blob 59ec517f9 Show message with the git blob OID of 59ec517f9. If a message with that OID isn't found, check if the current git repository has the blob, trying to reconstruct it from a message if needed. =item $ lei blob 59ec517f9 | lei tag -F eml +kw:flagged +L:next Set the "flagged" keyword and "next" label on the message with the blob OID of 59ec517f9. =back =head1 EXTERNALS In addition to the above store, lei can make read-only queries to "externals": inboxes and external indices. An external can be registered by passing a URL or local path to L<lei-add-external(1)>. For existing local paths, the external needs to be indexed with L<public-inbox-index(1)> (in the case of a regular inbox) or L<public-inbox-extindex(1)> (in the case of an external index). =head1 SYNCHRONIZATION lei currently has primitive mail synchronization abilities; see L<lei-mail-sync-overview(7)> for more details. =head2 EXAMPLES =over =item $ lei add-external https://public-inbox.org/meta/ Add a remote external for public-inbox's inbox. =item $ lei add-external --mirror https://public-inbox.org/meta/ path Clone L<https://public-inbox.org/meta/> to C<path>, index it with L<public-inbox-index(1)>, and add it as a local external. =back =head1 SEARCHING The L<lei-q(1)> command searches the local store and externals. The search prefixes match those available via L<public-inbox-httpd(1)>. =head2 EXAMPLES =over =item $ lei q s:lei s:skeleton Search for messages whose subject includes "lei" and "skeleton". =item $ lei q -t s:lei s:skeleton Do the same, but also report unmatched messages that are in the same thread as a matched message. =item $ lei q -t -o /tmp/mdir --mua=mutt s:lei s:skeleton Write results to a Maildir at "mdir". Mutt will be invoked to open mfolder (C<mutt -f %f>) while results are being fetched and written. =item $ lei q kw:flagged L:next Search for all flagged messages that also have a "next" label. =item $ lei p2q HEAD | lei q -tt -o /tmp/mdir Search for messages that have post-image git blob IDs that match those of the current repository's HEAD commit, writing them to the Maildir directory "mdir" and flagging the messages that were an exact match. =item $ git show -s HEAD | lei lcat Display a local message for the public-inbox link contained in a commit message. =item $ lei q -f text m:MESSAGE-ID | lei rediff -U5 Feed a message containing a diff to L<lei-rediff(1)> to regenerate its diff with five context lines. Unless C<--git-dir> is specified, this requires the current working directory to be within the associated code repository. =back =head1 PERFORMANCE NOTES L<Inline::C> is required, lei runs as a background daemon to reduce startup costs and can provide real-time L<kqueue(2)>/L<inotify(7)> Maildir monitoring. L<IO::KQueue> (p5-IO-KQueue on FreeBSD) and L<Linux::Inotify2> (liblinux-inotify2-perl and perl-Linux-Inotify2 in .deb and .rpm-based distros, respectively) are recommended. L<Socket::MsgHdr> is optional (libsocket-msghdr-perl in Debian), and further improves startup performance. Its effect is most felt when using shell completion. =head1 BASH COMPLETION Preliminary Bash completion for lei is provided in C<contrib/completion/>. Contributions adding support for other shells, as well as improvements to the existing Bash completion, are welcome. =head1 UPGRADING Since lei runs as a daemon, L<lei-daemon-kill(1)> is required to kill the daemon so it can load new code. It will be restarted with the next invocation of any lei command. =head1 CAVEATS IMAP and NNTP client performance is poor on high-latency connections. It will hopefully be fixed in 2022. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-mail-sync-overview(7)> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-p2q.pod��������������������������������������������������������0000664�0000000�0000000�00000005346�14300314757�0020716�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-p2q - use a patch to generate a lei-q query =head1 SYNOPSIS lei p2q [OPTIONS] (FILE|COMMIT) lei p2q [OPTIONS] (--stdin|-) =head1 DESCRIPTION Given a patch, create a query that can be fed on stdin to L<lei-q(1)>. This is useful for mapping the patch to associated messages of an inbox. The patch can be provided on stdin or as a file. Alternatively, when an argument is given that does not point to an existing file, it is taken as a reference to a commit in the current git repository, and L<git-format-patch(1)> is used to generate the patch. =head1 OPTIONS =over =item -w PREFIX[,PREFIX] =item --want=PREFIX[,PREFIX] Search prefixes to use. C<dfpost> (post-image git blob ID) and C<dfn> (file names from the diff) are the most useful. Other available values are C<dfa>, C<dfb>, C<dfctx>, C<dfhh>, and C<dfpre>. =for comment TODO: Put a table of prefixes somewhere and reference that (at least here and in lei-q)? Appending an integer to C<dfpost> or C<dfpre> indicates a minimum ID length, and the generated query will be for that value up through the default abbreviation length. For example, if the repository's C<core.abbrev> is set to C<auto> and git calculates the default abbreviation length as 7, C<dfpost6> will expand a post-image blob ID of e7b4b32 (seven characters) into C<dfpost:e7b4b32 OR dfpost:e7b4b3>. This option may be given multiple times. Default: C<dfpost7> =item --stdin Read message from stdin. This is implicit if no arguments are given and stdin is a pipe or regular file. =item --debug Dump output that shows the information collected for every prefix. This information can be useful for seeing how a patch is processed, but the format should not be considered stable. =item --uri URI escape output for interacting with HTTP(S) public-inbox instances. =item -q =item --quiet Suppress feedback messages. =back =head1 EXAMPLES # to search for all threads which touch a given thread: lei p2q $COMMIT_OID | lei q -t -o /tmp/results # to view results on a remote HTTP(S) public-inbox instance $BROWSER https://example.com/pub-inbox/?q=$(lei p2q --uri $COMMIT_OID) # to view unapplied patches for a given $FILE from the past year: echo \( rt:last.year.. AND dfn:$FILE \) AND NOT \( \ $(git log -p --pretty=mboxrd --since=last.year $FILE | lei p2q -F mboxrd ) \) | lei q -o /tmp/unapplied =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-q(1)> ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-q.pod����������������������������������������������������������0000664�0000000�0000000�00000017734�14300314757�0020460�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-q - search for messages matching terms =head1 SYNOPSIS lei q [OPTIONS] TERM [TERM...] lei q [OPTIONS] (--stdin|-) =head1 DESCRIPTION Search for messages across the lei/store and externals. =for comment TODO: Give common prefixes, or at least a description/reference. =head1 OPTIONS =for comment TODO: mention curl options? =over =item --stdin Read search terms from stdin. =item --no-save Do not save the search for L<lei-up(1)>. =item --output=MFOLDER =item -o MFOLDER =item --mfolder=MFOLDER Warning: this clobbers and overwrites the output destination unless L</--augment> is specified. Destination for results (e.g., C</tmp/results-Maildir>, C<imaps://user@mail.example.com/INBOX.test>, or C<mboxcl2:/tmp/results-mboxcl2>). The prefix may be a supported protocol: C<imap://> or C<imaps://>. URLs requiring authentication use L<git-credential(1)> to fill in the username and password. A prefix can specify the format of the output: C<maildir>, C<mboxrd>, C<mboxcl2>, C<mboxcl>, C<mboxo>. For a description of mail formats, see L<lei-mail-formats(5)>. C<maildir> is the default for an existing directory or non-existing path. Default: C<-> (stdout) =item --format=FORMAT =item -f FORMAT Format of results to stdout. This option exists as a convenient way to specify the format for the default stdout destination. C<reply>, C<text>, C<json>, C<jsonl>, or C<concatjson> are all supported, as are the various mbox variants described in L</--output>. When a format isn't specified, it's chosen based on the L</--output> destination or prefix. C<json> is used for the default destination (stdout). Using a C<format:> prefix with the C<--output> destination is preferred when not writing to stdout. =item --no-color Disable color (for C<-f reply> and C<-f text>). =item --pretty Pretty print C<json> or C<concatjson> output. If stdout is opened to a tty and used as the C<--output> destination, C<--pretty> is enabled by default. =item --mua=COMMAND A command to run on C<--output> Maildir or mbox (e.g., C<mutt -f %f>). For a subset of MUAs known to accept a mailbox via C<-f>, COMMAND can be abbreviated to the name of the program: C<mutt>, C<mailx>, C<mail>, or C<neomutt>. =item --alert=COMMAND[,COMMAND...] Run C<COMMAND> after writing to output. C<:WINCH> indicates to send C<SIGWINCH> to the C<--mua> process. C<:bell> indicates to print a bell code. Any other value is interpreted as a command to execute as is. This option may be given multiple times. Default: C<:WINCH,:bell> when C<--mua> is specified and C<--output> doesn't point to stdout, nothing otherwise. =item --augment =item -a Augment output destination instead of clobbering it. =item --no-import-before Do not import keywords before writing to an existing output destination. =item --threads =item -t Return all messages in the same thread as the actual match(es). Using this twice (C<-tt>) sets the C<flagged> (AKA "important") on messages which were actual matches. This is useful to distinguish messages which were direct hits from messages which were merely part of the same thread. TODO: Warning: this flag may become persistent and saved in lei/store unless an MUA unflags it! (Behavior undecided) =item --dedupe=STRATEGY =item -d STRATEGY Strategy for deduplicating messages: C<content>, C<oid>, C<mid>, or C<none>. Default: C<content> =for comment TODO: Provide description of strategies? =item --[no-]remote Whether to include results requiring network access. When local externals are configured, C<--remote> must be explicitly passed to enable reporting of results from remote externals. =item --no-local Limit operations to those requiring network access. =item --no-external Don't include results from externals. =item --include=LOCATION =item -I LOCATION Include specified external in search. This option may be given multiple times. =item --exclude=LOCATION Exclude specified external from search. This option may be given multiple times. =item --only=LOCATION =item -O LOCATION Use only the specified external for search. This option may be given multiple times, in which case the search uses only the specified set. =item --globoff =item -g Do not match locations using C<*?> wildcards and C<[]> ranges. This option applies to C<--include>, C<--exclude>, and C<--only>. =item --no-import-remote Disable the default behavior of memoizing remote messages into the local store. =item --lock=METHOD L<mbox(5)> locking method(s) to use: C<dotlock>, C<fcntl>, C<flock> or C<none>. Default: fcntl,dotlock =item --limit=NUMBER =item -NUMBER =item -n NUMBER Fuzzy limit the number of matches per-local external and lei/store. Messages added by the L<--threads> switch do not count towards this limit, and there is no limit on remote externals. Default: 10000 =item --offset=NUMBER Shift start of search results. Default: 0 =item --reverse =item -r Reverse the results. Note that this applies before C<--limit>. =item --sort=KEY =item -s KEY Order the results by KEY. Valid keys are C<received>, C<relevance>, and C<docid>. Default: C<received> =item --verbose =item -v Provide more feedback on stderr. =item --quiet =item -q Suppress feedback messages. =item --torsocks=auto|no|yes =item --no-torsocks Whether to wrap L<git(1)> and L<curl(1)> commands with L<torsocks(1)>. Default: C<auto> =item --proxy=PROTOCOL://HOST[:PORT] =back =head1 SEARCH TERMS C<lei q> supports the same search prefixes used by HTTP(S) public-inbox instances: =for comment AUTO-GENERATED-SEARCH-TERMS-BEGIN s: match within Subject e.g. s:"a quick brown fox" d: match date-time range, git "approxidate" formats supported Open-ended ranges such as `d:last.week..' and `d:..2.days.ago' are supported b: match within message body, including text attachments nq: match non-quoted text within message body q: match quoted text within message body n: match filename of attachment(s) t: match within the To header c: match within the Cc header f: match within the From header a: match within the To, Cc, and From headers tc: match within the To and Cc headers l: match contents of the List-Id header bs: match within the Subject and body dfn: match filename from diff dfa: match diff removed (-) lines dfb: match diff added (+) lines dfhh: match diff hunk header context (usually a function name) dfctx: match diff context lines dfpre: match pre-image git blob ID dfpost: match post-image git blob ID dfblob: match either pre or post-image git blob ID patchid: match `git patch-id --stable' output rt: match received time, like `d:' if sender's clock was correct =for comment AUTO-GENERATED-SEARCH-TERMS-END Additional search prefixes which only affect the local lei/store: L: match the given label kw: match the given keywords See L<lei-tag(1)> for more info on labels and keywords. Most prefixes are probabilistic, meaning they support stemming and wildcards (C<*>). Ranges (such as C<d:>) and boolean prefixes do not support stemming or wildcards. The upstream Xapian query parser documentation fully explains the query syntax: L<https://xapian.org/docs/queryparser.html> =head1 TIPS C<-f reply> is intended to aid in turning a cover letter into a reply (since using C<git format-patch --in-reply-to=...> is tedious). Results (including "From " lines) should be edited and trimmed in your favorite C<$EDITOR> before sending. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-add-external(1)>, L<lei-lcat(1)>, L<lei-up(1)>, L<Xapian::QueryParser Syntax|https://xapian.org/docs/queryparser.html> ������������������������������������public-inbox-1.9.0/Documentation/lei-rediff.pod�����������������������������������������������������0000664�0000000�0000000�00000006167�14300314757�0021455�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-rediff - regenerate a diff with different options =head1 SYNOPSIS lei rediff [OPTIONS] LOCATION [LOCATION...] lei rediff [OPTIONS] (--stdin|-) =head1 DESCRIPTION Read a message from C<LOCATION> or stdin and regenerate its diff with the specified L<git-diff(1)> options. This is useful if you want to change the display of the original patch (e.g., increasing context, coloring moved lines differently, or using an external diff viewer). It relies on the contents of the .git directory of your current project working tree. In other words, it works anywhere L<git-am(1)> works. Otherwise, C<--git-dir=> may be specified any number of times to add repositories to build blob data from. =head1 OPTIONS In addition to many L<git-diff(1)> options (e.g. C<-W>, C<-w>, C<-U $LINES>) the following options are supported: =over =item --stdin Read message from stdin. This is implicit if no arguments are given and stdin is a pipe or regular file. For users of text editors and pagers capable of piping its buffer to arbitrary commands, it is useful to pipe a patch email to C<lei rediff> before piping it to L<git-am(1)>. The output of C<lei rediff> is compatible with C<git am> if its input was a patch email. =item --drq[=COUNT] De-Re-Quote. De-quotes the input and re-quotes (the output). Removes COUNT levels of C<E<gt> > email reply prefixes and re-adds them upon regenerating the diff. This switch is intended as a convenience for running inside a pipe-capable text editor when writing replies to a patch email. Note: this may over-add C<E<gt> > prefixes if some input lines are missing C<E<gt> > prefixes. COUNT is 1 if unspecified; in other words, C<--drq=1> and C<--drq> are equivalent. It implies L</--quiet> unless L</--verbose> is specified since text editors tend to combine stderr with stdout. =item --dequote-only[=COUNT] Like L</--drq>, but does not re-add quote prefixes to the output. This can be useful for feeding a hunk to L<git-apply(1)> or L<patch(1)> while writing a reply or further processing by another diff viewer. Unlike L</--drq>, it does NOT imply L</--quiet>. =item --git-dir=DIR Specify an additional .git/ directory to scan. This option may be given multiple times. Default: the output of C<git rev-parse --git-dir> =item --no-cwd Do not look in the git repository of the current working directory. =item -q =item --quiet Suppress progress output. =item -v =item --verbose Provide more feedback on stderr. =back The options below, described in L<lei-q(1)>, are also supported. =over =item --[no-]remote =item --no-local =item --no-external =item --no-import-remote =item --torsocks=auto|no|yes, --no-torsocks =item --proxy=PROTOCOL://HOST[:PORT] =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-q(1)>, L<lei-blob(1)>, L<lei-p2q(1)> ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-refresh-mail-sync.pod������������������������������������������0000664�0000000�0000000�00000002423�14300314757�0023535�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-refresh-mail-sync - refresh sync info with Maildir, IMAP =head1 SYNOPSIS lei refresh-mail-sync --all[=<remote|local>] lei refresh-mail-sync MFOLDER [MFOLDER...] =head1 DESCRIPTION C<lei refresh-mail-sync> is intended to keep old messages indexed with L<lei-index(1)> retrievable if Maildir flags change a filename. It will prune invalid entries for messages which no longer exist in a Maildir. It is also useful for ensuring L<lei-export-kw(1)> can propagate keyword (flag) changes to Maildirs and IMAP folders. It only needs read-only access to Maildirs and IMAP folders and will not attempt to write to them at all. =head1 OPTIONS =over =item --all Refresh all local Maildirs and remote IMAP folders =item --all=local Refresh all local Maildirs =item --all=remote Refresh all remote IMAP folders =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-index(1)>, L<lei-export-kw(1)>, L<lei-ls-mail-sync(1)> ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-reindex.pod����������������������������������������������������0000664�0000000�0000000�00000002060�14300314757�0021640�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-reindex - reindex messages already in lei/store =head1 SYNOPSIS lei reindex [OPTIONS] =head1 DESCRIPTION Forces a re-index of all messages previously-indexed by L<lei-import(1)> or L<lei-index(1)>. This can be used for in-place upgrades and bugfixes while other processes are querying the store. Keep in mind this roughly doubles the size of the already-large Xapian database. It does not re-index messages in externals, using the C<--reindex> switch of L<public-inbox-index(1)> or L<public-inbox-extindex(1)> is needed for that. =head1 OPTIONS =over =item -q =item --quiet Suppress feedback messages. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-index(1)>, L<lei-import(1)> ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-rm-watch.pod���������������������������������������������������0000664�0000000�0000000�00000001301�14300314757�0021721�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-rm-watch - stop watching locations =head1 SYNOPSIS lei rm-watch [OPTIONS] LOCATION [LOCATION...] =head1 DESCRIPTION Tell lei to stop watching C<LOCATION> for new messages and flag changes. Currently only Maildir locations are supported. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-add-watch(1)>, L<lei-ls-watch(1)> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-rm.pod���������������������������������������������������������0000664�0000000�0000000�00000003145�14300314757�0020625�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-rm - unindex a message in lei/store =head1 SYNOPSIS lei rm [OPTIONS] (-|--stdin) lei rm [OPTIONS] LOCATION =head1 DESCRIPTION Removes message(s) and associated private metadata from lei/store indices. It does not affect messages stored in externals, so it's still possible to get "removed" messages from externals in L<lei-q> search results. This does not remove the message from underlying git storage nor does it remove messages from Maildir/mbox/IMAP/etc. sources. =head1 OPTIONS =over =item - =item --stdin Read input from standard input. This is the default if standard input is a pipe or regular file and there are no arguments on the command-line. =item -F MAIL_FORMAT =item --in-format=MAIL_FORMAT Message input format: C<eml>, C<mboxrd>, C<mboxcl2>, C<mboxcl>, or C<mboxo> when reading from stdin or using one of the mbox variants. Not necessary when using an IMAP URL, NNTP URL or Maildir. Default: C<eml> when reading from stdin or if the file suffix ends in C<.patch> or C<.eml>. =item --lock=METHOD L<mbox(5)> locking method(s) to use: C<dotlock>, C<fcntl>, C<flock> or C<none>. Default: fcntl,dotlock =item -q =item --quiet Suppress feedback messages. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-store-format(5)> ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-security.pod���������������������������������������������������0000664�0000000�0000000�00000012534�14300314757�0022060�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei - security information =head1 SYNOPSIS L<lei(1)> is intended for use with both publicly-archived and "private" mail in personal mailboxes. This document is intended to give an overview of security implications and lower^Wmanage user expectations. =head1 DESCRIPTION lei expects to be run as a regular user on a Unix-like system. It expects a case-sensitive filesystem with standard Unix permissions support. It does not use POSIX ACLs, extended attributes, nor any other security-related functions which require non-standard Perl modules. There is preliminary support for "virtual users", but it is incomplete and undocumented. =head1 INTERNAL FILES lei runs with a umask of 077 to prevent other users on the system from accessing each other's mail. The git storage and Xapian databases are located at C<$XDG_DATA_HOME/lei/store> (typically C<~/.local/share/lei/store>). Any personal mail imported will reside here, so this should be on an encrypted filesystem or block device. C<$XDG_RUNTIME_DIR/lei> (typically C</run/user/$UID/lei> or C</tmp/lei-$UID>) contain the socket used to access the lei daemon. It must only be accessible to the owner (mode 0700). C<$XDG_CACHE_HOME/lei> (typically C<~/.cache/lei>) will contain IMAP and Maildir folder names which could leak sensitive information as well as git repository names. C<$XDG_DATA_HOME/lei/saved-searches> (typically C<~/.local/share/lei/saved-searches>) will contain aforementioned folder names as well as (removable) search history. The configuration for lei resides at C<$XDG_CONFIG_HOME/lei/config> (typically C<~/.config/lei/config>). It may contain sensitive pathnames and hostnames in the config if a user chooses to configure them. lei itself will never write credentials to the filesystem. However, L<git-credential(1)> may be configured to do so. lei will only read C<~/.netrc> if C<--netrc> is used (and it will never write to C<~/.netrc>). C<$XDG_CACHE_HOME/public-inbox> (typically C<~/.cache/public-inbox>) can contain data and L<Inline::C>-built modules which can be shared with public-facing L<public-inbox-daemon(8)> instances; so no private data should be in "public-inbox" paths. =head1 EXTERNAL FILES Locations set by L<lei-add-external(1)> can be shared with public-facing L<public-inbox-daemon(8)> processes. They may reside on shared storage and may be made world-readable to other users on the local system. =head1 CORE DUMPS In case any process crashes, a core dumps may contain passwords or contents of sensitive messages. Please report these so they can be fixed (see L</CONTACT>). =head1 NETWORK ACCESS lei currently uses the L<curl(1)> and L<git(1)> executables in C<$PATH> for HTTP and HTTPS network access. Interactive authentication for HTTP and HTTPS is not-yet-supported since all currently supported HTTP/HTTPS sources are L<PublicInbox::WWW> instances. The L<Mail::IMAPClient> library is used for IMAP and IMAPS. L<Net::NNTP> (standard library) is used for NNTP and NNTPS. L<Mail::IMAPClient> and L<Net::NNTP> will use L<IO::Socket::SSL> for TLS if available. In turn, L<IO::Socket::SSL> uses the widely-installed OpenSSL library. STARTTLS will be attempted if advertised by the server unless IMAPS or NNTPS are used. C<-c imap.starttls=0> and C<-c nntp.startls=0> may be used to disable STARTTLS. L<IO::Socket::Socks> will be used if C<-c imap.proxy> or C<-c nntp.proxy> point to a C<socks5h://$HOST:$PORT> address (common for Tor). The C<--netrc> switch may be passed to curl and used for NNTP/IMAP access (via L<Net::Netrc>). =head1 CREDENTIAL DATA lei uses L<git-credential(1)> to prompt users for IMAP and NNTP usernames and passwords. These passwords are not encrypted in memory and get transferred across processes via anonymous UNIX sockets and pipes. They may be exposed via syscall tracing tools (e.g. L<strace(1)>), kernel and hardware bugs/attacks. While credentials are not written to the filesystem by default, it is possible for them to end up on disk if processes are swapped out. Use of an encrypted swap partition is recommended. =head1 AUTHENTICATION METHODS LOGIN (username + password) is known to work over IMAP(S), as does AUTH=ANONYMOUS (which is used by L<public-inbox-imapd(1)> as part of our test suite). AUTHINFO may work for NNTP, but is untested. Testers will be needed for other authentication methods. =head1 DENIAL-OF-SERVICE VECTORS lei uses the same MIME parsing library as L<public-inbox-mda(1)> with limits header sizes, parts, nesting and boundary limits similar to those found in SpamAssassin and postfix. Email address parsing is handled by L<Email::Address::XS> if available, but may fall back to regular expressions which favor speed and predictable execution times over correctness. =head1 ENCRYPTED EMAILS Not yet supported, but it should eventually be possible to configure decryption and indexing of encrypted messages and attachments. When supported, decrypted terms will be stored in Xapian DBs under C<$XDG_DATA_HOME/lei/store>. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-overview(7)>, L<lei(1)> ��������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-store-format.pod�����������������������������������������������0000664�0000000�0000000�00000006267�14300314757�0022641�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������% public-inbox developer manual =head1 NAME lei-store-format - lei/store format description =head1 DESCRIPTION C<lei/store> is a hybrid store based on L<public-inbox-extindex-format(5)> ("extindex") combined with L<public-inbox-v2-format(5)> ("v2") for blob storage. While v2 is ideal for archiving a single public mailing list; it was never intended for personal mail nor storing multiple blobs of the "same" message. As with extindex, it can index disparate C<List-Id> headers belonging to the "same" message with different git blob OIDs. Unlike v2 and extindex, C<Message-ID> headers are NOT required; allowing unsent draft messages to be stored and indexed. =head1 DIRECTORY LAYOUT Blob storage exists in the form of v2-style epochs. These epochs are under the C<local/> directory (instead of C<git/>) to prevent them from being accidentally treated as a v2 inbox. =head2 INDEX OVERVIEW AND DEFINITIONS $EPOCH - Integer starting with 0 based on time $SCHEMA_VERSION - DB schema version (for Xapian) $SHARD - Integer starting with 0 based on parallelism ~/.local/share/lei/store - local/$EPOCH.git # normal bare git repositories - mail_sync.sqlite3 # sync state IMAP, Maildir, NNTP Additionally, the following share the same roles they do in extindex: - ei.lock # lock file to protect global state - ALL.git # empty, alternates for local/*.git - ei$SCHEMA_VERSION/$SHARD # per-shard Xapian DB - ei$SCHEMA_VERSION/over.sqlite3 # overview DB for WWW, IMAP - ei$SCHEMA_VERSION/misc # misc Xapian DB =head2 XREF3 DEDUPLICATION Index deduplication follows extindex, see L<public-inbox-extindex-format(5)/XREF3 DEDUPLICATION> for more information. =head2 BLOB DEDUPLICATION The contents of C<local/*.git> repos is deduplicated by git blob object IDs (currently SHA-1). This allows multiple copies of cross-posted and personally Cc-ed messages to be stored with different C<Received:>, C<X-Spam-Status:> and similar headers to allow troubleshooting. =head2 VOLATILE METADATA Keywords and label information (as described in RFC 8621 for JMAP) is stored in existing Xapian shards (C<ei$SCHEMA_VERSION/$SHARD>). It is possible to search for messages matching labels and keywords using C<L:> and C<kw:>, respectively. As with all data stored in Xapian indices, volatile metadata is associated with the Xapian document, thus it is shared across different blobs of the "same" message. =head2 mail_sync.sqlite3 This SQLite database maintained for bidirectional mapping of git blobs to IMAP UIDs, Maildir file names, and NNTP article numbers. It is also used for retrieving messages from Maildirs indexed by L<lei-index(1)>. =head1 IPC L<lei-daemon(8)> communicates with the C<lei/store> process using L<unix(7)> C<SOCK_SEQPACKET> sockets. =head1 CAVEATS Reindexing and synchronization is not yet supported. =head1 THANKS Thanks to the Linux Foundation for sponsoring the development and testing. =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<http://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-v2-format(5)>, L<public-inbox-extindex(5)> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-tag.pod��������������������������������������������������������0000664�0000000�0000000�00000004130�14300314757�0020755�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-tag - set/unset metadata on messages =head1 SYNOPSIS lei tag [OPTIONS] FILE [FILE...] METADATA [METADATA...] lei tag [OPTIONS] (-|--stdin) METADATA [METADATA...] =head1 DESCRIPTION Set or unset volatile metadata on messages. In JMAP terms, "volatile metadata" includes "mailboxes" (analogous to a folder or label) and a restricted set of "keywords". This supported keywords are the combination of system keywords (seen, answered, flagged, and draft), which map to Maildir flags and mbox Status/X-Status headers, as well as reserved keywords (forwarded, phishing, junk, and notjunk). To add a label or keyword, prefix it with "+L:" and "+kw:", respectively. To remove a label or keyword, use "-L:" or "-kw:". For example, "+kw:flagged" would set the "flagged" keyword for the specified messages, and "-L:INBOX" would remove the "INBOX" label. =head1 OPTIONS =over =item -F MAIL_FORMAT =item --in-format=MAIL_FORMAT Message input format: C<eml>, C<mboxrd>, C<mboxcl2>, C<mboxcl>, or C<mboxo>. Default: C<eml> =item -q =item --quiet Suppress feedback messages. =back =head1 LABELS Labels are user-defined values analogous to IMAP/JMAP mailbox names. They must only contain lowercase characters, digits, and a limited amount of punctuation (e.g. C<.>, C<->, C<@>). Messages may have multiple labels. =head1 KEYWORDS Keywords are "flags" in Maildir and IMAP terminology. Common keywords include: C<seen>, C<answered>, C<flagged>, and C<draft>, though C<forwarded>, C<phishing>, C<junk>, and C<notjunk> are also supported. When writing to various mboxes, the common keywords will be mapped to the C<Status> and C<X-Status> headers. Messages may have multiple keywords. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-add-external(1)> ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei-up.pod���������������������������������������������������������0000664�0000000�0000000�00000004162�14300314757�0020633�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei-up - update a saved search =head1 SYNOPSIS lei up [OPTIONS] OUTPUT lei up [OPTIONS] --all[=<local|remote>] =head1 DESCRIPTION Update the saved search at C<OUTPUT> or all saved searches. =head1 OPTIONS =over =item --all[=<local|remote>] C<--all> updates all saved searches (listed in L<lei-ls-search(1)>). C<--all=local> only updates local mailboxes, C<--all=remote> only updates remote mailboxes (currently C<imap://> and C<imaps://>). =item --remote-fudge-time=INTERVAL Look for mail older than the time of the last successful query. Using a small interval will reduce bandwidth use. A larger interval reduces the likelihood of missing a result due to MTA delays or downtime. The time(s) of the last successful queries are the C<lastresult> values visible from L<lei-edit-search(1)>. Date formats understood by L<git-rev-parse(1)> may be used. e.g C<1.hour> or C<3.days> Default: 2.days =item --no-external =item --no-local =item --no-remote These disable the use of all externals, local externals, or remote externals respectively. They are useful during temporary network or mount-point outages. Unlike C<lei q>, these switches override the original C<lei q --only> options saved as C<lei.q.only>. The combination C<--all=remote --no-remote> is supported for offline use in case a user is updating an IMAP folder on localhost. =item --exclude=LOCATION As with L<lei-q(1)>, but may also exclude externals originally specified via C<lei q --only>. =item --lock=METHOD =item --alert=CMD =item --mua=CMD C<--lock>, C<--alert>, and C<--mua> are all supported and documented in L<lei-q(1)>. C<--mua> is incompatible with C<--all>. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-q(1)>, L<lei-ls-search(1)>, L<lei-edit-search(1)>, L<lei-forget-search(1)> ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei.pod������������������������������������������������������������0000664�0000000�0000000�00000005650�14300314757�0020214�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME lei - local email interface =head1 SYNOPSIS lei [OPTIONS] COMMAND =head1 DESCRIPTION lei is a command-line tool for importing and searching email, regardless of whether it is from a personal mailbox or a public-inbox. lei supports a local, writable store built on top of L<public-inbox-v2-format(5)> and L<public-inbox-extindex(1)>. L<lei-q(1)> provides an interface for querying messages across the lei store and read-only local and remote "externals" (inboxes and external indices). Warning: lei is still in its early stages and may destroy mail. Be sure to have backups of destinations lei writes to. Available in public-inbox 1.7.0+. =head1 OPTIONS =over =item -c NAME=VALUE Override configuration C<NAME> to C<VALUE>. =item -C DIR Change current working directory to the specified directory before running the command. This option can be given before or after C<COMMAND> and is accepted by all lei subcommands except L<lei-daemon-kill(1)>. =back =head1 COMMANDS Subcommands for initializing and managing local, writable storage: =over =item * L<lei-init(1)> =item * L<lei-import(1)> =item * L<lei-tag(1)> =back The following subcommands can be used to manage and inspect external locations: =over =item * L<lei-add-external(1)> =item * L<lei-forget-external(1)> =item * L<lei-ls-external(1)> =back Subcommands related to searching and inspecting messages from the lei store and configured externals are =over =item * L<lei-blob(1)> =item * L<lei-config(1)> =item * L<lei-edit-search(1)> =item * L<lei-forget-search(1)> =item * L<lei-lcat(1)> =item * L<lei-ls-search(1)> =item * L<lei-p2q(1)> =item * L<lei-q(1)> =item * L<lei-rediff(1)> =item * L<lei-up(1)> =back Other subcommands include =over =item * L<lei-add-watch(1)> =item * L<lei-config(1)> =item * L<lei-convert(1)> =item * L<lei-daemon-kill(1)> =item * L<lei-daemon-pid(1)> =item * L<lei-forget-mail-sync(1)> =item * L<lei-mail-diff(1)> =item * L<lei-inspect(1)> =item * L<lei-ls-label(1)> =item * L<lei-ls-mail-source(1)> =item * L<lei-ls-mail-sync(1)> =item * L<lei-ls-watch(1)> =item * L<lei-rm-watch(1)> =back =head1 FILES By default storage is located at C<$XDG_DATA_HOME/lei/store>. The configuration for lei resides at C<$XDG_CONFIG_HOME/lei/config>. =head1 ERRORS Errors and dianostics for interactive commands are reported to stderr. Some errors for background tasks are emitted via L<syslog(3)> as L<lei-daemon(8)> for the top-level daemon, and C<lei/store> for the L<lei-store-format(5)> worker. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<lei-overview(7)>, L<lei-daemon(8)> ����������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/lei_design_notes.txt�����������������������������������������������0000664�0000000�0000000�00000002412�14300314757�0023003�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������lei design notes ---------------- Daemon architecture ------------------- The use of a persistent daemon works around slow startup time of Perl. This is especially important for built-in support for shell completion. It attempts to support inotify and EVFILT_VNODE background monitoring of Maildir keyword changes. If lei were reimplemented in a language with faster startup time, the daemon architecture would likely remain since it also lets us easily decouple the local storage from slow IMAP/NNTP backends and allow us to serialize writes to git-fast-import, SQLite, and Xapian across multiple processes. The coupling of IMAP and NNTP network latency to local storage is a current weakness of public-inbox-watch. Therefore, -watch will likely adopt the daemon architecture of lei in the future. Read/write vs read-only storage ------------------------------- public-inboxes are intended to be written and read by different Unix users. Commonly, a single Unix user or group will write to a public-inbox, but the inbox will be served by a user with read-only permissions (e.g. "www-data" or "nobody"). lei/store is intended to be read and written by a single user, thus we can rely on the Write-Ahead-Log journal of SQLite to improve performance: <https://sqlite.org/wal.html> ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/marketing.txt������������������������������������������������������0000664�0000000�0000000�00000001665�14300314757�0021463�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������marketing guide for public-inbox TL; DR: Don't market this. If you must: don't be pushy and annoying about it. Slow down. Please no superlatives, hype or BS. It's online and public, so it already markets itself. Being informative is not a bad thing, being insistent is. Chances are, you're preaching to the choir; or the folks you're trying to convince are not ready for everything our project represents to the resistance against centralization. Baby steps... There's never a need for anybody to migrate to using our software, or to use any particular instance of it. It's designed to coexist with other mail archives, especially other installations of public-inbox. Most importantly, we take victories even when our software doesn't get adopted. Freedom from lock-in is more important than the adoption of any software. Every time somebody recognizes and rejects various forms of lock-in and centralization is already a victory for us. ���������������������������������������������������������������������������public-inbox-1.9.0/Documentation/mknews.perl��������������������������������������������������������0000775�0000000�0000000�00000011506�14300314757�0021127�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/perl -w # Copyright (C) 2019-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # Generates NEWS, NEWS.atom, and NEWS.html files using release emails # this uses unstable internal APIs of public-inbox, and this script # needs to be updated if they change. use strict; use PublicInbox::Eml; use PublicInbox::View; use PublicInbox::Hval qw(fmt_ts); use PublicInbox::MsgTime qw(msg_datestamp); use PublicInbox::MID qw(mids mid_escape); END { $INC{'Plack/Util.pm'} and warn "$0 should not have loaded Plack::Util\n" } my $dst = shift @ARGV or die "Usage: $0 <NEWS|NEWS.atom|NEWS.html>"; # newest to oldest my @releases = @ARGV; my $dir = 'Documentation/RelNotes'; my $base_url = 'https://public-inbox.org/meta'; my $html_url = 'https://public-inbox.org/NEWS.html'; my $atom_url = 'https://public-inbox.org/NEWS.atom'; my $addr = 'meta@public-inbox.org'; my $latest = shift(@releases) or die 'no releases?'; my $mtime; my $mime_latest = release2mime($latest, \$mtime); my $tmp = "$dst+"; my $out; if ($dst eq 'NEWS') { open $out, '>:encoding(utf8)', $tmp or die; mime2txt($out, $mime_latest); for my $v (@releases) { print $out "\n" or die; mime2txt($out, release2mime($v)); } } elsif ($dst eq 'NEWS.atom' || $dst eq 'NEWS.html') { open $out, '>', $tmp or die; my $ibx = My::MockObject->new( description => 'public-inbox releases', over => undef, search => 1, # for WwwStream::html_top base_url => "$base_url/", ); $ibx->{-primary_address} = $addr; my $ctx = { ibx => $ibx, -upfx => "$base_url/", -hr => 1, }; if ($dst eq 'NEWS.html') { html_start($out, $ctx); mime2html($out, $mime_latest, $ctx); while (defined(my $v = shift(@releases))) { mime2html($out, release2mime($v), $ctx); } html_end($out, $ctx); } elsif ($dst eq 'NEWS.atom') { my $astream = atom_start($out, $ctx, $mtime); for my $v (reverse(@releases)) { mime2atom($out, $astream, release2mime($v), $ctx); } mime2atom($out, $astream, $mime_latest, $ctx); print $out '</feed>' or die; } else { die "BUG: Unrecognized $dst\n"; } } else { die "Unrecognized $dst\n"; } close($out) or die; utime($mtime, $mtime, $tmp) or die; rename($tmp, $dst) or die; exit 0; sub release2mime { my ($release, $mtime_ref) = @_; my $f = "$dir/$release.eml"; open(my $fh, '<', $f) or die "open($f): $!"; my $mime = PublicInbox::Eml->new(\(do { local $/; <$fh> })); # Documentation/include.mk relies on mtimes of each .eml file # to trigger rebuild, so make sure we sync the mtime to the Date: # header in the .eml my $mtime = msg_datestamp($mime->header_obj); utime($mtime, $mtime, $fh) or warn "futimes $f: $!"; $$mtime_ref = $mtime if $mtime_ref; $mime; } sub mime2txt { my ($out, $mime) = @_; my $title = $mime->header('Subject'); $title =~ s/^\s*\[\w+\]\s*//g; # [ANNOUNCE] or [ANN] my $dtime = msg_datestamp($mime->header_obj); $title .= ' - ' . fmt_ts($dtime) . ' UTC'; print $out $title, "\n" or die; my $uline = '=' x length($title); print $out $uline, "\n\n" or die; my $mid = mids($mime)->[0]; print $out 'Link: ', $base_url, '/', mid_escape($mid), "/\n\n" or die; print $out $mime->body_str or die; } sub mime2html { my ($out, $eml, $ctx) = @_; my $smsg = $ctx->{smsg} = bless {}, 'PublicInbox::Smsg'; $smsg->populate($eml); $ctx->{msgs} = [ 1 ]; # for <hr> in eml_entry print $out PublicInbox::View::eml_entry($ctx, $eml) or die; } sub html_start { my ($out, $ctx) = @_; require PublicInbox::WwwStream; $ctx->{www} = My::MockObject->new(style => ''); my $www_stream = PublicInbox::WwwStream::init($ctx); print $out $www_stream->html_top, '<pre>' or die; } sub html_end { for (@$PublicInbox::WwwStream::CODE_URL) { print $out " git clone $_\n" or die; } print $out "</pre></body></html>\n" or die; } sub atom_start { my ($out, $ctx, $mtime) = @_; require PublicInbox::WwwAtomStream; # WwwAtomStream stats this dir for mtime my $astream = PublicInbox::WwwAtomStream->new($ctx); delete $astream->{emit_header}; my $ibx = $ctx->{ibx}; my $title = PublicInbox::WwwAtomStream::title_tag($ibx->description); my $updated = PublicInbox::WwwAtomStream::feed_updated($mtime); print $out <<EOF or die; <?xml version="1.0" encoding="us-ascii"?> <feed xmlns="http://www.w3.org/2005/Atom" xmlns:thr="http://purl.org/syndication/thread/1.0">$title<link rel="alternate" type="text/html" href="$html_url"/><link rel="self" href="$atom_url"/><id>$atom_url</id>$updated EOF $astream; } sub mime2atom { my ($out, $astream, $eml, $ctx) = @_; my $smsg = bless {}, 'PublicInbox::Smsg'; $smsg->populate($eml); if (defined(my $str = $astream->feed_entry($smsg, $eml))) { print $out $str or die; } } package My::MockObject; use strict; our $AUTOLOAD; sub new { my ($class, %values) = @_; bless \%values, $class; } sub AUTOLOAD { my ($self) = @_; my $attr = (split(/::/, $AUTOLOAD))[-1]; $self->{$attr}; } 1; ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-clone.pod���������������������������������������������0000664�0000000�0000000�00000004635�14300314757�0023136�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-clone - "git clone --mirror" wrapper =head1 SYNOPSIS public-inbox-clone INBOX_URL [INBOX_DIR] =head1 DESCRIPTION public-inbox-clone is a wrapper around C<git clone --mirror> for making the initial clone of a remote HTTP(S) public-inbox. It allows cloning multi-epoch v2 inboxes with a single command and zero configuration. It does not run L<public-inbox-init(1)> nor L<public-inbox-index(1)>. Those commands must be run separately if serving/searching the mirror is required. As-is, public-inbox-clone is suitable for creating a git-only backup. public-inbox-clone creates a Makefile with handy targets to update the inbox once indexed. This Makefile may be edited by the user; it will not be rewritten by L<public-inbox-fetch(1)> unless it is removed completely. public-inbox-clone does not use nor require any extra configuration files (not even C<~/.public-inbox/config>). L<public-inbox-fetch(1)> may be used to keep C<INBOX_DIR> up-to-date. For v2 inboxes, it will create a C<$INBOX_DIR/manifest.js.gz> file to speed up subsequent L<public-inbox-fetch(1)>. =head1 OPTIONS =over =item --epoch=RANGE Restrict clones of L<public-inbox-v2-format(5)> inboxes to the given range of epochs. The range may be a single non-negative integer or a (possibly open-ended) C<LOW..HIGH> range of non-negative integers. C<~> may be prefixed to either (or both) integer values to represent the offset from the maximum possible value. For example, C<--epoch=~0> alone clones only the latest epoch, C<--epoch=~2..> clones the three latest epochs. Default: C<0..~0> or C<0..> or C<..~0> (all epochs, all three examples are equivalent) =item -q =item --quiet Quiets down progress messages, also passed to L<git-fetch(1)>. =item -v =item --verbose Increases verbosity, also passed to L<git-fetch(1)>. =item --torsocks=auto|no|yes =item --no-torsocks Whether to wrap L<git(1)> and L<curl(1)> commands with L<torsocks(1)>. Default: C<auto> =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-fetch(1)>, L<public-inbox-init(1)>, L<public-inbox-index(1)> ���������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-compact.pod�������������������������������������������0000664�0000000�0000000�00000003417�14300314757�0023461�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-compact - compact Xapian DBs in an inbox =head1 SYNOPSIS public-inbox-compact INBOX_DIR public-inbox-compact --all =head1 DESCRIPTION public-inbox-compact is a wrapper for L<xapian-compact(1)> which locks the inbox and prevents other processes such as L<public-inbox-watch(1)> or L<public-inbox-mda(1)> from writing while it operates. It enforces the use of the C<--no-renumber> option of L<xapian-compact(1)> which is required to work with the rest of the public-inbox search code. This command is rarely needed for active inboxes. Using the C<--compact> option of L<public-inbox-index(1)> is recommended, instead, and only when doing a C<--reindex>. =head1 OPTIONS =over =item --all Compact all inboxes configured in ~/.public-inbox/config. This is an alternative to specifying individual inboxes directories on the command-line. =item --blocksize =item --no-full =item --fuller These options are passed directly to L<xapian-compact(1)>. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG The default config file, normally "~/.public-inbox/config". See L<public-inbox-config(5)> =item XAPIAN_FLUSH_THRESHOLD The number of documents to update before committing changes to disk. This environment is handled directly by Xapian, refer to Xapian API documentation for more details. Default: 10000 =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2018-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<xapian-compact(1)>, L<public-inbox-index(1)> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-config.pod��������������������������������������������0000664�0000000�0000000�00000031766�14300314757�0023310�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-config - public-inbox config file description =head1 SYNOPSIS ~/.public-inbox/config =head1 DESCRIPTION The public-inbox config file is parseable by L<git-config(1)>. This is a global configuration file for mapping/discovering all public-inboxes used by a particular user. =head1 CONFIGURATION FILE =head2 EXAMPLE [publicinbox "test"] inboxdir = /home/user/path/to/test.git ; multiple addresses are supported address = test@example.com ; address = alternate@example.com url = http://example.com/test newsgroup = inbox.test ; backwards compatibility with public-inbox pre-1.2.0, ; "inboxdir" takes precedence over "mainrepo" mainrepo = /home/user/path/to/test.git =head2 VARIABLES =over 8 =item publicinbox.<name>.address The email address of the public-inbox. May be specified more than once for merging multiple mailing lists (or migrating to new addresses). This must be specified at least once, the first value will be considered the primary address for informational purposes. Default: none, required =item publicinbox.<name>.inboxdir The absolute path to the directory which hosts the public-inbox. This must be specified once. This was previously known as "mainrepo", which remains supported, but "inboxdir" takes precedence. Default: none, required =item publicinbox.<name>.url The primary URL for hosting the HTTP/HTTPS archives. Additional HTTP/HTTPS URLs may be specified via C<$GIT_DIR/cloneurl> as documented in L<gitweb(1)> Default: none, optional =item publicinbox.<name>.newsgroup The NNTP group name for use with L<public-inbox-nntpd(1)>. This may be any newsgroup name with hierarchies delimited by C<.>. For example, the newsgroup for L<mailto:meta@public-inbox.org> is: C<inbox.comp.mail.public-inbox.meta> It also configures the folder hierarchy used by L<public-inbox-imapd(1)> as well as L<public-inbox-pop3d(1)> Omitting this for a given inbox will prevent the inbox from being served by L<public-inbox-nntpd(1)>, L<public-inbox-imapd(1)>, and/or L<public-inbox-pop3d(1)> Default: none, optional =item publicinbox.<name>.watch See L<public-inbox-watch(1)> =item publicinbox.<name>.watchheader See L<public-inbox-watch(1)> =item publicinbox.<name>.listid The L<rfc2919|https://tools.ietf.org/html/rfc2919> header without angle brackets for L<public-inbox-mda(1)> deliveries and L<public-inbox-watch(1)>. For public-inbox-watch users, this is a shortcut for specifying C<publicinbox.$NAME.watchheader=List-Id:E<lt>foo.example.comE<gt>> For public-inbox-mda users, this may be used to avoid recipient matching via C<ORIGINAL_RECIPIENT> environment variable. This may be specified multiple times for merging multiple mailing lists into a single public-inbox, only one C<List-Id> header needs to match. Default: none =item publicinbox.<name>.imapmirror This may be the full IMAP URL of an independently-run IMAP mirror. Default: none =item publicinbox.<name>.nntpmirror This may be the full NNTP URL of an independently-run mirror. For example, the https://public-inbox.org/meta/ inbox is mirrored by Gmane at C<nntp://news.gmane.io/gmane.mail.public-inbox.general> Default: none =item publicinbox.<name>.indexlevel The indexing level for L<public-inbox-index(1)> C<basic> only requires L<DBD::SQLite(3pm)> and provides all NNTP functionality along with thread-awareness in the WWW interface. C<medium> requires L<Search::Xapian(3pm)> to provide full-text term search functionality in the WWW UI. C<full> also includes positional information used by Xapian to allow for searching for phrases using quoted text. (e.g. C<"looking for a complete sentence">) Default: C<full> =item publicinbox.<name>.boost Control indexing order for L<public-inbox-extindex(1)>, with ties broken by config file order. This only affects indexing and does not affect messages which are already indexed. Default: C<0> =item publicinbox.<name>.indexSequentialShard See L<public-inbox-index(1)/publicInbox.indexSequentialShard> =item publicinbox.<name>.httpbackendmax If a digit, the maximum number of parallel L<git-http-backend(1)> processes to allow for cloning this particular inbox. If an alphanumeric value starting with a lowercase alphabetic character is specified, the inbox will use a L</NAMED LIMITER> which can be shared by multiple inboxes. Default: 32 (using a default limiter shared by all inboxes) =item publicinbox.<name>.coderepo The nickname of a "coderepo" section associated with the inbox. May be specified more than once for M:N mapping of code repos to inboxes. If enabled, diff hunk headers in patch emails will link to the line numbers of blobs. Default: none =item publicinbox.<name>.replyto May be used to control how reply instructions in the PSGI interface are displayed. ":none=dead inbox" may be specified to denote an inactive list ("dead inbox" may be replaced with another phrase). A list of comma-delimited email addresses may be specified. This can be useful for dedicated inboxes for bot emails, but discussion happens on a separate mailing list/inbox. Mirrors of existing centralized mailing lists may use ":list" here to redirect mail only to the configured inbox address. The use of ":list" is discouraged for new mailing lists, as it leads to centralization. Default: :all =item publicinbox.css The local path name of a CSS file for the PSGI web interface. May contain the attributes "media", "title" and "href" which match the associated attributes of the HTML <style> tag. "href" may be specified to point to the URL of an remote CSS file and the path may be "/dev/null" or any empty file. Multiple files may be specified and will be included in the order specified. =item publicinboxmda.spamcheck This may be set to C<none> to disable the use of SpamAssassin L<spamc(1)> for filtering spam before it is imported into git history. Other spam filtering backends may be supported in the future. Default: spamc =item publicinboxwatch.spamcheck See L<public-inbox-watch(1)> =item publicinboxwatch.watchspam See L<public-inbox-watch(1)> =item publicinbox.imapserver Set this to point to the hostname(s) of the L<public-inbox-imapd(1)> instance. This is used to advertise the existence of the IMAP endpoint in the L<PublicInbox::WWW> HTML interface. Default: none =item publicinbox.nntpserver Same as C<publicinbox.imapserver>, but for the hostname(s) of the L<public-inbox-nntpd(1)> instance. Default: none =item publicinbox.pop3server Same as C<publicinbox.imapserver>, but for the hostname(s) of the L<public-inbox-pop3d(1)> instance. Default: none =item publicinbox.pop3state See L<public-inbox-pop3d(1)/publicinbox.pop3state> =item publicinbox.<name>.feedmax The size of an Atom feed for the inbox. If specified more than once, only the last value is used. Invalid values (<= 0) will be treated as the default value. Default: 25 =item publicinbox.<name>.hide A comma-delimited list of listings to hide the inbox from. Valid values are currently C<www> and C<manifest>. Default: none =item coderepo.<nick>.dir The path to a git repository for "publicinbox.<name>.coderepo" =item coderepo.<nick>.cgitUrl The URL of the cgit instance associated with the coderepo. Default: none =item publicinbox.cgitrc A path to a L<cgitrc(5)> file. "repo.url" directives in the cgitrc will be mapped to the nickname of a coderepo (without trailing slash), and "repo.path" directives map to "coderepo.<nick>.dir". Use of this directive allows admins of existing cgit installations to skip declaring coderepo sections and map inboxes directly to code repositories known to cgit. Macro expansion (e.g. C<$HTTP_HOST>) is not yet supported. =item publicinbox.cgitbin A path to the C<cgit.cgi> executable. The L<PublicInbox::WWW> interface can spawn cgit as a fallback if the publicinbox.cgitrc directive is configured. Default: /var/www/htdocs/cgit/cgit.cgi or /usr/lib/cgit/cgit.cgi =item publicinbox.cgitdata A path to the data directory used by cgit for storing static files. Typically guessed based the location of C<cgit.cgi> (from C<publicinbox.cgitbin>, but may be overridden. Default: basename of C<publicinbox.cgitbin>, /var/www/htdocs/cgit/ or /usr/share/cgit/ =item publicinbox.mailEditor See L<public-inbox-edit(1)> =item publicinbox.indexMaxSize =item publicinbox.indexBatchSize =item publicinbox.indexSequentialShard See L<public-inbox-index(1)> =item publicinbox.wwwlisting Enable a HTML listing style when the root path of the URL '/' is accessed. Valid values are: =over 8 =item * all - Show all inboxes =item * 404 - Return a 404 page. This is useful to allow customization with L<Plack::App::Cascade(3pm)> =item * match=domain - Only show inboxes with URLs which belong to the domain of the HTTP request =for comment TODO support showing cgit listing =back Default: C<404> =item publicinbox.grokmanifest Controls the generation of a grokmirror-compatible gzipped JSON file at the top-level of the PSGI interface. You generally do not need to change this from the default. Valid values are: =over 8 =item * match=domain - Only include inboxes with URLs which belong to the domain of the HTTP request. This is compatible with virtual hosting where several domains come from the same host. =item * all - All inboxes are present in C<manifest.js.gz>, regardless of domain. Only use this if you're serving HTTP requests in a domain-agnostic manner. =item * 404 - C<manifest.js.gz> will only contain an empty JSON array. This does NOT affect C<$INBOX_URL/manifest.js.gz>, which will always contain all git repos used by the inbox at C<$INBOX_URL> =back Default: C<match=domain> =item publicinbox.<name>.obfuscate Whether to obfuscate email addresses in the L<PublicInbox::WWW> HTML interface. Default: false =item publicinbox.noObfuscate A space-delimited list of well-known addresses and domains that should not be obfuscated when C<publicinbox.$NAME.obfuscate> is true (e.g., C<public@example.com> and C<@example.com>). This may be specified more than once, in which case the values are merged. Default: none =item extindex.<name>.topdir The directory of an external index. See L<public-inbox-extindex(1)> for more details. =item extindex.<name>.url Identical to L</publicinbox.E<lt>nameE<gt>.url>, but for external indices =item extindex.<name>.coderepo Identical to L</publicinbox.E<lt>nameE<gt>.coderepo>, but for external indices. Code repos may be freely associated with any number of public inboxes and external indices. =back =head2 NAMED LIMITER (PSGI) Named limiters are useful for preventing large inboxes from monopolizing (or overloading) the server. Since serving git clones (via L<git-http-backend(1)> can be memory-intensive for large inboxes, it makes sense to put large inboxes on a named limiter with a low max value; while smaller inboxes can use the default limiter. C<RLIMIT_*> keys may be set to enforce resource limits for a particular limiter (L<BSD::Resource(3pm)> is required). Default named-limiters are prefixed with "-". Currently, the "-cgit" named limiter is reserved for instances spawning cgit via C<publicinbox.cgitrc> =over 8 =item publicinboxlimiter.<name>.max The maximum number of parallel processes for the given limiter. =item publicinboxlimiter.<name>.rlimitCore =item publicinboxlimiter.<name>.rlimitCPU =item publicinboxlimiter.<name>.rlimitData The maximum core size, CPU time, or data size processes run with the given limiter will use. This may be comma-separated to distinguish soft and hard limits. The word "INFINITY" is accepted as the RLIM_INFINITY constant (if supported by your OS). See L<setrlimit(2)> for more info on the behavior of RLIMIT_CORE, RLIMIT_CPU, and RLIMIT_DATA for you operating system. =back =head3 EXAMPLE WITH NAMED LIMITERS ; big inboxes which require lots of memory to clone: [publicinbox "big1"] inboxdir = /path/to/big1 address = big1@example.com httpbackendmax = big [publicinbox "big2"] inboxdir = /path/to/big2 address = big2@example.com httpbackendmax = big ; tiny inboxes which are easily cloned: [publicinbox "tiny1"] inboxdir = /path/to/tiny1 address = tiny1@example.com [publicinbox "tiny2"] inboxdir = /path/to/tiny2 address = tiny2@example.com [publicinboxlimiter "big"] max = 4 In the above example, the "big1" and "big2" are limited to four parallel L<git-http-backend(1)> processes between them. However, "tiny1" and "tiny2" will share the default limiter which means there can be 32 L<git-http-backend(1)> processes between them. =head1 ENVIRONMENT =over 8 =item PI_CONFIG Used to override the default "~/.public-inbox/config" value. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>, L<public-inbox-mda(1)>, L<public-inbox-watch(1)>, L<grokmirror|https://git.kernel.org/pub/scm/utils/grokmirror/grokmirror.git> ����������public-inbox-1.9.0/Documentation/public-inbox-convert.pod�������������������������������������������0000664�0000000�0000000�00000005042�14300314757�0023507�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-convert - convert v1 inboxes to v2 =head1 SYNOPSIS public-inbox-convert [OPTIONS] OLD_DIR NEW_DIR =head1 DESCRIPTION public-inbox-convert copies the contents of an old "v1" inbox into a new "v2" inbox. It makes no changes to the old inbox and users are expected to update the "inboxdir" path in L<public-inbox-config(5)> to point to the path of NEW_DIR once they are satisfied with the conversion. =head1 OPTIONS =over =item --no-index Disables Xapian and overview DB indexing on the new inbox. By default, public-inbox-convert creates a new index in the v2 inbox and indexes all existing messages, a lengthy operation for large inboxes. =item -j JOBS =item --jobs=JOBS Control the number of indexing jobs and Xapian shards of the v2 inbox. By default, this is the detected CPU count but capped at 4 due to various bottlenecks. The number of Xapian shards will be 1 less than the JOBS value, since there is a single process which distributes work to the Xapian shards. =item -L LEVEL, --index-level=LEVEL =item -c, --compact =item -v, --verbose =item --no-fsync =item --sequential-shard =item --batch-size=BYTES =item --max-size=BYTES These options affect indexing. They have no effect if L</--no-index> is specified See L<public-inbox-index(1)> for a description of these options. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG The default config file, normally "~/.public-inbox/config". See L<public-inbox-config(5)> =back =head1 UPGRADING Editing "~/.public-inbox/config" (or whatever C<PI_CONFIG> is set to) will be required to start using the new directory. =head1 BUGS Writes from L<public-inbox-mda(1)> or L<git-fetch(1)> to the v1 inbox which occur after the start of the conversion will not be picked up in the v2 inbox. Users of L<public-inbox-watch(1)> do not have to worry about this. They only need to update the config file to point to the v2 inbox, send C<SIGHUP> public-inbox-watch process to reload the config file, and then C<SIGUSR1> to rescan existing Maildirs. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2013-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-init(1)>, L<public-inbox-index(1)>, L<public-inbox-config(5)>, L<public-inbox-v1-format(5)>, L<public-inbox-v2-format(5)> ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-daemon.pod��������������������������������������������0000664�0000000�0000000�00000014531�14300314757�0023275�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-daemon - common usage for public-inbox network daemons =head1 SYNOPSIS public-inbox-netd public-inbox-httpd public-inbox-imapd public-inbox-nntpd public-inbox-pop3d =head1 DESCRIPTION This manual describes common options and behavior for public-inbox network daemons. Network daemons for public-inbox provide read-only IMAP, HTTP, NNTP and POP3 access to public-inboxes. Write access to a public-inbox will never be required to run these. These daemons are implemented with a common core using non-blocking sockets and optimized for fairness; even with thousands of connected clients over slow links. They also provide graceful shutdown/upgrade support to avoid breaking existing connections during software upgrades. These daemons may also utilize multiple pre-forked worker processes to take advantage of multiple CPUs. =head1 OPTIONS =over =item -l [PROTOCOL://]ADDRESS[?opt1=val1,opt2=val2] =item --listen [PROTOCOL://]ADDRESS[?opt1=val1,opt2=val2] This takes an absolute path to a Unix socket or HOST:PORT to listen on. For example, to listen to TCP connections on port 119, use: C<-l 0.0.0.0:119>. This may also point to a Unix socket (C<-l /path/to/http.sock>) for a reverse proxy like L<nginx(8)> to use. May be specified multiple times to allow listening on multiple sockets. Unless per-listener options are used (required for L<public-inbox-netd(1)>), this does not need to be specified at all if relying on L<systemd.socket(5)> or similar, Per-listener options may be specified after C<?> as C<KEY=VALUE> pairs delimited by C<,>. See L<public-inbox-netd(1)> for documentation on the C<cert=>, C<key=>, C<env.NAME=VALUE>, C<out=>, C<err=>, and C<psgi=> options available. Default: server-dependent unless socket activation is used with L<systemd(1)> or similar (see L<systemd.socket(5)>). =item -1 =item --stdout PATH Specify an appendable path to redirect stdout descriptor (1) to. Using this is preferable to setting up the redirect externally (e.g. E<gt>E<gt>/path/to/log in shell) since it allows SIGUSR1 to be handled (see L<SIGNALS/SIGNALS> below). C<out=> may also be specified on a per-listener basis. Default: /dev/null with C<--daemonize>, inherited otherwise =item -2 PATH =item --stderr PATH Like C<--stdout>, but for the stderr descriptor (2). C<err=> may also be specified on a per-listener basis. Default: /dev/null with C<--daemonize>, inherited otherwise =item -W =item --worker-processes Set the number of worker processes. Normally, this should match the number of CPUs on the system to take full advantage of the hardware. However, users of memory-constrained systems may want to lower this. Setting this to zero (C<-W0>) disables the master/worker split; saving some memory but removing the ability to use SIGTTIN to increase worker processes or have the worker restarted by the master on crashes. Default: 1 =item --cert /path/to/cert The default TLS certificate for HTTPS, IMAPS, NNTPS, POP3S and/or STARTTLS support if the C<cert> option is not given with C<--listen>. Well-known TCP ports automatically get TLS or STARTTLS support If using systemd-compatible socket activation and a TCP listener on port well-known ports (563 is inherited, it is automatically NNTPS when this option is given. When a listener on port 119 is inherited and this option is given, it automatically gets STARTTLS support. =item --key /path/to/key The default TLS certificate key for the default C<--cert> or per-listener C<cert=> option. The private key may be concatenated into the path used by the cert, in which case this option is not needed. =back =head1 SIGNALS Most of our signal handling behavior is copied from L<nginx(8)> and/or L<starman(1)>; so it is possible to reuse common scripts for managing them. =over 8 =item SIGUSR1 Reopens log files pointed to by --stdout and --stderr options. =item SIGUSR2 Spawn a new process with the intention to replace the running one. See L</UPGRADING> below. =item SIGHUP Reload config files associated with the process. (Note: broken for L<public-inbox-httpd(1)> only in E<lt>= 1.6) =item SIGTTIN Increase the number of running workers processes by one. =item SIGTTOU Decrease the number of running worker processes by one. =item SIGWINCH Stop all running worker processes. SIGHUP or SIGTTIN may be used to restart workers. =item SIGQUIT Gracefully terminate the running process. =back SIGTTOU, SIGTTIN, SIGWINCH all have no effect when worker processes are disabled with C<-W0> on the command-line. =head1 ENVIRONMENT =over 8 =item PI_CONFIG The default config file, normally "~/.public-inbox/config". See L<public-inbox-config(5)> =item LISTEN_FDS, LISTEN_PID Used by systemd (and compatible) installations for socket activation. See L<systemd.socket(5)> and L<sd_listen_fds(3)>. =item PERL_INLINE_DIRECTORY Pointing this to point to a writable directory enables the use of L<Inline> and L<Inline::C> extensions which may provide platform-specific performance improvements. Currently, this enables the use of L<vfork(2)> which speeds up subprocess spawning with the Linux kernel. public-inbox will never enable L<Inline::C> automatically without this environment variable set or C<~/.cache/public-inbox/inline-c> created by a user. See L<Inline> and L<Inline::C> for more details. =back =head1 UPGRADING There are two ways to upgrade a running process. Users of process management systems with socket activation (L<systemd(1)> or similar) may rely on multiple instances For systemd, this means using two (or more) '@' instances for each service (e.g. C<SERVICENAME@INSTANCE>) as documented in L<systemd.unit(5)>. Users of traditional SysV init may use SIGUSR2 to spawn a replacement process and gracefully terminate the old process using SIGQUIT. In either case, the old process will not truncate running responses; so responses to expensive requests do not get interrupted and lost. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-httpd(1)>, L<public-inbox-imapd(1)>, L<public-inbox-nntpd(1)>, L<public-inbox-pop3d(1)>, L<public-inbox-netd(1)> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-edit.pod����������������������������������������������0000664�0000000�0000000�00000006323�14300314757�0022757�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-edit - destructively edit messages in a public inbox =head1 SYNOPSIS public-inbox-edit -m MESSAGE-ID --all|INBOX_DIR public-inbox-edit -F RAW_FILE --all|INBOX_DIR [.. INBOX_DIR] =head1 DESCRIPTION public-inbox-edit allows editing messages in a given inbox to remove sensitive information. It is only intended as a last resort, as it will cause discontiguous git history and draw more attention to the sensitive data in mirrors. =head1 OPTIONS =over =item --all Edit the message in all inboxes configured in ~/.public-inbox/config. This is an alternative to specifying individual inboxes directories on the command-line. =item -m MESSAGE-ID Edits the message corresponding to the given C<MESSAGE-ID>. If the C<MESSAGE-ID> is ambiguous, C<--force> or using the C<--file> of the original will be required. =item -F FILE Edits the message corresponding to the Message-ID: header and content given in C<FILE>. This requires the unmodified raw message, and the contents of C<FILE> will not itself be modified. This is useful if a Message-ID is ambiguous due to filtering/munging rules or other edits. =item --force Forcibly perform the edit even if Message-ID is ambiguous. =item --raw Do not perform "From " line escaping. By default, this generates a mboxrd variant file to detect unpurged messages in the new mbox. This makes sense if your configured C<publicinbox.mailEditor> is a regular editor and not something like C<mutt -f> =back =head1 CONFIGURATION =over 8 =item publicinbox.mailEditor The command to perform the edit with. An example of this would be C<mutt -f>, and the user would then use the facilities in L<mutt(1)> to edit the mail. This is useful for editing attachments or Base64-encoded emails which are more difficult to edit with a normal editor (configured via C<GIT_EDITOR>, C<VISUAL> or C<EDITOR>). Default: none =back =head1 ENVIRONMENT =over 8 =item GIT_EDITOR / VISUAL / EDITOR =for comment MAIL_EDITOR is undocumented (unstable, don't want naming conflicts) public-inbox-edit will fall back to using one of these variables (in that order) if C<publicinbox.mailEditor> is unset. =item PI_CONFIG The default config file, normally "~/.public-inbox/config". See L<public-inbox-config(5)> =back =head1 LIMITATIONS Only L<v2|public-inbox-v2-format(5)> repositories are supported. This is safe to run while normal inbox writing tools (L<public-inbox-mda(1)>, L<public-inbox-watch(1)>, L<public-inbox-learn(1)>) are active. Running this in parallel with L<public-inbox-xcpdb(1)> or C<"public-inbox-index --reindex"> can lead to errors or edited data remaining indexed. Incremental L<public-inbox-index(1)> (without C<--reindex>) is fine. Keep in mind this is a last resort, as it will be disruptive to anyone using L<git(1)> to mirror the inbox being edited. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2019-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-purge(1)> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-extindex-format.pod�����������������������������������0000664�0000000�0000000�00000007037�14300314757�0025153�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������% public-inbox developer manual =head1 NAME public-inbox-extindex-format - external index format description =head1 DESCRIPTION The extindex is an index-only evolution of the per-inbox SQLite and Xapian indices used by L<public-inbox-v2-format(5)> and L<public-inbox-v1-format(5)>. It exists to facilitate searches across multiple inboxes as well as to reduce index space when messages are cross-posted to several existing inboxes. It transparently indexes messages across any combination of v1 and v2 inboxes and data about inboxes themselves. =head1 DIRECTORY LAYOUT While inspired by v2, there is no git blob storage nor C<msgmap.sqlite3> DB. Instead, there is an C<ALL.git> (all caps) git repo which treats every indexed v1 inbox or v2 epoch as a git alternate. As with v2 inboxes, it uses C<over.sqlite3> and Xapian "shards" for WWW and IMAP use. Several exclusive new tables are added to deal with L</XREF3 DEDUPLICATION> and metadata. Unlike v1 and v2 inboxes, it is NOT designed to map to a NNTP newsgroup. Thus it lacks C<msgmap.sqlite3> to enforce the unique Message-ID requirement of NNTP. =head2 INDEX OVERVIEW AND DEFINITIONS $SCHEMA_VERSION - DB schema version (for Xapian) $SHARD - Integer starting with 0 based on parallelism foo/ # "foo" is the name of the index - ei.lock # lock file to protect global state - ALL.git # empty, alternates for inboxes - ei$SCHEMA_VERSION/$SHARD # per-shard Xapian DB - ei$SCHEMA_VERSION/over.sqlite3 # overview DB for WWW, IMAP - ei$SCHEMA_VERSION/misc # misc Xapian DB File and directory names are intentionally different from analogous v2 names to ensure extindex and v2 inboxes can easily be distinguished from each other. =head2 XREF3 DEDUPLICATION Due to cross-posted messages being the norm in the large Linux kernel development community and Xapian indices being the primary consumer of storage, it makes sense to deduplicate indexing as much as possible. The internal storage format is based on the NNTP "Xref" tuple, but with the addition of a third element: the git blob OID. Thus the triple is expressed in string form as: $NEWSGROUP_NAME:$ARTICLE_NUM:$OID If no C<newsgroup> is configured for an inbox, the C<inboxdir> of the inbox is used. This data is stored in the C<xref3> table of over.sqlite3. =head2 misc XAPIAN DB In addition to the numeric Xapian shards for indexing messages, there is a new, in-development Xapian index for storing data about inboxes themselves and other non-message data. This index allows us to speed up operations involving hundreds or thousands of inboxes. =head1 BENEFITS In addition to providing cross-inbox search capabilities, it can also replace per-inbox Xapian shards (but not per-inbox over.sqlite3). This allows reduction in disk space, open file handles, and associated memory use. =head1 CAVEATS Relocating v1 and v2 inboxes on the filesystem will require extindex to be garbage-collected and/or reindexed. Configuring and maintaining stable C<newsgroup> names before any messages are indexed from every inbox can avoid expensive reindexing and rely exclusively on GC. =head1 LOCKING L<flock(2)> locking exclusively locks the empty ei.lock file for all non-atomic operations. =head1 THANKS Thanks to the Linux Foundation for sponsoring the development and testing. =head1 COPYRIGHT Copyright 2020-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<http://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-v2-format(5)> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-extindex.pod������������������������������������������0000664�0000000�0000000�00000006726�14300314757�0023671�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-extindex - create and update external search indices =head1 SYNOPSIS public-inbox-extindex [OPTIONS] EXTINDEX_DIR INBOX_DIR... public-inbox-extindex [OPTIONS] [EXTINDEX_DIR] --all =head1 DESCRIPTION public-inbox-extindex creates and updates an external search and overview database used by the read-only public-inbox PSGI (HTTP), NNTP, and IMAP interfaces. This requires either the L<Search::Xapian> XS bindings OR the L<Xapian> SWIG bindings, along with L<DBD::SQLite> and L<DBI> Perl modules. =head1 OPTIONS =over =item -j JOBS =item --jobs=JOBS =item --no-fsync =item --dangerous =item --rethread =item --max-size SIZE =item --batch-size SIZE These switches behave as they do for L<public-inbox-index(1)> =item --all Index all C<publicinbox> entries in C<PI_CONFIG>. C<publicinbox> entries indexed by C<public-inbox-extindex> can have full Xapian searching abilities with the per-C<publicinbox> C<indexlevel> set to C<basic> and their respective Xapian (C<xap15> or C<xapian15>) directories removed. For multiple public-inboxes where cross-posting is common, this allows significant space savings on Xapian indices. =item --gc Perform garbage collection instead of indexing. Use this if inboxes are removed from the extindex, or if messages are purged or removed from some inboxes. =item --reindex Forces a re-index of all messages in the extindex. This can be used for in-place upgrades and bugfixes while read-only server processes are utilizing the index. Keep in mind this roughly doubles the size of the already-large Xapian database. The extindex locks will be released roughly every 10s to allow L<public-inbox-mda(1)> and L<public-inbox-watch(1)> processes to write to the extindex. =item --fast Used with C<--reindex>, it will only look for new and stale entries and not touch already-indexed messages. =back =head1 FILES L<public-inbox-extindex-format(5)> =head1 CONFIGURATION public-inbox-extindex does not currently write to the L<public-inbox-config(5)> file, configuration may be entered manually. The extindex name of C<all> is a special case which corresponds to indexing C<--all> inboxes. An example for C<--all> is as follows: [extindex "all"] topdir = /path/to/extindex_dir url = all coderepo = foo coderepo = bar See L<public-inbox-config(5)> for more details. =head1 ENVIRONMENT =over 8 =item PI_CONFIG Used to override the default "~/.public-inbox/config" value. =item XAPIAN_FLUSH_THRESHOLD The number of documents to update before committing changes to disk. This environment is handled directly by Xapian, refer to Xapian API documentation for more details. Setting C<XAPIAN_FLUSH_THRESHOLD> or C<publicinbox.indexBatchSize> for a large C<--reindex> may cause L<public-inbox-mda(1)>, L<public-inbox-learn(1)> and L<public-inbox-watch(1)> tasks to wait long and unpredictable periods of time during C<--reindex>. Default: none, uses C<publicinbox.indexBatchSize> =back =head1 UPGRADING Occasionally, public-inbox will update it's schema version and require a full index by running this command. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<Search::Xapian>, L<DBD::SQLite> ������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-fetch.pod���������������������������������������������0000664�0000000�0000000�00000004705�14300314757�0023125�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-fetch - "git fetch" wrapper for v2 inbox mirrors =head1 SYNOPSIS public-inbox-fetch [--exit-code] -C INBOX_DIR =head1 DESCRIPTION public-inbox-fetch updates git storage of public-inbox mirrors. With v2 inboxes, it allows detection of new epochs and avoids unnecessary traffic on old epochs. public-inbox-fetch does not use nor require any configuration files of its own. It does not run L<public-inbox-index(1)>, making it suitable for maintaining git-only backups. For v2 inboxes, it will maintain C<$INBOX_DIR/manifest.js.gz> file to speed up future invocations. It always safe to remove manifest.js.gz, it is merely an optimization and will be restored on the next invocation. To prevent fetches on any v2 epoch, use L<chmod(1)> to remove write permissions to the top-level of the epoch. For example, to disable fetches on epoch 4: chmod a-w $INBOX_DIR/git/4.git If you wish to re-enable fetches to the epoch: chmod u+w $INBOX_DIR/git/4.git =head1 OPTIONS =over =item -q =item --quiet Quiets down progress messages, also passed to L<git-fetch(1)>. =item -T REMOTE =item --try-remote REMOTE Try a given remote name instead of C<origin> or C<_grokmirror>. May be specified more than once. Default: C<origin>, C<_grokmirror> =item --exit-code Exit with C<127> if no updates are done. This can be used in shell scripts to avoid invoking L<public-inbox-index(1)> when there are no updates: public-inbox-fetch -q --exit-code && public-inbox-index test $? -eq 0 || exit $? =item -v =item --verbose Increases verbosity, also passed to L<git-fetch(1)>. =item --torsocks=auto|no|yes =item --no-torsocks Whether to wrap L<git(1)> and L<curl(1)> commands with L<torsocks(1)>. Default: C<auto> =back =head1 EXIT CODES =over =item 127 no updates when L</--exit-code> is used above =back public-inbox-fetch will also exit with curl L<curl(1)/EXIT CODES> as documented in the L<curl(1)> manpage (e.g. C<7> when curl cannot reach a host). Likewise, L<git-fetch(1)> failures are also propagated to the user. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-index(1)>, L<curl(1)> �����������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-glossary.pod������������������������������������������0000664�0000000�0000000�00000010251�14300314757�0023670�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-glossary - glossary for public-inbox =head1 DESCRIPTION public-inbox combines several independently-developed protocols and data formats with overlapping concepts. This document is intended as a guide to identify and clarify overlapping concepts with different names. This is mainly intended for hackers of public-inbox, but may be useful for administrators of public-facing services and/or users building tools. =head1 TERMS =over 8 =item IMAP UID, NNTP article number, on-disk Xapian docid A sequentially-assigned positive integer. These integers are per-inbox, or per-extindex. This is the C<num> column of the C<over> table in C<over.sqlite3> =item tid, THREADID A sequentially-assigned positive integer. These integers are per-inbox or per-extindex. In the future, this may be prefixed with C<T> for JMAP (RFC 8621) and RFC 8474. This may not be strictly compliant with RFC 8621 since inboxes and extindices are considered independent entities from each other. This is the C<tid> column of the C<over> table in C<over.sqlite3> =item blob For email, this is the git blob object ID (SHA-(1|256)) of an RFC-(822|2822|5322) email message. =item IMAP EMAILID, JMAP Email Id To-be-decided. This will likely be the git blob ID prefixed with C<g> rather than the numeric UID to accommodate the same blob showing up in both an extindex and inbox (or multiple extindices). =item newsgroup The name of the NNTP newsgroup, see L<public-inbox-config(5)>. =item IMAP (folder|mailbox) slice A 50K slice of a newsgroup to accommodate the limitations of IMAP clients with L<public-inbox-imapd(1)>. This is the C<newsgroup> name with a C<.$INTEGER_SUFFIX>, e.g. a newsgroup named C<inbox.test> would have its first slice named C<inbox.test.0>, and second slice named C<inbox.test.1> and so forth. If implemented, the RFC 8474 MAILBOXID of an IMAP slice will NOT have the same Mailbox Id as the public-facing full JMAP mailbox. =item inbox name, public JMAP mailbox name The HTTP(S) name of the public-inbox (C<publicinbox.E<lt>nameE<gt>.*>). JMAP will use this name rather than the newsgroup name since public-facing JMAP will be part of the PSGI code and not need a separate daemon like L<public-inbox-nntpd(1)> or L<public-inbox-imapd(1)> =item epoch A git repository used for blob storage. See L<public-inbox-v2-format(5)/GIT EPOCHS>. =item keywords, (IMAP|Maildir) flags, mbox Status + X-Status Private, per-message keywords or flags as described in RFC 8621 section 10.4. These are conveyed in the C<Status:> and C<X-Status:> headers for L<mbox(5)>, as system IMAP FLAGS (RFC 3501 section 2.3.2), or Maildir info flags. L<public-inbox-watch(1)> ignores drafts and trashed (deleted) messages. L<lei-import(1)> ignores trashed (deleted) messages, but it imports drafts. =item labels, private JMAP mailboxes For L<lei(1)> users only. This will allow lei users to place the same email into one or more virtual folders for ease-of-filtering. This is NOT tied to public-inbox names, as messages stored by lei may not be public. These are similar in spirit to arbitrary freeform "tags" in mail software such as L<notmuch(1)> and non-system IMAP FLAGS. =item volatile metadata (VMD) For L<lei(1)> users only, this refers to the combination of keywords and labels which are subject to frequent change independently of immutable message content. =item IMAP INTERNALDATE, JMAP receivedAt, rt: search prefix The first valid timestamp value of Received: headers (top first). If no Received: header exists, the Date: header is used, and the current time if neither header(s) exist. When mirroring via git, this is the git commit time. =item IMAP SENT*, JMAP sentAt, dt: and d: search prefixes The first valid timestamp value of the Date: header(s). If no Date: header exists, the time from the Received: header is used, and then the current time if neither header exists. When mirroring via git, this is the git author time. =back =head1 COPYRIGHT Copyright 2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<http://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-v2-format(5)>, L<public-inbox-v1-format(5)>, L<public-inbox-extindex-format(5)>, L<gitglossary(7)> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-httpd.pod���������������������������������������������0000664�0000000�0000000�00000002350�14300314757�0023151�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-httpd - PSGI server optimized for public-inbox =head1 SYNOPSIS public-inbox-httpd [OPTIONS] [/path/to/myapp.psgi] =head1 DESCRIPTION public-inbox-httpd is a PSGI/Plack server supporting HTTP/1.1 and HTTP/1.0. It uses options and environment variables common to all L<public-inbox-daemon(8)> implementations in addition to the PSGI file. If a PSGI file is not specified, L<PublicInbox::WWW> is loaded with a default middleware stack consisting of L<Plack::Middleware::ReverseProxy>, and L<Plack::Middleware::Head> This may point to a PSGI file for supporting generic PSGI apps. =head1 ENVIRONMENT =over 8 =item GIT_HTTP_MAX_REQUEST_BUFFER Shared with L<git-http-backend(1)>, this governs the maximum upload size of an HTTP request. Default: 10m =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>, L<Plack> ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-imapd.pod���������������������������������������������0000664�0000000�0000000�00000005546�14300314757�0023132�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-imapd - IMAP server for sharing public-inboxes =head1 SYNOPSIS public-inbox-imapd [OPTIONS] =head1 DESCRIPTION public-inbox-imapd provides a read-only IMAP daemon for public-inbox. It uses options and environment variables common to all L<public-inbox-daemon(8)> implementations. Like L<public-inbox-nntpd(1)> and L<public-inbox-httpd(1)>, C<public-inbox-imapd> will never require write access to the directory where the public-inboxes are stored, so it may be run as a different user than the user running L<public-inbox-watch(1)>, L<public-inbox-mda(1)>, or L<git-fetch(1)>. =head1 OPTIONS See common options in L<public-inbox-daemon(8)/OPTIONS>. Additionally, IMAP-specific behavior for certain options are supported and documented below. =over =item -l PROTOCOL://ADDRESS/?cert=/path/to/cert,key=/path/to/key =item --listen PROTOCOL://ADDRESS/?cert=/path/to/cert,key=/path/to/key In addition to the normal C<-l>/C<--listen> switch described in L<public-inbox-daemon(8)>, the C<PROTOCOL> prefix (e.g. C<imap://> or C<imaps://>) may be specified to force a given protocol. For STARTTLS and IMAPS support, the C<cert> and C<key> may be specified on a per-listener basis after a C<?> character and separated by C<,>. These directives are per-directive, and it's possible to use a different cert for every listener. =item --cert /path/to/cert The default TLS certificate for optional STARTTLS and IMAPS support if the C<cert> option is not given with C<--listen>. If using systemd-compatible socket activation and a TCP listener on port 993 is inherited, it is automatically IMAPS when this option is given. When a listener on port 143 is inherited and this option is given, it automatically gets STARTTLS support. =item --key /path/to/key The default private TLS certificate key for optional STARTTLS and IMAPS support if the C<key> option is not given with C<--listen>. The private key may be concatenated into the path used by C<--cert>, in which case this option is not needed. =back =head1 CONFIGURATION C<public-inbox-imapd> uses the same configuration knobs as L<public-inbox-nntpd(1)>, see L<public-inbox-nntpd(1)> and L<public-inbox-config(5)>. =over 8 =item publicinbox.<name>.newsgroup The newsgroup name maps to an IMAP folder name. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/>, and L<nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta>, L<nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.mail.public-inbox.meta> =head1 COPYRIGHT Copyright 2020-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>, L<public-inbox-config(5)>, L<public-inbox-nntpd(1)> ����������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-index.pod���������������������������������������������0000664�0000000�0000000�00000023577�14300314757�0023153�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-index - create and update search indices =head1 SYNOPSIS public-inbox-index [OPTIONS] INBOX_DIR... public-inbox-index [OPTIONS] --all =head1 DESCRIPTION public-inbox-index creates and updates the search, overview and NNTP article number database used by the read-only public-inbox HTTP and NNTP interfaces. Currently, this requires L<DBD::SQLite> and L<DBI> Perl modules. L<Search::Xapian> is optional, only to support the PSGI search interface. Once the initial indices are created by public-inbox-index, L<public-inbox-mda(1)> and L<public-inbox-watch(1)> will automatically maintain them. Running this manually to update indices is only required if relying on L<git-fetch(1)> to mirror an existing public-inbox; or if upgrading to a new version of public-inbox using the C<--reindex> option. Having the overview and article number database is essential to running the NNTP interface, and strongly recommended for the HTTP interface as it provides thread grouping in addition to normal search functionality. =head1 OPTIONS =over =item -j JOBS =item --jobs=JOBS Influences the number of Xapian indexing shards in a (L<public-inbox-v2-format(5)>) inbox. See L<public-inbox-init(1)/--jobs> for a full description of sharding. C<--jobs=0> is accepted as of public-inbox 1.6.0 to disable parallel indexing regardless of the number of pre-existing shards. If the inbox has not been indexed or initialized, C<JOBS - 1> shards will be created (one job is always needed for indexing the overview and article number mapping). Default: the number of existing Xapian shards =item -c =item --compact Compacts the Xapian DBs after indexing. This is recommended when using C<--reindex> to avoid running out of disk space while indexing multiple inboxes. While option takes a negligible amount of time compared to C<--reindex>, it requires temporarily duplicating the entire contents of the Xapian DB. This switch may be specified twice, in which case compaction happens both before and after indexing to minimize the temporal footprint of the (re)indexing operation. Available since public-inbox 1.4.0. =item --reindex Forces a re-index of all messages in the inbox. This can be used for in-place upgrades and bugfixes while NNTP/HTTP server processes are utilizing the index. Keep in mind this roughly doubles the size of the already-large Xapian database. Using this with C<--compact> or running L<public-inbox-compact(1)> afterwards is recommended to release free space. public-inbox protects writes to various indices with L<flock(2)>, so it is safe to reindex (and rethread) while L<public-inbox-watch(1)>, L<public-inbox-mda(1)> or L<public-inbox-learn(1)> run. This does not touch the NNTP article number database. It does not affect threading unless C<--rethread> is used. =item --all Index all inboxes configured in ~/.public-inbox/config. This is an alternative to specifying individual inboxes directories on the command-line. =item --rethread Regenerate internal THREADID and message thread associations when reindexing. This fixes some bugs in older versions of public-inbox. While it is possible to use this without C<--reindex>, it makes little sense to do so. Available in public-inbox 1.6.0+. =item --prune Run L<git-gc(1)> to prune and expire reflogs if discontiguous history is detected. This is intended to be used in mirrors after running L<public-inbox-edit(1)> or L<public-inbox-purge(1)> to ensure data is expunged from mirrors. Available since public-inbox 1.2.0. =item --max-size SIZE Sets or overrides L</publicinbox.indexMaxSize> on a per-invocation basis. See L</publicinbox.indexMaxSize> below. Available since public-inbox 1.5.0. =item --batch-size SIZE Sets or overrides L</publicinbox.indexBatchSize> on a per-invocation basis. See L</publicinbox.indexBatchSize> below. When using rotational storage but abundant RAM, using a large value (e.g. C<500m>) with C<--sequential-shard> can significantly speed up and reduce fragmentation during the initial index and full C<--reindex> invocations (but not incremental updates). Available in public-inbox 1.6.0+. =item --no-fsync Disables L<fsync(2)> and L<fdatasync(2)> operations on SQLite and Xapian. This is only effective with Xapian 1.4+. This is primarily intended for systems with low RAM and the small (default) C<--batch-size=1m>. Users of large C<--batch-size> may even find disabling L<fdatasync(2)> causes too much dirty data to accumulate, resulting on latency spikes from writeback. Available in public-inbox 1.6.0+. =item --dangerous Speed up initial index by using in-place updates and denying support for concurrent readers. This is only effective with Xapian 1.4+. Available in public-inbox 1.8.0+ =item --sequential-shard Sets or overrides L</publicinbox.indexSequentialShard> on a per-invocation basis. See L</publicinbox.indexSequentialShard> below. Available in public-inbox 1.6.0+. =item --skip-docdata Stop storing document data in Xapian on an existing inbox. See L<public-inbox-init(1)/--skip-docdata> for description and caveats. Available in public-inbox 1.6.0+. =item -E EXTINDEX =item --update-extindex=EXTINDEX Update the given external index (L<public-inbox-extindex-format(5)>. Either the configured section name (e.g. C<all>) or a directory name may be specified. Defaults to C<all> if C<[extindex "all"]> is configured, otherwise no external indices are updated. May be specified multiple times in rare cases where multiple external indices are configured. =item --no-update-extindex Do not update the C<all> external index by default. This negates all uses of C<-E> / C<--update-extindex=> on the command-line. =item --since=DATESTRING =item --after=DATESTRING =item --until=DATESTRING =item --before=DATESTRING Passed directly to L<git-log(1)> to limit changes for C<--reindex> =back =head1 FILES For v1 (ssoma) repositories described in L<public-inbox-v1-format(5)>. All public-inbox-specific files are contained within the C<$GIT_DIR/public-inbox/> directory. v2 inboxes are described in L<public-inbox-v2-format(5)>. =head1 CONFIGURATION =over 8 =item publicinbox.indexMaxSize Prevents indexing of messages larger than the specified size value. A single suffix modifier of C<k>, C<m> or C<g> is supported, thus the value of C<1m> to prevents indexing of messages larger than one megabyte. This is useful for avoiding memory exhaustion in mirrors via git. It does not prevent L<public-inbox-mda(1)> or L<public-inbox-watch(1)> from importing (and indexing) a message. This option is only available in public-inbox 1.5 or later. Default: none =item publicinbox.indexBatchSize Flushes changes to the filesystem and releases locks after indexing the given number of bytes. The default value of C<1m> (one megabyte) is low to minimize memory use and reduce contention with parallel invocations of L<public-inbox-mda(1)>, L<public-inbox-learn(1)>, and L<public-inbox-watch(1)>. Increase this value on powerful systems to improve throughput at the expense of memory use. The reduction of lock granularity may not be noticeable on fast systems. With SSDs, values above C<4m> have little benefit. For L<public-inbox-v2-format(5)> inboxes, this value is multiplied by the number of Xapian shards. Thus a typical v2 inbox with 3 shards will flush every 3 megabytes by default unless parallelism is disabled via C<--sequential-shard> or C<--jobs=0>. This influences memory usage of Xapian, but it is not exact. The actual memory used by Xapian and Perl has been observed in excess of 10x this value. This option is available in public-inbox 1.6 or later. public-inbox 1.5 and earlier used the current default, C<1m>. Default: 1m (one megabyte) =item publicinbox.indexSequentialShard For L<public-inbox-v2-format(5)> inboxes, setting this to C<true> allows indexing Xapian shards in multiple passes. This speeds up indexing on rotational storage with high seek latency by allowing individual shards to fit into the kernel page cache. Using a higher-than-normal number of C<--jobs> with L<public-inbox-init(1)> may be required to ensure individual shards are small enough to fit into cache. Warning: interrupting C<public-inbox-index(1)> while this option is in use may leave the search indices out-of-date with respect to SQLite databases. WWW and IMAP users may notice incomplete search results, but it is otherwise non-fatal. Using C<--reindex> will bring everything back up-to-date. Available in public-inbox 1.6.0+. This is ignored on L<public-inbox-v1-format(5)> inboxes. Default: false, shards are indexed in parallel =item publicinbox.<name>.indexSequentialShard Identical to L</publicinbox.indexSequentialShard>, but only affect the inbox matching E<lt>nameE<gt>. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG Used to override the default "~/.public-inbox/config" value. =item XAPIAN_FLUSH_THRESHOLD The number of documents to update before committing changes to disk. This environment is handled directly by Xapian, refer to Xapian API documentation for more details. For public-inbox 1.6 and later, use C<publicinbox.indexBatchSize> instead. Setting C<XAPIAN_FLUSH_THRESHOLD> or C<publicinbox.indexBatchSize> for a large C<--reindex> may cause L<public-inbox-mda(1)>, L<public-inbox-learn(1)> and L<public-inbox-watch(1)> tasks to wait long and unpredictable periods of time during C<--reindex>. Default: none, uses C<publicinbox.indexBatchSize> =back =head1 UPGRADING Occasionally, public-inbox will update it's schema version and require a full index by running this command. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<Search::Xapian>, L<DBD::SQLite>, L<public-inbox-extindex-format(5)> ���������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-init.pod����������������������������������������������0000664�0000000�0000000�00000010675�14300314757�0023002�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-init - create or reinitialize a public-inbox =head1 SYNOPSIS B<public-inbox-init> [OPTIONS] NAME INBOX_DIR HTTP_URL ADDRESS [ADDRESS..] =head1 DESCRIPTION Creates an empty public-inbox or reinitializes an existing one. It updates C<~/.public-inbox/config> by creating a C<[publicinbox "NAME"]> section where C<publicinbox.NAME.inboxdir> is C<INBOX_DIR>, C<publicinbox.NAME.url> is C<HTTP_URL>, and C<publicinbox.NAME.address> is C<ADDRESS>. Multiple addresses may be specified for inboxes with multiple addresses. =head1 OPTIONS =over =item -V FORMAT_VERSION =item --version FORMAT_VERSION Specify C<2> here to use the scalable L<public-inbox-v2-format(5)> if you have L<DBD::SQLite> installed. The default is C<1> for the old L<public-inbox-v1-format(5)>, but C<2> is strongly recommended for scalability if you have L<DBD::SQLite>. Default: C<1> =item -L <basic|medium|full> =item --indexlevel <basic|medium|full> Controls the indexing level for L<public-inbox-index(1)> See L<public-inbox-config(5)> for more information. Default: C<full> =item --ng NEWSGROUP =item --newsgroup NEWSGROUP The NNTP group name for use with L<public-inbox-nntpd(8)>. This may be any newsgroup name with hierarchies delimited by C<.>. For example, the newsgroup for L<mailto:meta@public-inbox.org> is: C<inbox.comp.mail.public-inbox.meta> This may be set after-the-fact via C<publicinbox.$NAME.newsgroup> in the configuration file. See L<public-inbox-config(5)> for more info. Available in public-inbox 1.6.0+. Default: none. =item -c KEY=VALUE Allow setting arbitrary configs as C<publicinbox.$NAME.$KEY>. This is idempotent for the same C<VALUE>, but allows setting multiple values for keys such as C<publicinbox.$NAME.url> and C<publicinbox.$NAME.watch>. =item --skip-artnum This option allows archivists to publish incomplete archives with only new mail while allowing NNTP article numbers to be reserved for yet-to-be-archived old mail. This is mainly intended for users of C<--skip-epoch> (documented below) but may be of use to L<public-inbox-v1-format(5)> users. There is no automatic way to use reserved NNTP article numbers when old mail is found, yet. Available in public-inbox 1.6.0+. Default: unset, no NNTP article numbers are skipped =item -S =item --skip-epoch For C<-V2> (L<public-inbox-v2-format(5)>) inboxes only, this option allows archivists to publish incomplete archives with newer mail while allowing "0.git" (or "1.git" and so on) epochs to be added-after-the-fact (without affecting "git clone" followers). Available since public-inbox 1.2.0. Default: unset, no epochs are skipped =item -j JOBS =item --jobs=JOBS Control the number of Xapian index shards in a C<-V2> (L<public-inbox-v2-format(5)>) inbox. It can be useful to use a single shard (C<-j1>) for inboxes on high-latency storage (e.g. rotational HDD) unless the system has enough RAM to cache 5-10x the size of the git repository. Another approach for HDDs is to use the L<public-inbox-index(1)/publicInbox.indexSequentialShard> option and many shards, so each shard may fit into the kernel page cache. Unfortunately, excessive shards slows down read-only query performance. For fast storage, it is generally not useful to specify higher values than the default due to the top-level producer process being a bottleneck. Default: the number of online CPUs, up to 4 (3 shard workers, 1 producer) =item --skip-docdata Do not store document data in Xapian, reducing Xapian storage overhead by around 1.5%. Warning: this option prevents rollbacks to public-inbox 1.5.0 and earlier. Available in public-inbox 1.6.0+. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG Used to override the default C<~/.public-inbox/config> value. =back =head1 LIMITATIONS Some of the options documented in L<public-inbox-config(5)> require editing the config file. Old versions lack the C<--ng>/C<--newsgroup> parameter See L<public-inbox-config(5)> for all the options which may be applied to a given inbox. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2019-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<git-init(1)>, L<git-config(1)>, L<public-inbox-v1-format(5)>, L<public-inbox-v2-format(5)> �������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-learn.pod���������������������������������������������0000664�0000000�0000000�00000004600�14300314757�0023127�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-learn - spam trainer and remover for public-inbox =head1 SYNOPSIS public-inbox-learn <spam|ham|rm> </path/to/RFC2822_message =head1 DESCRIPTION public-inbox-learn can remove spam or inject ham messages into an inbox while training a SpamAssassin instance. It is intended for users of L<public-inbox-mda(1)> or L<public-inbox-watch(1)>, but not users relying on L<git-fetch(1)> to mirror inboxes. It reads one message from standard input and operates on it depending on the command given: =head1 COMMANDS public-inbox-learn takes one of the following commands as its first and only argument: =over 8 =item spam Treat the message as spam. This will mark the message as removed so it becomes inaccessible via NNTP or WWW endpoints for all configured inboxes. The message remains accessible in git history. It will also be fed to L<spamc(1)> for training purposes unless C<publicinboxmda.spamcheck> is C<none> in L<public-inbox-config(5)>. =item ham Treat standard input as ham. This is useful for manually injecting messages into the archives which failed the spam check run by L<public-inbox-mda(1)> or L<public-inbox-watch(1)>. It relies on the C<To:>, C<Cc:>, and C<List-ID:> headers to match configured inbox addresses and C<listid> directives. It will also be fed to L<spamc(1)> for training purposes unless C<publicinboxmda.spamcheck> is C<none> in L<public-inbox-config(5)>. =item rm This is similar to the C<spam> command above, but does not feed the message to L<spamc(1)> and only removes messages which match on any of the C<To:>, C<Cc:>, and C<List-ID:> headers. The C<--all> option may be used match C<spam> semantics in removing the message from all configured inboxes. C<--all> is only available in public-inbox 1.6.0+. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG Per-user config file parseable by L<git-config(1)>. See L<public-inbox-config(5)>. Default: ~/.public-inbox/config =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2019-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<spamc(1)>, L<public-inbox-mda(1)>, L<public-inbox-watch(1)> ��������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-mda.pod�����������������������������������������������0000664�0000000�0000000�00000004064�14300314757�0022573�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-mda - mail delivery agent for public-inbox =head1 SYNOPSIS public-inbox-mda </path/to/RFC2822_message =head1 DESCRIPTION Mail Delivery Agent (MDA) for public-inbox installations. Each system user may have their own public-inbox instances. This may be invoked via L<procmail(1)> or similar tools. By default, it relies on L<spamc(1)> for filtering mail, but may be disabled via L<public-inbox-config(5)/publicinboxmda.spamcheck> =head1 OPTIONS =over 8 =item --no-precheck By default, public-inbox-mda does some simple checks before invoking L<spamc(1)> since it is intended to receive mail before it goes to a mailing list. However, some users prefer to use public-inbox-mda to mirror mailing lists. This option exists to support those users. Using this option, the following prechecks are disabled: * multiple Message-IDs * non-existent Message-IDs * Message-IDs longer than 244 characters long * From: header shorter than 3 characters * Subject: header shorter than 2 characters * unusable Date: headers * inbox address specified in To: or Cc: header =back =head1 ENVIRONMENT =over 8 =item ORIGINAL_RECIPIENT The original recipient email address, set by the MTA. Postfix sets it by default, untested on other MTAs. This does not have to be set if relying on C<publicinbox.$NAME.listid> directives configured in L<public-inbox-config(5)>. =item PI_CONFIG Per-user config file parseable by L<git-config(1)>. See L<public-inbox-config(5)>. Default: ~/.public-inbox/config =item PI_EMERGENCY emergency Maildir destination. Default: ~/.public-inbox/emergency/ =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2013-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<git(1)>, L<git-config(1)>, L<public-inbox-v1-format(5)> ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-netd.pod����������������������������������������������0000664�0000000�0000000�00000005052�14300314757�0022762�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-netd - read-only network daemon for sharing public-inboxes =head1 SYNOPSIS public-inbox-netd [OPTIONS] =head1 DESCRIPTION public-inbox-netd provides a read-only multi-protocol (HTTP/IMAP/NNTP/POP3) daemon for public-inbox. It uses options and environment variables common to all L<public-inbox-daemon(8)> implementations. The default configuration will never require write access to the directory where the public-inbox is stored, so it may be run as a different user than the user running L<public-inbox-watch(1)>, L<public-inbox-mda(1)>, or L<git-fetch(1)>. =head1 OPTIONS See common options in L<public-inbox-daemon(8)/OPTIONS>. =over =item -l PROTOCOL://ADDRESS/?cert=/path/to/cert,key=/path/to/key =item --listen PROTOCOL://ADDRESS/?cert=/path/to/cert,key=/path/to/key =item -l http://ADDRESS/?env.PI_CONFIG=/path/to/cfg,psgi=/path/to/app.psgi In addition to the normal C<-l>/C<--listen> switch described in L<public-inbox-daemon(8)>, the protocol prefix (e.g. C<nntp://> or C<nntps://>) may be specified to force a given protocol. Environment variable overrides in effect during loading and reloading (SIGHUP) can be specified as C<env.NAME=VALUE> for all protocols. HTTP(S) listeners may also specify C<psgi=> to use a different C<.psgi> file for each listener. C<err=/path/to/errors.log> may be used to isolate error/debug output for a particular listener away from C<--stderr>. Non-HTTP(S) listeners may also specify C<out=> for logging to C<stdout>. HTTP(S) users are encouraged to configure L<Plack::Middleware::AccessLog> or L<Plack::Middleware::AccessLog::Timed>, instead. =item --cert /path/to/cert See L<public-inbox-daemon(1)>. =item --key /path/to/key See L<public-inbox-daemon(1)>. =back =head1 CONFIGURATION These configuration knobs should be used in the L<public-inbox-config(5)>. =over 8 =item publicinbox.<name>.newsgroup =item publicinbox.nntpserver =item publicinbox.pop3state =back See L<public-inbox-config(5)> for documentation on them. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/>, and L<nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta>, L<nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.mail.public-inbox.meta> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>, L<public-inbox-config(5)> ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-nntpd.pod���������������������������������������������0000664�0000000�0000000�00000005354�14300314757�0023160�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-nntpd - NNTP server for sharing public-inbox =head1 SYNOPSIS public-inbox-nntpd [OPTIONS] =head1 DESCRIPTION public-inbox-nntpd provides a read-only NNTP daemon for public-inbox. It uses options and environment variables common to all L<public-inbox-daemon(8)> implementations. The default configuration will never require write access to the directory where the public-inbox is stored, so it may be run as a different user than the user running L<public-inbox-watch(1)>, L<public-inbox-mda(1)>, or L<git-fetch(1)>. =head1 OPTIONS See common options in L<public-inbox-daemon(8)/OPTIONS>. Additionally, NNTP-specific behavior for certain options are supported and documented below. =over =item -l PROTOCOL://ADDRESS/?cert=/path/to/cert,key=/path/to/key =item --listen PROTOCOL://ADDRESS/?cert=/path/to/cert,key=/path/to/key In addition to the normal C<-l>/C<--listen> switch described in L<public-inbox-daemon(8)>, the protocol prefix (e.g. C<nntp://> or C<nntps://>) may be specified to force a given protocol. For STARTTLS and NNTPS support, the C<cert> and C<key> may be specified on a per-listener basis after a C<?> character and separated by C<,>. These directives are per-directive, and it's possible to use a different cert for every listener. =item --cert /path/to/cert The default TLS certificate for optional STARTTLS and NNTPS support if the C<cert> option is not given with C<--listen>. If using systemd-compatible socket activation and a TCP listener on port 563 is inherited, it is automatically NNTPS when this option is given. When a listener on port 119 is inherited and this option is given, it automatically gets STARTTLS support. =item --key /path/to/key The default private TLS certificate key for optional STARTTLS and NNTPS support if the C<key> option is not given with C<--listen>. The private key may be concatenated into the path used by C<--cert>, in which case this option is not needed. =back =head1 CONFIGURATION These configuration knobs should be used in the L<public-inbox-config(5)> =over 8 =item publicinbox.<name>.newsgroup =item publicinbox.nntpserver =back See L<public-inbox-config(5)> for documentation on them. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/>, and L<nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta>, L<nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.mail.public-inbox.meta> =head1 COPYRIGHT Copyright 2013-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>, L<public-inbox-config(5)> ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-overview.pod������������������������������������������0000664�0000000�0000000�00000007512�14300314757�0023701�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-overview - an overview of public-inbox =head1 DESCRIPTION public-inbox consists of many pieces which may be used independently or in conjunction of each other for: =over 4 =item 1 Mirroring existing public-inboxes. =item 2 Mirroring mailing lists. =item 3 Hosting standalone inboxes. =back =head2 Mirroring existing public-inboxes Mirroring existing public-inboxes is the easiest way to get started. Your mirror will remain dependent on the REMOTE_URL you are mirroring and you only need to use two new commands in addition to common L<git(1)> commands. Instructions are different depending on whether the inbox is L<public-inbox-v1-format(5)> or L<public-inbox-v2-format(5)>. See the "Archives are clonable:" part of the WWW interface of a given inbox for cloning instructions specific to that inbox. The instructions are roughly: # for v1 inboxes: git clone --mirror URL INBOX_DIR # for v2 inboxes (each epoch needs to be cloned): git clone --mirror URL/EPOCH INBOX_DIR/git/EPOCH.git # The following should create the necessary entry in # ~/.public-inbox/config, use "-V2" only for v2 inboxes: public-inbox-init [-V2] NAME INBOX_DIR MY_URL LIST_ADDRESS # Optional but strongly recommended for hosting HTTP # (and required for NNTP) # enable overview (requires DBD::SQLite) and, if Search::Xapian is # available, search: public-inbox-index INBOX_DIR # Periodically fetch the repo using git-fetch(1) # for v1 inboxes: git --git-dir=INBOX_DIR fetch # for v2 (in most cases, only the newest epoch needs to be fetched): git --git-dir=INBOX_DIR/git/EPOCH.git fetch # index new messages after fetching: public-inbox-index INBOX_DIR See L</"Serving public-inboxes"> below for info on how to expose your mirror to other readers. =head2 Mirroring mailing lists Mirroring mailing lists may be done by any reader of a mailing list using L<public-inbox-watch(1)>. # This will create a new v2 inbox: public-inbox-init -V2 NAME INBOX_DIR MY_URL LIST_ADDRESS Then, see the L<public-inbox-watch(1)> manual for configuring C<watch>, C<watchheader>, C<listid> and the optional C<spamcheck> and C<watchspam> entries. You will need to leave L<public-inbox-watch(1)> running to keep the mailbox up-to-date as messages are delivered to the mailing list. Running L<public-inbox-index(1)> to create search indices is recommended. L<public-inbox-watch(1)> will automatically maintain the indices if they were created by L<public-inbox-index(1)> public-inbox-index INBOX_DIR Instead of using L<public-inbox-watch(1)>, using L<public-inbox-mda(1)> with the C<--no-precheck> option and relying on the C<listid> directive in L<public-inbox-config(5)> is also an option. =head2 Hosting standalone inboxes Using L<public-inbox-init(1)> to initialize the inbox as in the other methods is recommended. See L<public-inbox-mda(1)> for more details; but this also requires MTA-specific knowledge. =head2 Serving public-inboxes Since public-inboxes are git repositories, they may be served to remote clients via L<git-daemon(1)> as well as specialized HTTP and NNTP daemons distributed with public-inbox. See L<public-inbox-httpd(1)> and L<public-inbox-nntpd(1)> for more information on using these daemons. Hosting a public-inbox over HTTP or NNTP will never require write access to any files in the inbox directory. Users familiar with PSGI and L<Plack> may also use L<PublicInbox::WWW> with the preferred server instead of L<public-inbox-httpd(1)> =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2016-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-pop3d.pod���������������������������������������������0000664�0000000�0000000�00000007346�14300314757�0023065�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-pop3d - POP3 server for sharing public-inboxes =head1 SYNOPSIS public-inbox-pop3d [OPTIONS] =head1 DESCRIPTION public-inbox-pop3d provides a POP3 daemon for public-inbox. It uses options and environment variables common to all read-only L<public-inbox-daemon(8)> implementations, but requires additional read-write storage to keep track of deleted messages on a per-user basis. Like L<public-inbox-imapd(1)>, C<public-inbox-pop3d> will never require write access to the directory where the public-inboxes are stored. It is designed for anonymous access, thus the password is always C<anonymous> (all lower-case). Usernames are of the format: C<$UUID@$NEWSGROUP_NAME> Where C<$UUID> is the output of the L<uuidgen(1)> command. Dash (C<->) characters in UUIDs are ignored, and C<[A-F]> hex characters are case-insensitive. Users should keep their UUIDs private to prevent others from deleting unretrieved messages. Users may switch to a new UUID at any time to retrieve previously-retrieved messages. Historical slices of 50K messages are available by suffixing the integer L<$SLICE>, where C<0> is the oldest. C<$UUID@$NEWSGROUP_NAME.$SLICE> It may be run as a different user than the user running L<public-inbox-watch(1)>, L<public-inbox-mda(1)>, or L<public-inbox-fetch(1)>. To save storage, L</publicinbox.pop3state> only stores the highest-numbered deleted message =head1 OPTIONS See common options in L<public-inbox-daemon(8)/OPTIONS>. =over =item -l PROTOCOL://ADDRESS/?cert=/path/to/cert,key=/path/to/key =item --listen PROTOCOL://ADDRESS/?cert=/path/to/cert,key=/path/to/key In addition to the normal C<-l>/C<--listen> switch described in L<public-inbox-daemon(8)>, the C<PROTOCOL> prefix (e.g. C<pop3://> or C<pop3s://>) may be specified to force a given protocol. For STARTTLS and POP3S support, the C<cert> and C<key> may be specified on a per-listener basis after a C<?> character and separated by C<,>. These directives are per-directive, and it's possible to use a different cert for every listener. =item --cert /path/to/cert The default TLS certificate for optional STARTTLS and POP3S support if the C<cert> option is not given with C<--listen>. If using systemd-compatible socket activation and a TCP listener on port 995 is inherited, it is automatically POP3S when this option is given. When a listener on port 110 is inherited and this option is given, it automatically gets STARTTLS support. =item --key /path/to/key The default private TLS certificate key for optional STARTTLS and POP3S support if the C<key> option is not given with C<--listen>. The private key may be concatenated into the path used by C<--cert>, in which case this option is not needed. =back =head1 CONFIGURATION Aside from C<publicinbox.pop3state>, C<public-inbox-pop3d> uses the same configuration knobs as L<public-inbox-nntpd(1)>, see L<public-inbox-nntpd(1)> and L<public-inbox-config(5)>. =over 8 =item publicInbox.pop3state A directory containing per-user/mailbox account information; must be writable to the C<public-inbox-pop3d> process. =item publicInbox.<name>.newsgroup The newsgroup name maps to a POP3 folder name. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/>, and L<nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta>, L<nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.mail.public-inbox.meta> =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>, L<public-inbox-config(5)>, L<public-inbox-nntpd(1)>, L<uuidgen(1)> ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-purge.pod���������������������������������������������0000664�0000000�0000000�00000004156�14300314757�0023156�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-purge - erase messages from a public inbox and its history =head1 SYNOPSIS # requires ~/.public-inbox/config public-inbox-purge --all </path/to/message-to-purge # for testing with unconfigured inboxes: public-inbox-purge $INBOX_DIR </path/to/message-to-purge =head1 DESCRIPTION public-inbox-purge allows removing entire messages in a given inbox from history. It is only intended as a last resort, as it will cause discontiguous git history and draw more attention to the sensitive data in mirrors. For removing spam, L<public-inbox-learn(1)> is preferable as it preserves contiguous git history. For editing sensitive information out of messages, consider L<public-inbox-edit(1)>, instead, but keep in mind it still leads to discontiguous git history. =head1 OPTIONS =over =item --all Purge the message in all inboxes configured in ~/.public-inbox/config. This is an alternative to specifying individual inboxes directories on the command-line. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG The default config file, normally "~/.public-inbox/config". See L<public-inbox-config(5)> =back =head1 LIMITATIONS Only L<public-inbox-v2-format(5)> inboxes are supported. This is safe to run while normal inbox writing tools (L<public-inbox-mda(1)>, L<public-inbox-watch(1)>, L<public-inbox-learn(1)>) are active. Running this in parallel with L<public-inbox-xcpdb(1)> or C<"public-inbox-index --reindex"> can lead to errors or purged data remaining indexed. Incremental L<public-inbox-index(1)> (without C<--reindex>) is fine. Keep in mind this is a last resort, as it will be disruptive to anyone using L<git(1)> to mirror the inbox being purged. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2019-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-edit(1)>, L<public-inbox-learn(1)> ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-tuning.pod��������������������������������������������0000664�0000000�0000000�00000014173�14300314757�0023340�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-tuning - tuning public-inbox =head1 DESCRIPTION public-inbox intends to support a wide variety of hardware. While we strive to provide the best out-of-the-box performance possible, tuning knobs are an unfortunate necessity in some cases. =over 4 =item 1 New inboxes: public-inbox-init -V2 =item 2 Optional Inline::C use =item 3 Performance on rotational hard disk drives =item 4 Btrfs (and possibly other copy-on-write filesystems) =item 5 Performance on solid state drives =item 6 Read-only daemons =item 7 Other OS tuning knobs =item 8 Scalability to many inboxes =back =head2 New inboxes: public-inbox-init -V2 If you're starting a new inbox (and not mirroring an existing one), the L<-V2|public-inbox-v2-format(5)> requires L<DBD::SQLite>, but is orders of magnitude more scalable than the original C<-V1> format. =head2 Optional Inline::C use Our optional use of L<Inline::C> speeds up subprocess spawning from large daemon processes. To enable L<Inline::C>, either set the C<PERL_INLINE_DIRECTORY> environment variable to point to a writable directory, or create C<~/.cache/public-inbox/inline-c> for any user(s) running public-inbox processes. If libgit2 development files are installed and L<Inline::C> is enabled (described above), per-inbox C<git cat-file --batch> processes are replaced with a single L<perl(1)> process running C<PublicInbox::Gcf2::loop> in read-only daemons. libgit2 use will be available in public-inbox 1.7.0+ More (optional) L<Inline::C> use will be introduced in the future to lower memory use and improve scalability. Note: L<Inline::C> is required for L<lei(1)>, but not public-inbox-* =head2 Performance on rotational hard disk drives Random I/O performance is poor on rotational HDDs. Xapian indexing performance degrades significantly as DBs grow larger than available RAM. Attempts to parallelize random I/O on HDDs leads to pathological slowdowns as inboxes grow. While C<-V2> introduced Xapian shards as a parallelization mechanism for SSDs; enabling C<publicInbox.indexSequentialShard> repurposes sharding as mechanism to reduce the kernel page cache footprint when indexing on HDDs. Initializing a mirror with a high C<--jobs> count to create more shards (in C<-V2> inboxes) will keep each shard smaller and reduce its kernel page cache footprint. Keep in mind excessive sharding imposes a performance penalty for read-only queries. Users with large amounts of RAM are advised to set a large value for C<publicinbox.indexBatchSize> as documented in L<public-inbox-index(1)>. C<dm-crypt> users on Linux 4.0+ are advised to try the C<--perf-same_cpu_crypt> C<--perf-submit_from_crypt_cpus> switches of L<cryptsetup(8)> to reduce I/O contention from kernel workqueue threads. =head2 Btrfs (and possibly other copy-on-write filesystems) L<btrfs(5)> performance degrades from fragmentation when using large databases and random writes. The Xapian + SQLite indices used by public-inbox are no exception to that. public-inbox 1.6.0+ disables copy-on-write (CoW) on Xapian and SQLite indices on btrfs to achieve acceptable performance (even on SSD). Disabling copy-on-write also disables checksumming, thus C<raid1> (or higher) configurations may be corrupt after unsafe shutdowns. Fortunately, these SQLite and Xapian indices are designed to recoverable from git if missing. Disabling CoW does not prevent all fragmentation. Large values of C<publicInbox.indexBatchSize> also limit fragmentation during the initial index. Avoid snapshotting subvolumes containing Xapian and/or SQLite indices. Snapshots use CoW despite our efforts to disable it, resulting in fragmentation. L<filefrag(8)> can be used to monitor fragmentation, and C<btrfs filesystem defragment -fr $INBOX_DIR> may be necessary. Large filesystems benefit significantly from the C<space_cache=v2> mount option documented in L<btrfs(5)>. Older, non-CoW filesystems are generally work well out-of-the-box for our Xapian and SQLite indices. =head2 Performance on solid state drives While SSD read performance is generally good, SSD write performance degrades as the drive ages and/or gets full. Issuing C<TRIM> commands via L<fstrim(8)> or similar is required to sustain write performance. Users of the Flash-Friendly File System L<F2FS|https://en.wikipedia.org/wiki/F2FS> may benefit from optimizations found in SQLite 3.21.0+. Benchmarks are greatly appreciated. =head2 Read-only daemons L<public-inbox-httpd(1)>, L<public-inbox-imapd(1)>, and L<public-inbox-nntpd(1)> are all designed for C10K (or higher) levels of concurrency from a single process. SMP systems may use C<--worker-processes=NUM> as documented in L<public-inbox-daemon(8)> for parallelism. The open file descriptor limit (C<RLIMIT_NOFILE>, C<ulimit -n> in L<sh(1)>, C<LimitNOFILE=> in L<systemd.exec(5)>) may need to be raised to accommodate many concurrent clients. Transport Layer Security (IMAPS, NNTPS, or via STARTTLS) significantly increases memory use of client sockets, sure to account for that in capacity planning. =head2 Other OS tuning knobs Linux users: the C<sys.vm.max_map_count> sysctl may need to be increased if handling thousands of inboxes (with L<public-inbox-extindex(1)>) to avoid out-of-memory errors from git. Other OSes may have similar tuning knobs (patches appreciated). =head2 Scalability to many inboxes L<public-inbox-extindex(1)> allows any number of public-inboxes to share the same Xapian indices. git 2.33+ startup time is orders-of-magnitude faster and uses less memory when dealing with thousands of alternates required for thousands of inboxes with L<public-inbox-extindex(1)>. Frequent packing (via L<git-gc(1)>) both improves performance and reduces the need to increase C<sys.vm.max_map_count>. =head1 CONTACT Feedback encouraged via plain-text mail to L<mailto:meta@public-inbox.org> Information for *BSDs and non-traditional filesystems especially welcome. Our archives are hosted at L<https://public-inbox.org/meta/>, L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/>, and other places =head1 COPYRIGHT Copyright all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-v1-format.pod�����������������������������������������0000664�0000000�0000000�00000013521�14300314757�0023644�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������% public-inbox developer manual =head1 NAME public-inbox-v1-format - git repository and tree description (aka "ssoma") =head1 DESCRIPTION WARNING: this does NOT describe the scalable v2 format used by public-inbox. Use of ssoma is not recommended for new installations due to scalability problems. ssoma uses a git repository to store each email as a git blob. The tree filename of the blob is based on the SHA1 hexdigest of the first Message-ID header. A commit is made for each message delivered. The commit SHA-1 identifier is used by ssoma clients to track synchronization state. =head1 PATHNAMES IN TREES A Message-ID may be extremely long and also contain slashes, so using them as a path name is challenging. Instead we use the SHA-1 hexdigest of the Message-ID (excluding the leading "E<lt>" and trailing "E<gt>") to generate a path name. Leading and trailing white space in the Message-ID header is ignored for hashing. A message with Message-ID of: E<lt>20131106023245.GA20224@dcvr.yhbt.netE<gt> Would be stored as: f2/8c6cfd2b0a65f994c3e1be266105413b3d3f63 Thus it is easy to look up the contents of a message matching a given a Message-ID. =head1 MESSAGE-ID CONFLICTS public-inbox v1 repositories currently do not resolve conflicting Message-IDs or messages with multiple Message-IDs. =head1 HEADERS The Message-ID header is required. "Bytes", "Lines" and "Content-Length" headers are stripped and not allowed, they can interfere with further processing. When using ssoma with public-inbox-mda, the "Status" mbox header is also stripped as that header makes no sense in a public archive. =head1 LOCKING L<flock(2)> locking exclusively locks the empty $GIT_DIR/ssoma.lock file for all non-atomic operations. =head1 EXAMPLE INPUT FLOW (SERVER-SIDE MDA) 1. Message is delivered to a mail transport agent (MTA) 1a. (optional) reject/discard spam, this should run before ssoma-mda 1b. (optional) reject/strip unwanted attachments ssoma-mda handles all steps once invoked. 2. Mail transport agent invokes ssoma-mda 3. reads message via stdin, extracting Message-ID 4. acquires exclusive flock lock on $GIT_DIR/ssoma.lock 5. creates or updates the blob of associated 2/38 SHA-1 path 6. updates the index and commits 7. releases $GIT_DIR/ssoma.lock ssoma-mda can also be used as an L<inotify(7)> trigger to monitor maildirs, and the ability to monitor IMAP mailboxes using IDLE will be available in the future. =head1 GIT REPOSITORIES (SERVERS) ssoma uses bare git repositories on both servers and clients. Using the L<git-init(1)> command with --bare is the recommend method of creating a git repository on a server: git init --bare /path/to/wherever/you/want.git There are no standardized paths for servers, administrators make all the choices regarding git repository locations. Special files in $GIT_DIR on the server: =over =item $GIT_DIR/ssoma.lock An empty file for L<flock(2)> locking. This is necessary to ensure the index and commits are updated consistently and multiple processes running MDA do not step on each other. =item $GIT_DIR/public-inbox/msgmap.sqlite3 SQLite3 database maintaining a stable mapping of Message-IDs to NNTP article numbers. Used by L<public-inbox-nntpd(1)> and created and updated by L<public-inbox-index(1)>. Users of the L<PublicInbox::WWW> interface will find it useful for attempting recovery from copy-paste truncations of URLs containing long Message-IDs. Automatically updated by L<public-inbox-mda(1)>, L<public-inbox-learn(1)> and L<public-inbox-watch(1)>. Losing or damaging this file will cause synchronization problems for NNTP clients. This file is expected to be stable and require no updates to its schema. Requires L<DBD::SQLite>. =item $GIT_DIR/public-inbox/xapian$N/ Xapian database for search indices in the PSGI web UI. $N is the value of PublicInbox::Search::SCHEMA_VERSION, and installations may have parallel versions on disk during upgrades or to roll-back upgrades. This is created and updated by L<public-inbox-index(1)>. Automatically updated by L<public-inbox-mda(1)>, L<public-inbox-learn(1)> and L<public-inbox-watch(1)>. This directory can always be regenerated with L<public-inbox-index(1)>. If lost or damaged, there is no need to back it up unless the CPU/memory cost of regenerating it outweighs the storage/transfer cost. Since SCHEMA_VERSION 15 and the development of the v2 format, the "overview" DB also exists in the xapian directory for v1 repositories. See L<public-inbox-v2-format(5)/OVERVIEW DB> Our use of the L</OVERVIEW DB> requires Xapian document IDs to remain stable. Using L<public-inbox-compact(1)> and L<public-inbox-xcpdb(1)> wrappers are recommended over tools provided by Xapian. This directory is large, often two to three times the size of the objects stored in a packed git repository. =item $GIT_DIR/ssoma.index This file is no longer used or created by public-inbox, but it is updated if it exists to remain compatible with ssoma installations. A git index file used for MDA updates. The normal git index (in $GIT_DIR/index) is not used at all as there is typically no working tree. =back Each client $GIT_DIR may have multiple mbox/maildir/command targets. It is possible for a client to extract the mail stored in the git repository to multiple mboxes for compatibility with a variety of different tools. =head1 CAVEATS It is NOT recommended to check out the working directory of a git. there may be many files. It is impossible to completely expunge messages, even spam, as git retains full history. Projects may (with adequate notice) cycle to new repositories/branches with history cleaned up via L<git-filter-repo(1)> or L<git-filter-branch(1)>. This is up to the administrators. =head1 COPYRIGHT Copyright 2013-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<http://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<gitrepository-layout(5)>, L<ssoma(1)> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-v2-format.pod�����������������������������������������0000664�0000000�0000000�00000021277�14300314757�0023654�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������% public-inbox developer manual =head1 NAME public-inbox-v2-format - structure of public inbox v2 archives =head1 DESCRIPTION The v2 format is designed primarily to address several scalability problems of the original format described at L<public-inbox-v1-format(5)>. It also handles messages with Message-IDs. =head1 INBOX LAYOUT The key change in v2 is the inbox is no longer a bare git repository, but a directory with two or more git repositories. v2 divides git repositories by time "epochs" and Xapian databases for parallelism by "shards". =head2 INBOX OVERVIEW AND DEFINITIONS $EPOCH - Integer starting with 0 based on time $SCHEMA_VERSION - DB schema version (for Xapian) $SHARD - Integer starting with 0 based on parallelism foo/ # "foo" is the name of the inbox - inbox.lock # lock file to protect global state - git/$EPOCH.git # normal git repositories - all.git # empty, alternates to $EPOCH.git - xap$SCHEMA_VERSION/$SHARD # per-shard Xapian DB - xap$SCHEMA_VERSION/over.sqlite3 # OVER-view DB for NNTP, threading - msgmap.sqlite3 # same the v1 msgmap For blob lookups, the reader only needs to open the "all.git" repository with $GIT_DIR/objects/info/alternates which references every $EPOCH.git repo. Individual $EPOCH.git repos DO NOT use alternates themselves as git currently limits recursion of alternates nesting depth to 5. =head2 GIT EPOCHS One of the inherent scalability problems with git itself is the full history of a project must be stored and carried around to all clients. To address this problem, the v2 format uses multiple git repositories, stored as time-based "epochs". We currently divide epochs into roughly one gigabyte segments; but this size can be configurable (if needed) in the future. A pleasant side-effect of this design is the git packs of older epochs are stable, allowing them to be cloned without requiring expensive pack generation. This also allows clients to clone only the epochs they are interested in to save bandwidth and storage. To minimize changes to existing v1-based code and simplify our code, we use the "alternates" mechanism described in L<gitrepository-layout(5)> to link all the epoch repositories with a single read-only "all.git" endpoint. Processes retrieve blobs via the "all.git" repository, while writers write blobs directly to epochs. =head2 GIT TREE LAYOUT One key problem specific to v1 was large trees were frequently a performance problem as name lookups are expensive and there were limited deltafication opportunities with unpredictable file names. As a result, all Xapian-enabled installations retrieve blob object_ids directly in v1, bypassing tree lookups. While dividing git repositories into epochs caps the growth of trees, worst-case tree size was still unnecessary overhead and worth eliminating. So in contrast to the big trees of v1, the v2 git tree contains only a single file at the top-level of the tree, either 'm' (for 'mail' or 'message') or 'd' (for deleted). A tree does not have 'm' and 'd' at the same time. Mail is still stored in blobs (instead of inline with the commit object) as we still need a stable reference in the indices in case commit history is rewritten to comply with legal requirements. After-the-fact invocations of L<public-inbox-index> will ignore messages written to 'd' after they are written to 'm'. Deltafication is not significantly improved over v1, but overall storage for trees is made as as small as possible. Initial statistics and benchmarks showing the benefits of this approach are documented at: L<https://public-inbox.org/meta/20180209205140.GA11047@dcvr/> =head2 XAPIAN SHARDS Another second scalability problem in v1 was the inability to utilize multiple CPU cores for Xapian indexing. This is addressed by using shards in Xapian to perform import indexing in parallel. As with git alternates, Xapian natively supports a read-only interface which transparently abstracts away the knowledge of multiple shards. This allows us to simplify our read-only code paths. The performance of the storage device is now the bottleneck on larger multi-core systems. In our experience, performance is improved with high-quality and high-quantity solid-state storage. Issuing TRIM commands with L<fstrim(8)> was necessary to maintain consistent performance while developing this feature. Rotational storage devices perform significantly worse than solid state storage for indexing of large mail archives; but are fine for backup and usable for small instances. As of public-inbox 1.6.0, the C<publicInbox.indexSequentialShard> option of L<public-inbox-index(1)> may be used with a high shard count to ensure individual shards fit into page cache when the entire Xapian DB cannot. Our use of the L</OVERVIEW DB> requires Xapian document IDs to remain stable. Using L<public-inbox-compact(1)> and L<public-inbox-xcpdb(1)> wrappers are recommended over tools provided by Xapian. =head2 OVERVIEW DB Towards the end of v2 development, it became apparent Xapian did not perform well for sorting large result sets used to generate the landing page in the PSGI UI (/$INBOX/) or many queries used by the NNTP server. Thus, SQLite was employed and the Xapian "skeleton" DB was renamed to the "overview" DB (after the NNTP OVER/XOVER commands). The overview DB maintains all the header information necessary to implement the NNTP OVER/XOVER commands and non-search endpoints of the PSGI UI. Xapian has become completely optional for v2 (as it is for v1), but SQLite remains required for v2. SQLite turns out to be powerful enough to maintain overview information. Most of the PSGI and all of the NNTP functionality is possible with only SQLite in addition to git. The overview DB was an instrumental piece in maintaining near constant-time read performance on a dataset 2-3 times larger than LKML history as of 2018. =head3 GHOST MESSAGES The overview DB also includes references to "ghost" messages, or messages which have replies but have not been seen by us. Thus it is expected to have more rows than the "msgmap" DB described below. =head2 msgmap.sqlite3 The SQLite msgmap DB is unchanged from v1, but it is now at the top-level of the directory. =head1 OBJECT IDENTIFIERS There are three distinct type of identifiers. content_hash is the new one for v2 and should make message removal and deduplication easier. object_id and Message-ID are already known. =over =item object_id The blob identifier git uses (currently SHA-1). No need to publicly expose this outside of normal git ops (cloning) and there's no need to make this searchable. As with v1 of public-inbox, this is stored as part of the Xapian document so expensive name lookups can be avoided for document retrieval. =item Message-ID The email header; duplicates allowed for archival purposes. This remains a searchable field in Xapian. Note: it's possible for emails to have multiple Message-ID headers (and L<git-send-email(1)> had that bug for a bit); so we take all of them into account. In case of conflicts detected by content_hash below, we generate a new Message-ID based on content_hash; if the generated Message-ID still conflicts, a random one is generated. =item content_hash A hash of relevant headers and raw body content for purging of unwanted content. This is not stored anywhere, but always calculated on-the-fly. For now, the relevant headers are: Subject, From, Date, References, In-Reply-To, To, Cc Received, List-Id, and similar headers are NOT part of content_hash as they differ across lists and we will want removal to be able to cross lists. The textual parts of the body are decoded, CRLF normalized to LF, and trailing whitespace stripped. Notably, hashing the raw body risks being broken by list signatures; but we can use filters (e.g. PublicInbox::Filter::Vger) to clean the body for imports. content_hash is SHA-256 for now; but can be changed at any time without making DB changes. =back =head1 LOCKING L<flock(2)> locking exclusively locks the empty inbox.lock file for all non-atomic operations. =head1 HEADERS Same handling as with v1, except the Message-ID header will be generated if not provided or conflicting. "Bytes", "Lines" and "Content-Length" headers are stripped and not allowed, they can interfere with further processing. The "Status" mbox header is also stripped as that header makes no sense in a public archive. =head1 THANKS Thanks to the Linux Foundation for sponsoring the development and testing of the v2 format. =head1 COPYRIGHT Copyright 2018-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<http://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<gitrepository-layout(5)>, L<public-inbox-v1-format(5)> ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-watch.pod���������������������������������������������0000664�0000000�0000000�00000013037�14300314757�0023140�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-watch - mailbox watcher for public-inbox =head1 SYNOPSIS public-inbox-watch In ~/.public-inbox/config: [publicinbox "test"] ; generic public-inbox-config keys: address = test@example.com url = http://example.com/test inboxdir = /path/to/test.example.com.git ; config keys specific to public-inbox-watch: watch = maildir:/path/to/maildirs/.INBOX.test/ ; optional, emails that don't have a header matching ; value will be skipped watchheader = List-Id:<test.example.com> [publicinboxwatch] ; optional, enable use of spamc(1) for checking: spamcheck = spamc ; optional, emails marked as read which appear ; here will be trained as spam and deleted from ; the inboxdirs of any public-inboxes which are ; configured for watch. ; This is global for all publicinbox.* sections watchspam = maildir:/path/to/maildirs/.INBOX.spam =head1 DESCRIPTION public-inbox-watch allows watching a mailbox or newsgroup for the arrival of new messages and automatically importing them into public-inbox git repositories and indices. public-inbox-watch is useful in situations when a user wishes to mirror an existing mailing list, but has no access to run L<public-inbox-mda(1)> on a server. Unlike public-inbox-mda which is invoked once per-message, public-inbox-watch is a persistent process, making it faster for after-the-fact imports of large Maildirs. Upon startup, it scans the mailbox for new messages to be imported while it was not running. As of public-inbox 1.6.0, Maildirs, IMAP folders, and NNTP newsgroups are supported. Previous versions of public-inbox only supported Maildirs. public-inbox-watch should be run inside a L<screen(1)> session or as a L<systemd(1)> service. Errors are emitted to stderr. =head1 OPTIONS public-inbox-watch takes no command-line options. =head1 CONFIGURATION These configuration knobs should be used in the L<public-inbox-config(5)> file =over 8 =item publicinbox.<name>.watch A location to watch. public-inbox 1.5.0 and earlier only supported C<maildir:> paths: [publicinbox "test"] watch = maildir:/path/to/maildirs/.INBOX.test/ public-inbox 1.6.0 supports C<nntp://>, C<nntps://>, C<imap://> and C<imaps://> URLs: watch = nntp://news.example.com/inbox.test.group watch = imaps://user@mail.example.com/INBOX.test This may be specified multiple times to combine several mailboxes into a single public-inbox. URLs requiring authentication will require L<netrc(5)> and/or L<git-credential(1)> (preferred) to fill in the username and password. Default: none =item publicinbox.<name>.watchheader [publicinbox "test"] watchheader = List-Id:<test.example.com> If specified, L<public-inbox-watch(1)> will only process mail matching the given header. If specified multiple times in public-inbox 1.5 or later, mail will be processed if it matches any of the values. Only the last value was used in public-inbox 1.4 and earlier. Default: none =item publicinboxwatch.spamcheck This may be set to C<spamc> to enable the use of SpamAssassin L<spamc(1)> for filtering spam before it is imported into git history. Other spam filtering backends may be supported in the future. Default: none =item publicinboxwatch.watchspam A Maildir to watch for confirmed spam messages to appear in. Messages which appear in this folder with the (S)een flag will be hidden from all configured inboxes based on Message-ID and content matching. Messages without the (S)een flag are not considered for hiding. This hiding affects all configured public-inboxes in PI_CONFIG. As with C<publicinbox.$NAME.watch>, C<imap://> and C<imaps://> URLs are supported in public-inbox 1.6.0+. Default: none; only for L<public-inbox-watch(1)> users =item imap.Starttls / imap.$URL.Starttls Whether or not to use C<STARTTLS> on plain C<imap://> connections. May be specified for certain URLs via L<git-config(1)/--get-urlmatch> in C<git(1)> 1.8.5+. Default: C<true> =item imap.Compress / imap.$URL.Compress Whether or not to use the IMAP COMPRESS (RFC4978) extension to save bandwidth. This is not supported by all IMAP servers and some advertising this feature may not implement it correctly. May be specified only for certain URLs if L<git(1)> 1.8.5+ is installed to use L<git-config(1)/--get-urlmatch> Default: C<false> =item nntp.Starttls / nntp.$URL.Starttls Whether or not to use C<STARTTLS> on plain C<nntp://> connections. May be specified for certain URLs via L<git-config(1)/--get-urlmatch> in C<git(1)> 1.8.5+. Default: C<false> if the hostname is a Tor C<.onion>, C<true> otherwise =back =head1 SIGNALS =over 8 =item SIGHUP Reload the config file (default: ~/.public-inbox/config) =item SIGUSR1 Rescan all watched mailboxes. This is done automatically after startup. =item SIGQUIT / SIGTERM / SIGINT Gracefully shut down. In-flight messages will be stored and indexed. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG config file. default: ~/.public-inbox/config See L<public-inbox-config(5)> =item PERL_INLINE_DIRECTORY This may affect any public-inbox processes, but is intended for long-lived ones such as C<public-inbox-watch> or network daemons. See L<public-inbox-daemon(8)>. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2016-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-config(5)> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox-xcpdb.pod���������������������������������������������0000664�0000000�0000000�00000007531�14300314757�0023134�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-xcpdb - upgrade Xapian DB formats =head1 SYNOPSIS public-inbox-xcpdb [OPTIONS] INBOX_DIR public-inbox-xcpdb [OPTIONS] --all =head1 DESCRIPTION public-inbox-xcpdb is similar to L<copydatabase(1)> for upgrading to the latest database format supported by Xapian (e.g. "glass" or "honey"), but is designed to tolerate and accept parallel Xapian database modifications from L<public-inbox-watch(1)>, L<public-inbox-mda(1)>, L<public-inbox-learn(1)>, and L<public-inbox-index(1)>. This command is rarely used, as Xapian DB formats rarely change. =head1 OPTIONS =over =item --all Copy all inboxes configured in ~/.public-inbox/config. This is an alternative to specifying individual inboxes directories on the command-line. =item -c =item --compact In addition to performing the copy operation, run L<xapian-compact(1)> on each Xapian shard after copying but before finalizing it. Compared to the cost of copying a Xapian database, compacting a Xapian database takes only around 5% of the time required to copy. Compared to L<public-inbox-compact(1)>, use of this option is preferable for gigantic inboxes where the coarse-grained lock currently required for L<public-inbox-compact(1)> can cause the compaction to take hours at-a-time. =item -R N =item --reshard=N Reshard the Xapian database on a L<v2|public-inbox-v2-format(5)> inbox to C<N> shards . Since L<xapian-compact(1)> is not suitable for merging, users can rely on this switch to reshard the existing Xapian database(s) to any positive value of C<N>. This is useful in case the Xapian DB was created with too few or too many shards given the capabilities of the current hardware. =item --blocksize =item --no-full =item --fuller These options are passed directly to L<xapian-compact(1)> when used with C<--compact>. =item --no-fsync Disable L<fsync(2)> and L<fdatasync(2)>. See L<public-inbox-index(1)/--no-fsync> for caveats. Available in public-inbox 1.6.0+. =item --sequential-shard Copy each shard sequentially, ignoring C<--jobs>. This also affects indexing done at the end of a run. =item --batch-size=BYTES =item --max-size=BYTES See L<public-inbox-index(1)> for a description of these options. These indexing options indexing at the end of a run. C<public-inbox-xcpdb> may run in parallel with with L<public-inbox-index(1)>, and C<public-inbox-xcpdb> needs to reindex changes made to the old Xapian DBs by L<public-inbox-index(1)> while it was running. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG The default config file, normally "~/.public-inbox/config". See L<public-inbox-config(5)> =item XAPIAN_FLUSH_THRESHOLD The number of documents to update before committing changes to disk. This environment is handled directly by Xapian, refer to Xapian API documentation for more details. Default: 10000 =back =head1 UPGRADING This tool is intended for admins upgrading Xapian search databases used by public-inbox, NOT users upgrading public-inbox itself. In particular, it DOES NOT upgrade the schema used by the PSGI search interface (see L<public-inbox-index(1)>). =head1 LIMITATIONS Do not use L<public-inbox-purge(1)> or L<public-inbox-edit(1)> while this is running; old (purged or edited data) may show up. Normal invocations L<public-inbox-index(1)> can safely run while this is running, too. However, reindexing via the L<public-inbox-index(1)/--reindex> switch will be a waste of computing resources. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2019-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<copydatabase(1)>, L<xapian-compact(1)>, L<public-inbox-index(1)> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/public-inbox.cgi.pod�����������������������������������������������0000664�0000000�0000000�00000001746�14300314757�0022601�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox.cgi - CGI wrapper for PublicInbox::WWW =head1 SYNOPSIS You generally want to run public-inbox-httpd, instead =head1 DESCRIPTION public-inbox.cgi provides a CGI interface wrapper on top of the PSGI/Plack L<PublicInbox::WWW> module. It is only provided for compatibility reasons and NOT recommended. CGI with Perl is slow due to code loading overhead and web servers lack the scheduling fairness of L<public-inbox-httpd(1)> for handling git clones and streaming large mbox downloads. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/> =head1 COPYRIGHT Copyright 2019-2021 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-httpd(1)>, L<PublicInbox::WWW>, L<public-inbox-daemon(8)>, ��������������������������public-inbox-1.9.0/Documentation/reproducibility.txt������������������������������������������������0000664�0000000�0000000�00000002210�14300314757�0022676�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������reproducibility => forkability ------------------------------ The ability to fork a project is a checks and balances system for free software projects. Reproducibility is key to forkability since every mirror is potential fork. git makes the code history of projects fully reproducible. public-inbox uses git to make the email history of projects reproducible. Keeping all communications as email ensures the full history of the entire project can be mirrored by anyone with the resources to do so. Compact, low-complexity data requires less resources to mirror, so sticking with plain-text ensures more parties can mirror and potentially fork the project with all its data. Any private or irreproducible data is a barrier to forking. These include mailing list subscriber information and non-federated user identities. The "pull" subscriber model of NNTP and Atom feeds combined with open-to-all posting means there's no need for private data. If these things make power hungry project leaders and admins uncomfortable, good. That was the point. It's how checks and balances ought to work. Comments, corrections, etc welcome: meta@public-inbox.org ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/standards.perl�����������������������������������������������������0000775�0000000�0000000�00000006727�14300314757�0021617�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/perl -w use v5.12; # Copyright all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> print <<EOF; Relevant standards for public-inbox users and hackers ----------------------------------------------------- Non-exhaustive list of standards public-inbox software attempts or intends to implement. This list is intended to be a quick reference for hackers and users. Given the goals of interoperability and accessibility; strict conformance to standards is not always possible, but rather best-effort taking into account real-world cases. In particular, "obsolete" standards remain relevant as long as clients and data exists. IETF RFCs --------- EOF my $rfcs = [ 3977 => 'NNTP', 977 => 'NNTP (old)', 1036 => 'Standard for Interchange of USENET Messages', 5536 => 'Netnews Article Format', 5537 => 'Netnews Architecture and Protocols', 1738 => 'Uniform resource locators', 5092 => 'IMAP URL scheme', 5538 => 'NNTP URI schemes', 6048 => 'NNTP additions to LIST command (TODO)', 8054 => 'NNTP compression', 4642 => 'NNTP TLS', 8143 => 'NNTP TLS', 2980 => 'NNTP extensions (obsolete, but NOT irrelevant)', 4287 => 'Atom syndication', 4685 => 'Atom threading extensions', 2919 => 'List-Id mail header', 5064 => 'Archived-At mail header', 3986 => 'URI escaping', 1521 => 'MIME extensions', 2616 => 'HTTP/1.1 (newer updates should apply, too)', 7230 => 'HTTP/1.1 message syntax and routing', 7231 => 'HTTP/1.1 semantics and content', 822 => 'Internet message format (1982)', 2822 => 'Internet message format (2001)', 5322 => 'Internet message format (2008)', 3501 => 'IMAP4rev1', 2177 => 'IMAP IDLE', 2683 => 'IMAP4 Implementation Recommendations', # 5032 = 'WITHIN search extension for IMAP', 4978 => 'IMAP COMPRESS Extension', # 5182 = 'IMAP Extension for Referencing the Last SEARCH Result', # 5256 => 'IMAP SORT and THREAD extensions', # 5738 => 'IMAP Support for UTF-8', # 8474 => 'IMAP Extension for Object Identifiers', # 8620 => JSON Meta Application Protocol (JMAP) # 8621 => JSON Meta Application Protocol (JMAP) for Mail # ... # examples/unsubscribe.milter and PublicInbox::Unsubscribe 2369 => 'URLs as Meta-Syntax for Core Mail List Commands', 8058 => 'Signaling One-Click Functionality for List Email Headers', 1081 => 'Post Office Protocol – Version 3', 1939 => 'Post Office Protocol – Version 3 (STD 53)', 2449 => 'POP3 extension mechanism', 2595 => 'STARTTLS for IMAP and POP3', 2384 => 'POP URL Scheme', # TODO: flesh this out ]; my @rfc_urls = qw(tools.ietf.org/html/rfc%d www.rfc-editor.org/errata_search.php?rfc=%d); for (my $i = 0; $i < $#$rfcs;) { my $num = $rfcs->[$i++]; my $txt = $rfcs->[$i++]; print "rfc$num\t- $txt\n"; printf "\thttps://$_\n", $num foreach @rfc_urls; print "\n"; } print <<'EOF' Other relevant documentation ---------------------------- * IMAP capabilities registry and response codes: https://www.iana.org/assignments/imap-capabilities https://www.iana.org/assignments/imap-response-codes * Documentation/technical/http-protocol.txt in git source code: https://public-inbox.org/git/9c5b6f0fac/s * Various mbox formats (we currently emit and parse mboxrd) https://en.wikipedia.org/wiki/Mbox * PSGI/Plack specifications (as long as our web frontend uses Perl5) git clone https://github.com/plack/psgi-specs.git Copyright --------- Copyright (C) all contributors <meta@public-inbox.org> License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> EOF �����������������������������������������public-inbox-1.9.0/Documentation/technical/���������������������������������������������������������0000775�0000000�0000000�00000000000�14300314757�0020663�5����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/technical/data_structures.txt��������������������������������������0000664�0000000�0000000�00000017142�14300314757�0024645�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������Internal data structures of public-inbox This is a guide for hackers new to our code base. Do not consider our internal data structures stable for external consumers, this document should be updated when internals change. I recommend reading this document from the source tree, with the source code easily accessible if you need examples. This mainly documents in-memory data structures. If you're interested in the stable on-filesystem formats, see the public-inbox-config(5), public-inbox-v1-format(5) and public-inbox-v2-format(5) manpages. Common abbreviations when used outside of their packages are documented. `$self' is the common variable name when used within their package. PublicInbox::Config ------------------- PublicInbox::Config is the root class which loads a public-inbox-config file and instantiates PublicInbox::Inbox, PublicInbox::WWW, PublicInbox::NNTPD, and other top-level classes. Outside of tests, this is typically a singleton. Per-message classes ------------------- * PublicInbox::Eml - Email::MIME-like class Common abbreviation: $mime, $eml Used by: PublicInbox::WWW, PublicInbox::SearchIdx An representation of an entire email, multipart or not. An option to use libgmime or libmailutils may be supported in the future for performance and memory use. This can be a memory hog with big messages and giant attachments, so our PublicInbox::WWW interface only keeps one object of this class in memory at-a-time. In other words, this is the "meat" of the message, whereas $smsg (below) is just the "skeleton". Our PublicInbox::V2Writable class may have two objects of this type in memory at-a-time for deduplication. In public-inbox 1.4 and earlier, Email::MIME and its subclass, PublicInbox::MIME were used. Despite still slurping, PublicInbox::Eml is faster and uses less memory due to lazy header parsing and lazy subpart instantiation with shorter object lifetimes. * PublicInbox::Smsg - small message skeleton Used by: PublicInbox::{NNTP,WWW,SearchIdx} Common abbreviation: $smsg Represents headers shown in NNTP overview and PSGI message summaries (thread skeleton). This is loaded from either the overview DB (over.sqlite3) or the Xapian DB (docdata.glass), though the Xapian docdata is won't hold NNTP-only fields (Cc:/To:) There may be hundreds or thousands of these objects in memory at-a-time, so fields are pruned if unneeded. * PublicInbox::SearchThread::Msg - subclass of Smsg Common abbreviation: $cont or $node Used by: PublicInbox::WWW The structure we use for a non-recursive[1] variant of JWZ's algorithm: <https://www.jwz.org/doc/threading.html>. Nowadays, this is a re-blessed $smsg with additional fields. As with $smsg objects, there may be hundreds or thousands of these objects in memory at-a-time. We also do not use a linked-list for storing children as JWZ describes, but instead a Perl hashref for {children} which becomes an arrayref upon sorting. [1] https://rt.cpan.org/Ticket/Display.html?id=116727 Per-inbox classes ----------------- * PublicInbox::Inbox - represents a single public-inbox Common abbreviation: $ibx Used everywhere This represents a "publicinbox" section in the config file, see public-inbox-config(5) for details. * PublicInbox::Git - represents a single git repository Common abbreviation: $git, $ibx->git Used everywhere. Each configured "publicinbox" or "coderepo" has one of these. * PublicInbox::Msgmap - msgmap.sqlite3 read-write interface Common abbreviation: $mm, $ibx->mm Used everywhere if SQLite is available. Each indexed inbox has one of these, see public-inbox-v1-format(5) and public-inbox-v2-format(5) manpages for details. * PublicInbox::Over - over.sqlite3 read-only interface Common abbreviation: $over, $ibx->over Used everywhere if SQLite is available. Each indexed inbox has one of these, see public-inbox-v1-format(5) and public-inbox-v2-format(5) manpages for details. * PublicInbox::Search - Xapian read-only interface Common abbreviation: $srch, $ibx->search Used everywhere if Search::Xapian (or Xapian.pm) is available. Each indexed inbox has one of these, see public-inbox-v1-format(5) and public-inbox-v2-format(5) manpages for details. PublicInbox::WWW ---------------- The main PSGI web interface, uses several other packages to form our web interface. PublicInbox::SolverGit ---------------------- This is instantiated from the $INBOX/$BLOB_OID/s/ WWW endpoint and represents the stages and states for "solving" a blob by searching for and applying patches. See the code and comments in PublicInbox/SolverGit.pm PublicInbox::Qspawn ------------------- This is instantiated from various WWW endpoints and represents the stages and states for running and managing subprocesses in a way which won't exceed configured process limits defined via "publicinboxlimiter.*" directives in public-inbox-config(5). ad-hoc structures shared across packages ---------------------------------------- * $ctx - PublicInbox::WWW app request context This holds the PSGI $env as well as any internal variables used by various modules of PublicInbox::WWW. As with the PSGI $env, there is one per-active WWW request+response cycle. It does not exist for idle HTTP clients. daemon classes -------------- * PublicInbox::NNTP - a NNTP client socket Common abbreviation: $nntp Used by: PublicInbox::DS, public-inbox-nntpd Unlike PublicInbox::HTTP, all of the NNTP client logic for serving to NNTP clients is here, including what would be in $ctx on the HTTP or WWW side. There may be thousands of these since we support thousands of NNTP clients. * PublicInbox::HTTP - a HTTP client socket Common abbreviation: $http Used by: PublicInbox::DS, public-inbox-httpd Unlike PublicInbox::NNTP, this class no knowledge of any of the email or git-specific parts of public-inbox, only PSGI. However, it supports APIs and behaviors (e.g. streaming large responses) which PublicInbox::WWW may take advantage of. There may be thousands of these since we support thousands of HTTP clients. * PublicInbox::Listener - a SOCK_STREAM listen socket (TCP or Unix) Used by: PublicInbox::DS, public-inbox-httpd, public-inbox-nntpd Common abbreviation: @listeners in PublicInbox::Daemon This class calls non-blocking accept(2) or accept4(2) on a listen socket to create new PublicInbox::HTTP and PublicInbox::HTTP instances. * PublicInbox::HTTPD Common abbreviation: $httpd Represents an HTTP daemon which creates PublicInbox::HTTP wrappers around client sockets accepted from PublicInbox::Listener. Since the SERVER_NAME and SERVER_PORT PSGI variables needs to be exposed for HTTP/1.0 requests when Host: headers are missing, this is per-Listener socket. * PublicInbox::HTTPD::Async Common abbreviation: $async Used for implementing an asynchronous "push" interface for slow, expensive responses which may require spawning git-httpd-backend(1), git-apply(1) or other commands. This will also be used for dealing with future asynchronous operations such as HTTP reverse proxying and slow storage retrieval operations. * PublicInbox::NNTPD Common abbreviation: $nntpd Represents an NNTP daemon which creates PublicInbox::NNTP wrappers around client sockets accepted from PublicInbox::Listener. This is currently a singleton, but it is associated with a given PublicInbox::Config which may be instantiated more than once in the future. * PublicInbox::EOFpipe Used throughout to trigger a callback when a pipe(7) is closed. This is frequently used to portably detect process exit without relying on a catch-all waitpid(-1, ...) call. ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/technical/ds.txt���������������������������������������������������0000664�0000000�0000000�00000011540�14300314757�0022033�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������PublicInbox::DS - event loop and async I/O base class Our PublicInbox::DS event loop which powers public-inbox-nntpd and public-inbox-httpd diverges significantly from the unmaintained Danga::Socket package we forked from. In fact, it's probably different from most other event loops out there. Most notably: * There is one and only one callback: ->event_step. Unlike other event loops, there are no separate callbacks for read, write, error or hangup events. In fact, we never care which kevent filter or poll/epoll event flag (e.g. POLLIN/POLLOUT/POLLHUP) triggers a call. The lack of read/write callback distinction is driven by the fact TLS libraries (e.g. OpenSSL via IO::Socket::SSL) may declare SSL_WANT_READ on SSL_write(), and SSL_WANT_READ on SSL_read(). So we end up having to let each user object decide whether it wants to make read or write calls depending on its internal state, completely independent of the event loop. Error and hangup (POLLERR and POLLHUP) callbacks are redundant and only triggered in rare cases. They're redundant because the result of every read and write call in ->event_step must be checked, anyways. At best, callbacks for POLLHUP and POLLERR can save one syscall per socket lifetime and not worth the extra code it imposes. Reducing the user-supplied code down to a single callback allows subclasses to keep their logic self-contained. The combination of this change and one-shot wakeups (see below) for bidirectional data flows make asynchronous code easier to reason about. Other divergences: * ->write buffering uses temporary files whereas Danga::Socket used the heap. The rationale for this is the kernel already provides ample (and configurable) space for socket buffers. Modern kernels also cache FS operations aggressively, so systems with ample RAM are unlikely to notice degradation, while small systems are less likely to suffer unpredictable heap fragmentation, swap and OOM penalties. In the future, we may introduce sendfile and mmap+SSL_write to reduce data copies, and use FALLOC_FL_PUNCH_HOLE on Linux to release space after the buffer is partially cleared. Augmented features: * obj->write(CODEREF) passes the object itself to the CODEREF Being able to enqueue subroutine calls is a powerful feature in Danga::Socket for keeping linear logic in an asynchronous environment. Unfortunately, each subroutine takes several kilobytes of memory. One small change to Danga::Socket is to pass the receiver object (aka "$self") to the CODEREF. $self can store any necessary state it needs for a normal (named) subroutine. This allows us to put the same sub into multiple queues without paying a large memory penalty for each one. This idea is also more easily ported to C or other languages which lack anonymous subroutines (aka "closures"). * ->requeue support. An optimization of the AddTimer(0, ...) idiom for immediately dispatching code at the next event loop iteration. public-inbox uses this for fairly generating large responses iteratively (see PublicInbox::NNTP::long_response or ibx_async_cat for blob retrievals). New features * One-shot wakeups allowed via EPOLLONESHOT or EV_DISPATCH. These flags allow us to simplify code in ->event_step callbacks for bidirectional sockets (NNTP and HTTP). Instead of merely reacting to events, control is handed over at ->event_step in one-shot scenarios. The event_step caller (NNTP || HTTP) then becomes proactive in declaring which (if any) events it's interested in for the next loop iteration. * Edge-triggering available via EPOLLET or EV_CLEAR. These reduce wakeups for unidirectional classes when throughput is more important than fairness. * IO::Socket::SSL support (for NNTPS, STARTTLS+NNTP, HTTPS) * dwaitpid (waitpid wrapper) support for reaping dead children * reliable signal wakeups are supported via signalfd on Linux, EVFILT_SIGNAL on *BSDs via IO::KQueue. Removed features * Many fields removed or moved to subclasses, so the underlying hash is smaller and suitable for FDs other than stream sockets. Some fields we enforce (e.g. wbuf, wbuf_off) are autovivified on an as-needed basis to save memory when they're not needed. * TCP_CORK support removed, instead we use MSG_MORE on non-TLS sockets and we may use vectored I/O support via GnuTLS in the future for TLS sockets. * per-FD PLCMap (post-loop callback) removed, we got ->requeue support where no extra hash lookups or assignments are necessary. * read push backs removed. Some subclasses use a read buffer ({rbuf}) but they control it, not this event loop. * Profiling and debug logging removed. Perl and OS-specific tracers and profilers are sufficient. * ->AddOtherFds support removed, everything watched is a subclass of PublicInbox::DS, but we've slimmed down the fields to eliminate the memory penalty for objects. ����������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/technical/memory.txt�����������������������������������������������0000664�0000000�0000000�00000003622�14300314757�0022737�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������semi-automatic memory management in public-inbox ------------------------------------------------ The majority of public-inbox is implemented in Perl 5, a language and interpreter not particularly known for being memory-efficient. We strive to keep processes small to improve locality, allow the kernel to cache more files, and to be a good neighbor to other processes running on the machine. Taking advantage of automatic reference counting (ARC) in Perl allows us deterministically release memory back to the heap. We start with a simple data model with few circular references. This both eases human understanding and reduces the likelihood of bugs. Knowing the relative sizes and quantities of our data structures, we limit the scope of allocations as much as possible and keep large allocations shortest-lived. This minimizes both the cognitive overhead on humans in addition to reducing memory pressure on the machine. Short-lived non-immortal closures (aka "anonymous subs") are avoided in long-running daemons unless required for compatibility with PSGI. Closures are memory-intensive and may make allocation lifetimes less obvious to humans. They are also the source of memory leaks in older versions of Perl, including 5.16.3 found in enterprise distros. We also use Perl's `delete' and `undef' built-ins to drop reference counts sooner than scope allows. These functions are required to break the few reference cycles we have that would otherwise lead to leaks. Of note, `undef' may be used in two ways: 1. to free(3) the underlying buffer: undef $scalar; 2. to reset a buffer but reduce realloc(3) on subsequent growth: $scalar = ""; # useful when repeated appending $scalar = undef; # usually not needed In the future, our internal data model will be further flattened and simplified to reduce the overhead imposed by small objects. Large allocations may also be avoided by optionally using Inline::C. ��������������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/technical/whyperl.txt����������������������������������������������0000664�0000000�0000000�00000014630�14300314757�0023122�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������why public-inbox is currently implemented in Perl 5 --------------------------------------------------- While Perl has many detractors and there's a lot not to like about Perl, we use it anyways because it offers benefits not (yet) available from other languages. This document is somewhat inspired by https://sqlite.org/whyc.html Other languages and runtimes may eventually be a possibility for us, and this document can serve as our requirements list for possible replacements. As always, comments and corrections and additions welcome at <meta@public-inbox.org>. We're not Perl experts, either. Good Things ----------- * Availability Perl 5 is installed on many, if not most GNU/Linux and BSD-based servers and workstations. It is likely the most widely-installed programming environment that offers a significant amount of POSIX functionality. Users won't have to waste bandwidth or space with giant toolchains or architecture-specific binaries. Furthermore, Perl documentation is typically installed locally as manpages, allowing users to quickly refer to documentation as needed. * Scripted, always editable by the end user Users cannot lose access to the source code. Code written entirely in any scripting language automatically satisfies the GPL-2.0, making it easier to satisfy the AGPL-3.0. Use of a scripting language improves auditability for malicious changes. It also reduces storage and bandwidth requirements for distributors, as the same scripts can be shared across multiple OSes and architectures. Perl's availability and the low barrier to entry of scripting ensures it's easy for users to exercise their software freedom. * Predictable performance While Perl is neither fast or memory-efficient, its performance and memory use are predictable and does not require GC tuning by the user. public-inbox is developed for (and mostly on) old hardware. Perl was fast enough to power the web of the late 1990s, and any cheap VPS today has more than enough RAM and CPU for handling plain-text email. Low hardware requirements increases the reach of our software to more users, improving centralization resistance. * Compatibility Unlike similarly powerful scripting languages, there is no forced migration to a major new version. From 2000-2020, Perl had fewer breaking changes than Python or Ruby; we expect that trend to continue given the inertia of Perl 5. As of April 2021, the Perl Steering Committee has confirmed Perl 7 will require `use v7.0' and existing code should continue working unchanged: https://nntp.perl.org/group/perl.perl5.porters/259789 <CAMvkq_SyTKZD=1=mHXwyzVYYDQb8Go0N0TuE5ZATYe_M4BCm-g@mail.gmail.com> * Built for text processing Our focus is plain-text mail, and Perl has many built-ins optimized for text processing. It also has good support for UTF-8 and legacy encodings found in old mail archives. * Integration with distros and non-Perl libraries Perl modules and bindings to common libraries such as SQLite and Xapian are already distributed by many GNU/Linux distros and BSD ports. There should be no need to rely on language-specific package managers such as cpan(1), those systems increase the learning curve for users and systems administrators. * Compactness and terseness Less code generally means less bugs. We try to avoid the "line noise" stereotype of some Perl codebases, yet still manage to write less code than one would with non-scripting languages. * Performance ceiling and escape hatch With optional Inline::C, we can be "as fast as C" in some cases. Inline::C is widely-packaged by distros and it gives us an escape hatch for dealing with missing bindings or performance problems should they arise. Inline::C use (as opposed to XS) also preserves the software freedom and auditability benefits to all users. Unfortunately, most C toolchains are big; so Inline::C will always be optional for users who cannot afford the bandwidth or space. Bad Things ---------- * Slow startup time. Tokenization, parsing, and compilation of pure Perl is not cached. Inline::C does cache its results, however. We work around slow startup times in tests by preloading code, similar to how mod_perl works for CGI. * High space overhead and poor locality of small data structures, including the optree. This may not be fixable in Perl itself given compatibility requirements of the C API. These problems are exacerbated on modern 64-bit platforms, though the Linux x32 ABI offers promise. * Lack of vectored I/O support (writev, sendmmsg, etc. syscalls) and "newer" POSIX functions in general. APIs end up being slurpy, favoring large buffers and memory copies for concatenation rather than rope (aka "cord") structures. * While mmap(2) is available via PerlIO::mmap, string ops (m//, substr(), index(), etc.) still require memory copies into userspace, negating a benefit of zero-copy. * The XS/C API make it difficult to improve internals while preserving compatibility. * Lack of optional type checking. This may be a blessing in disguise, though, as it encourages us to simplify our data models and lowers cognitive overhead. * SMP support is mostly limited to fork(), since many libraries (including much of the standard library) are not thread-safe. Even with threads.pm, sharing data between interpreters within the same process is inefficient due to the lack of lock-free and wait-free data structures from projects such as Userspace RCU. * Process spawning speed degrades as memory use increases. We work around this optionally via Inline::C and vfork(2), since Perl lacks an approximation of posix_spawn(3). We also use `undef' and `delete' ops to free large buffers as soon as we're done using them to save memory. Red herrings to ignore when evaluating other runtimes ----------------------------------------------------- These don't discount a language or runtime from being being used, they're just not interesting. * Lightweight threading While lightweight threading implementations are convenient, they tend to be significantly heavier than a pure event-loop systems (or multi-threaded event-loop systems) Lightweight threading implementations have stack overhead and growth typically measured in kilobytes. The userspace state overhead of event-based systems is an order of magnitude less, and a sunk cost regardless of concurrency model. ��������������������������������������������������������������������������������������������������������public-inbox-1.9.0/Documentation/txt2pre������������������������������������������������������������0000775�0000000�0000000�00000012103�14300314757�0020264�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env perl # Copyright (C) 2014-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Stupid script to make HTML from preformatted, utf-8 text versions, # only generating links for http(s). Markdown does too much # and requires indentation to output preformatted text. use strict; use warnings; use PublicInbox::Linkify; use PublicInbox::Hval qw(ascii_html); my %xurls; for (qw[lei(1) lei-add-external(1) lei-add-watch(1) lei-blob(1) lei-config(1) lei-convert(1) lei-daemon(8) lei-daemon-kill(1) lei-daemon-pid(1) lei-edit-search(1) lei-export-kw(1) lei-forget-external(1) lei-forget-mail-sync(1) lei-forget-search(1) lei-import(1) lei-index(1) lei-init(1) lei-inspect(1) lei-lcat(1) lei-ls-external(1) lei-ls-label(1) lei-ls-mail-source(1) lei-ls-mail-sync(1) lei-ls-search(1) lei-ls-watch(1) lei-mail-diff(1) lei-mail-sync-overview(7) lei-overview(7) lei-p2q(1) lei-q(1) lei-rediff(1) lei-refresh-mail-sync(1) lei-rm(1) lei-rm-watch(1) lei-security(7) lei-store-format(5) lei-tag(1) lei-up(1) public-inbox.cgi(1) public-inbox-clone(1) public-inbox-config(5) public-inbox-config(5) public-inbox-convert(1) public-inbox-daemon(8) public-inbox-edit(1) public-inbox-fetch(1) public-inbox-glossary(7) public-inbox-httpd(1) public-inbox-imapd(1) public-inbox-index(1) public-inbox-init(1) public-inbox-learn(1) public-inbox-mda(1) public-inbox-nntpd(1) public-inbox-overview(7) public-inbox-purge(1) public-inbox-v1-format(5) public-inbox-v2-format(5) public-inbox-watch(1) public-inbox-xcpdb(1) ]) { my ($n) = (/([\w\-\.]+)/); $xurls{$_} = "$n.html"; $xurls{$n} = "$n.html"; } for (qw[make(1) flock(2) setrlimit(2) vfork(2) tmpfs(5) inotify(7) unix(7) syslog(3)]) { my ($n, $s) = (/([\w\-]+)\((\d)\)/); $xurls{$_} = "https://www.man7.org/linux/man-pages/man$s/$n.$s.html"; } for (qw[git(1) git-am(1) git-apply(1) git-config(1) git-credential(1) git-daemon(1) git-diff(1) git-fast-import(1) git-fetch(1) git-filter-branch(1) git-format-patch(1) git-gc(1) git-http-backend(1) git-imap-send(1) git-init(1) git-send-email(1) gitrepository-layout(5) gitglossary(7) ]) { my ($n) = (/([\w\-\.]+)/); $xurls{$_} = "https://kernel.org/pub/software/scm/git/docs/$n.html" } for (qw[ sd_listen_fds(3) systemd(1) systemd.unit(5) systemd.socket(5) ]) { my ($n) = (/([\w\-\.]+)/); $xurls{$_} = "https://www.freedesktop.org/software/systemd/man/$n.html"; } # favor upstream docs if they exist, use manpages.debian.org if they don't $xurls{'netrc(5)'} = 'https://manpages.debian.org/stable/ftp/netrc.5.en.html'; $xurls{'mbsync(1)'} = 'https://manpages.debian.org/stable/isync/mbsync.1.en.html'; $xurls{'offlineimap(1)'} = 'https://manpages.debian.org/stable/offlineimap/offlineimap.1.en.html'; $xurls{'spamc(1)'} = 'https://spamassassin.apache.org/full/3.4.x/doc/spamc.html'; $xurls{'grok-pull'} = 'https://git.kernel.org/pub/scm/utils/grokmirror/grokmirror.git' . '/tree/man/grok-pull.1.rst'; $xurls{'git-filter-repo(1)'} = 'https://github.com/newren/git-filter-repo'. '/blob/master/Documentation/git-filter-repo.txt'; $xurls{'ssoma(1)'} = 'https://ssoma.public-inbox.org/ssoma.txt'; $xurls{'cgitrc(5)'} = 'https://git.zx2c4.com/cgit/tree/cgitrc.5.txt'; $xurls{'prove(1)'} = 'https://perldoc.perl.org/prove.html'; $xurls{'mbox(5)'} = 'https://manpages.debian.org/stable/mutt/mbox.5.en.html'; $xurls{'mmdf(5)'} = 'https://manpages.debian.org/stable/mutt/mmdf.5.en.html'; $xurls{'mutt(1)'} = 'https://manpages.debian.org/stable/mutt/mutt.1.en.html'; $xurls{'torsocks(1)'} = 'https://manpages.debian.org/stable/torsocks/torsocks.1.en.html'; $xurls{'curl(1)'} = 'https://manpages.debian.org/stable/curl/curl.1.en.html'; $xurls{'copydatabase(1)'} = 'https://manpages.debian.org/stable/xapian-tools/copydatabase.1.en.html'; $xurls{'xapian-compact(1)'} = 'https://manpages.debian.org/stable/xapian-tools/xapian-compact.1.en.html'; $xurls{'gzip(1)'} = 'https://manpages.debian.org/stable/gzip/gzip.1.en.html'; $xurls{'chmod(1)'} = 'https://manpages.debian.org/stable/coreutils/chmod.1.en.html'; $xurls{'kqueue(2)'} = 'https://www.freebsd.org/cgi/man.cgi?query=kqueue&sektion=2'; $xurls{'notmuch(1)'} = 'https://notmuchmail.org/manpages/notmuch-1/'; $xurls{'mairix(1)'} = 'https://manpages.debian.org/stable/mairix/mairix.1.en.html'; my $str = do { local $/; <STDIN> }; my ($title) = ($str =~ /\A([^\n]+)/); if ($str =~ /^NAME\n\s+([^\n]+)/sm) { # don't link to ourselves $title = $1; if ($title =~ /([\w\.\-]+)/) { delete $xurls{$1}; } } $title = ascii_html($title); my $l = PublicInbox::Linkify->new; $str = $l->linkify_1($str); $str = ascii_html($str); # longest matches, first my @keys = sort { length($b) <=> length($a) } keys %xurls; my $xkeys = join('|', map { quotemeta } @keys); $str =~ s,(?<![>\w_])($xkeys)(?!(?:[\w<\-]|\.html)), qq(<a\nhref=").$xurls{$1}.qq(">$1).($2//'').'</a>',sge; $str = $l->linkify_2($str); print '<html><head>', qq(<meta\nhttp-equiv="Content-Type"\ncontent="text/html; charset=utf-8"\n/>), "<title>$title", "
",  $str , '
'; STDOUT->flush; public-inbox-1.9.0/HACKING000066400000000000000000000107131430031475700151110ustar00rootroot00000000000000hacking public-inbox -------------------- Send all patches and "git request-pull"-formatted emails to our self-hosting inbox at meta@public-inbox.org It is archived at: https://public-inbox.org/meta/ and http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/ (using Tor) Contributions are email-driven, just like contributing to git itself or the Linux kernel; however anonymous and pseudonymous contributions will always be welcome. Please consider our goals in mind: Decentralization, Accessibility, Compatibility, Performance These goals apply to everyone: users viewing over the web or NNTP, sysadmins running public-inbox, and other hackers working public-inbox. We will reject any feature which advocates or contributes to any particular instance of a public-inbox becoming a single point of failure. Things we've considered but rejected include: * exposing article serial numbers outside of NNTP * allowing readers to inject metadata (e.g. votes) We care about being accessible to folks with vision problems and/or lack the computing resources to view so-called "modern" websites. This includes folks on slow connections and ancient browsers which may be too difficult to upgrade due to resource demands. Only depend on Free Software packages which exist in the "main" section of Debian "stable" distribution. That is Debian 9.x ("stretch") as of this writing, but "oldstable" (8.x, "jessie") remains supported for v1 inboxes. In general, we favor mature and well-tested old things rather than the shiny new. Avoid relying on compiled modules too much. Even if it is Free, compiled code makes packages more expensive to audit, build, distribute and verify. public-inbox itself will only be implemented in scripting languages (currently Perl 5) and optional Just-Ahead-of-Time-compiled C (via Inline::C) Do not recurse on user-supplied data. Neither Perl or C handle deep recursion gracefully. See lib/PublicInbox/SearchThread.pm and lib/PublicInbox/MsgIter.pm for examples of non-recursive alternatives to previously-recursive algorithms. Performance should be reasonably good for server administrators, too, and we will sacrifice features to achieve predictable performance. Encouraging folks to self-host will be easier with lower hardware requirements. See design_www.txt and design_notes.txt in the Documentation/ directory for design decisions made during development. See Documentation/technical/ in the source tree for more details on specific topics, in particular data_structures.txt Optional packages for testing and development --------------------------------------------- Optional packages testing and development: - Plack::Test deb: libplack-test-perl pkg: p5-Plack rpm: perl-Plack-Test - Plack::Test::ExternalServer deb: libplack-test-externalserver-perl pkg: p5-Plack-Test-ExternalServer - Test::Simple deb: perl-modules-5.$MINOR pkg: perl5 rpm: perl-Test-Simple - XML::TreePP deb: libxml-treepp-perl pkg: p5-XML-TreePP rpm: perl-XML-TreePP Email::MIME is optional as of public-inbox v1.5.0 but still used for maintainer comparison tests: * Email::MIME deb: libemail-mime-perl pkg: p5-Email-MIME rpm: perl-Email-MIME Faster tests ------------ The `make test' target provided by MakeMaker does not run in parallel. Our `make check' target supports parallel runs, and it also creates a `.prove' file to optimize `make check-run'. The prove(1) command (distributed with Perl) may also be used for finer-grained testing: prove -bvw t/foo.t If using a make(1) (e.g. GNU make) with `include' support, the `config.mak' Makefile snippet can be used to set environment variables such as PERL_INLINE_DIRECTORY and TMPDIR. With PERL_INLINE_DIRECTORY set to enable Inline::C support and TMPDIR pointed to a tmpfs(5) mount, `make check-run' takes 6-10s (load-dependent) on a busy workstation built in 2010. Perl notes ---------- * \w, \s, \d character classes all match Unicode characters; so write out class ranges (e.g "[0-9]") if you only intend to match ASCII. Do not use the "/a" (ASCII) modifier, that requires Perl 5.14 and we're only depending on 5.10.1 at the moment. public-inbox-1.9.0/INSTALL000066400000000000000000000222541430031475700151560ustar00rootroot00000000000000public-inbox (server-side) installation --------------------------------------- This is for folks who want to setup their own public-inbox instance. Clients should use normal git-clone/git-fetch, IMAP or NNTP clients if they want to import mail into their personal inboxes. As of 2021, public-inbox is packaged by several OS distributions, listed in alphabetical order: Debian, GNU Guix, NixOS, and Void Linux. public-inbox is developed on Debian GNU/Linux systems and will never depend on packages outside of the "main" component of the "stable" distribution, currently Debian 10.x ("buster"), but older versions of Debian remain supported. Most packages are available in other GNU/Linux distributions and FreeBSD. CentOS 7.x users will likely want newer git and Xapian packages for better performance and v2 inbox support: https://public-inbox.org/meta/20210421151308.yz5hzkgm75klunpe@nitro.local/ TODO: this still needs to be documented better, also see the scripts/ and sa_config/ directories in the source tree Requirements ------------ public-inbox requires a number of other packages to access its full functionality. The core tools are, of course: * Git (1.8.0+, 2.6+ for writing v2 inboxes) * Perl 5.10.1+ * DBD::SQLite (needed for IMAP, NNTP, message threading, and v2 inboxes) To accept incoming mail into a public inbox, you'll likely want: * MTA - postfix is recommended (for public-inbox-mda) * SpamAssassin (spamc/spamd) (for public-inbox-watch/public-inbox-mda) Beyond that, there is one non-standard Perl package required: * URI deb: liburi-perl pkg: p5-URI rpm: perl-URI (for HTML/Atom generation) Plack and Date::Parse are optional as of public-inbox v1.3.0, but required for older releases: * Plack deb: libplack-perl pkg: p5-Plack rpm: perl-Plack, perl-Plack-Test, (for HTML/Atom generation) - Date::Parse deb: libtimedate-perl pkg: p5-TimeDate rpm: perl-TimeDate (for broken, mostly historical emails) Where "deb" indicates package names for Debian-derived distributions, "pkg" is for the FreeBSD package (maybe other common BSDs, too), and "rpm" is for RPM-based distributions (only known to work on Fedora). Numerous optional modules are likely to be useful as well: - DBD::SQLite deb: libdbd-sqlite3-perl pkg: p5-DBD-SQLite rpm: perl-DBD-SQLite (for v2, IMAP, NNTP, or gzipped mboxes) - Search::Xapian or Xapian(.pm) deb: libsearch-xapian-perl pkg: p5-Search-Xapian OR p5-Xapian rpm: perl-Search-Xapian (HTTP and IMAP search) - Inline::C deb: libinline-c-perl pkg: pkg-Inline-C rpm: perl-Inline (or perl-Inline-C) (speeds up process spawning on Linux, see public-inbox-daemon(8)) - Email::Address::XS deb: libemail-address-xs-perl pkg: pkg-Email-Address-XS (correct parsing of tricky email addresses, phrases and comments, required for IMAP) - Parse::RecDescent deb: libparse-recdescent-perl pkg: p5-Parse-RecDescent rpm: perl-ParseRecDescent (optional, for public-inbox-imapd(1)) - Mail::IMAPClient deb: libmail-imapclient-perl pkg: p5-Mail-IMAPClient rpm: perl-Mail-IMAPClient (optional for lei and public-inbox-watch) - BSD::Resource deb: libbsd-resource-perl pkg: p5-BSD-Resource rpm: perl-BSD-Resource (optional, for PSGI limiters see public-inbox-config(5)) - Plack::Middleware::ReverseProxy deb: libplack-middleware-reverseproxy-perl pkg: p5-Plack-Middleware-ReverseProxy rpm: perl-Plack-Middleware-ReverseProxy (ensures redirects are correct when running behind nginx or Varnish) * highlight deb: libhighlight-perl (for syntax highlighting with coderepo) * xapian-compact (tool) deb: xapian-tools pkg: xapian-core rpm: xapian-core (optional, for public-inbox-compact(1)) * curl (tool) deb, pkg, rpm: curl (for HTTP(S) externals with curl) - Linux::Inotify2 deb: liblinux-inotify2-perl rpm: perl-Linux-Inotify2 (for lei, public-inbox-watch and -imapd on Linux) - IO::KQueue pkg: p5-IO-KQueue (for lei, public-inbox-watch and -imapd on *BSDs) - Net::Server deb: libnet-server-perl pkg: pkg-Net-Server rpm: perl-Net-Server (for HTTP/IMAP/NNTP background daemons, not needed as systemd services or foreground servers) The following module is typically pulled in by dependencies listed above, so there is no need to explicitly install them: - DBI deb: libdbi-perl pkg: p5-DBI rpm: perl-DBI (pulled in by DBD::SQLite) Uncommonly needed modules (see HACKING for development-only modules): - Socket6 deb: libsocket6-perl pkg: p5-Socket6 rpm: perl-Socket6 (pulled in by SpamAssassin and Net::Server, only necessary if using IPv6 with Plack::Middleware::AccessLog or similar on Perl <= 5.12) - Crypt::CBC deb: libcrypt-cbc-perl pkg: p5-Crypt-CBC (for PublicInbox::Unsubscribe (rarely used)) standard MakeMaker installation (Perl) -------------------------------------- To use MakeMaker, you need to ensure ExtUtils::MakeMaker is available. This is typically installed with Perl, but RPM-based systems will likely need to install the `perl-ExtUtils-MakeMaker' package. Once the dependencies are installed, you should be able to build and install the system (into /usr/local) with: perl Makefile.PL make make test # see HACKING for faster tests for hackers make install # root permissions may be needed symlink-install (public-inbox.git and 1.7.0+) --------------------------------------------- For users who lack permissions and/or wish to minimize their installation footprint, the "symlink-install" target is available in public-inbox.git. The following installs symlinks to $HOME/bin pointing to the source tree: perl Makefile.PL make symlink-install prefix=$HOME Other installation notes ------------------------ Debian 8.x (jessie) users, use Debian 8.5 or later if using Xapian: https://bugs.debian.org/808610 public-inbox-* commands will never store unregeneratable data in Xapian nor any other search database we might use; Xapian corruption will not destroy critical data. Note: `lei' DOES store unregeneratable data in Xapian and SQLite. See the public-inbox-overview(7) man page for the next steps once the installation is complete. The following required packages are part of the Perl standard library. Debian-based distros put them in "libperl5.$MINOR" or "perl-modules-5.$MINOR"; and FreeBSD puts them in "perl5". RPM-based distros split them out into separate packages: * Digest::SHA rpm: perl-Digest-SHA * Data::Dumper rpm: perl-Data-Dumper * Encode rpm: perl-Encode * IO::Compress rpm: perl-IO-Compress * Storable rpm: perl-Storable * Text::ParseWords rpm: perl-Text-Parsewords Copyright --------- Copyright 2013-2021 all contributors License: AGPL-3.0+ public-inbox-1.9.0/MANIFEST000066400000000000000000000346101430031475700152550ustar00rootroot00000000000000.editorconfig .gitattributes .gitignore AUTHORS COPYING Documentation/.gitignore Documentation/RelNotes/v1.0.0.eml Documentation/RelNotes/v1.1.0-pre1.eml Documentation/RelNotes/v1.2.0.eml Documentation/RelNotes/v1.3.0.eml Documentation/RelNotes/v1.4.0.eml Documentation/RelNotes/v1.5.0.eml Documentation/RelNotes/v1.6.0.eml Documentation/RelNotes/v1.6.1.eml Documentation/RelNotes/v1.7.0.eml Documentation/RelNotes/v1.8.0.eml Documentation/RelNotes/v1.9.0.eml Documentation/clients.txt Documentation/common.perl Documentation/dc-dlvr-spam-flow.txt Documentation/design_notes.txt Documentation/design_www.txt Documentation/flow.ge Documentation/flow.txt Documentation/hosted.txt Documentation/include.mk Documentation/lei-add-external.pod Documentation/lei-add-watch.pod Documentation/lei-blob.pod Documentation/lei-config.pod Documentation/lei-convert.pod Documentation/lei-daemon-kill.pod Documentation/lei-daemon-pid.pod Documentation/lei-daemon.pod Documentation/lei-edit-search.pod Documentation/lei-export-kw.pod Documentation/lei-forget-external.pod Documentation/lei-forget-mail-sync.pod Documentation/lei-forget-search.pod Documentation/lei-import.pod Documentation/lei-index.pod Documentation/lei-init.pod Documentation/lei-inspect.pod Documentation/lei-lcat.pod Documentation/lei-ls-external.pod Documentation/lei-ls-label.pod Documentation/lei-ls-mail-source.pod Documentation/lei-ls-mail-sync.pod Documentation/lei-ls-search.pod Documentation/lei-ls-watch.pod Documentation/lei-mail-diff.pod Documentation/lei-mail-formats.pod Documentation/lei-mail-sync-overview.pod Documentation/lei-overview.pod Documentation/lei-p2q.pod Documentation/lei-q.pod Documentation/lei-rediff.pod Documentation/lei-refresh-mail-sync.pod Documentation/lei-reindex.pod Documentation/lei-rm-watch.pod Documentation/lei-rm.pod Documentation/lei-security.pod Documentation/lei-store-format.pod Documentation/lei-tag.pod Documentation/lei-up.pod Documentation/lei.pod Documentation/lei_design_notes.txt Documentation/marketing.txt Documentation/mknews.perl Documentation/public-inbox-clone.pod Documentation/public-inbox-compact.pod Documentation/public-inbox-config.pod Documentation/public-inbox-convert.pod Documentation/public-inbox-daemon.pod Documentation/public-inbox-edit.pod Documentation/public-inbox-extindex-format.pod Documentation/public-inbox-extindex.pod Documentation/public-inbox-fetch.pod Documentation/public-inbox-glossary.pod Documentation/public-inbox-httpd.pod Documentation/public-inbox-imapd.pod Documentation/public-inbox-index.pod Documentation/public-inbox-init.pod Documentation/public-inbox-learn.pod Documentation/public-inbox-mda.pod Documentation/public-inbox-netd.pod Documentation/public-inbox-nntpd.pod Documentation/public-inbox-overview.pod Documentation/public-inbox-pop3d.pod Documentation/public-inbox-purge.pod Documentation/public-inbox-tuning.pod Documentation/public-inbox-v1-format.pod Documentation/public-inbox-v2-format.pod Documentation/public-inbox-watch.pod Documentation/public-inbox-xcpdb.pod Documentation/public-inbox.cgi.pod Documentation/reproducibility.txt Documentation/standards.perl Documentation/technical/data_structures.txt Documentation/technical/ds.txt Documentation/technical/memory.txt Documentation/technical/whyperl.txt Documentation/txt2pre HACKING INSTALL MANIFEST Makefile.PL README TODO certs/.gitignore certs/create-certs.perl ci/README ci/deps.perl ci/profiles.sh ci/run.sh contrib/completion/lei-completion.bash contrib/css/216dark.css contrib/css/216light.css contrib/css/README contrib/selinux/el7/publicinbox.fc contrib/selinux/el7/publicinbox.te devel/README devel/syscall-list examples/README examples/README.unsubscribe examples/cgit-commit-filter.lua examples/cgit-wwwhighlight-filter.lua examples/cgit.psgi examples/grok-pull.post_update_hook.sh examples/highlight.psgi examples/lib/.gitignore examples/logrotate.conf examples/newswww.psgi examples/nginx_proxy examples/public-inbox-config examples/public-inbox-httpd.socket examples/public-inbox-httpd@.service examples/public-inbox-imapd.socket examples/public-inbox-imapd@.service examples/public-inbox-netd.socket examples/public-inbox-netd@.service examples/public-inbox-nntpd.socket examples/public-inbox-nntpd@.service examples/public-inbox-watch.service examples/public-inbox.psgi examples/unsubscribe-milter.socket examples/unsubscribe-milter@.service examples/unsubscribe-psgi.socket examples/unsubscribe-psgi@.service examples/unsubscribe.milter examples/unsubscribe.psgi examples/varnish-4.vcl lei.sh lib/PublicInbox/Address.pm lib/PublicInbox/AddressPP.pm lib/PublicInbox/Admin.pm lib/PublicInbox/AdminEdit.pm lib/PublicInbox/AltId.pm lib/PublicInbox/AutoReap.pm lib/PublicInbox/Cgit.pm lib/PublicInbox/CmdIPC4.pm lib/PublicInbox/CompressNoop.pm lib/PublicInbox/Config.pm lib/PublicInbox/ConfigIter.pm lib/PublicInbox/ContentHash.pm lib/PublicInbox/DS.pm lib/PublicInbox/DSKQXS.pm lib/PublicInbox/DSPoll.pm lib/PublicInbox/DSdeflate.pm lib/PublicInbox/Daemon.pm lib/PublicInbox/DirIdle.pm lib/PublicInbox/DummyInbox.pm lib/PublicInbox/EOFpipe.pm lib/PublicInbox/Emergency.pm lib/PublicInbox/Eml.pm lib/PublicInbox/EmlContentFoo.pm lib/PublicInbox/ExtMsg.pm lib/PublicInbox/ExtSearch.pm lib/PublicInbox/ExtSearchIdx.pm lib/PublicInbox/FakeImport.pm lib/PublicInbox/FakeInotify.pm lib/PublicInbox/Feed.pm lib/PublicInbox/Fetch.pm lib/PublicInbox/Filter/Base.pm lib/PublicInbox/Filter/Gmane.pm lib/PublicInbox/Filter/Mirror.pm lib/PublicInbox/Filter/RubyLang.pm lib/PublicInbox/Filter/SubjectTag.pm lib/PublicInbox/Filter/Vger.pm lib/PublicInbox/Gcf2.pm lib/PublicInbox/Gcf2Client.pm lib/PublicInbox/GetlineBody.pm lib/PublicInbox/Git.pm lib/PublicInbox/GitAsyncCat.pm lib/PublicInbox/GitCredential.pm lib/PublicInbox/GitHTTPBackend.pm lib/PublicInbox/GzipFilter.pm lib/PublicInbox/HTTP.pm lib/PublicInbox/HTTPD.pm lib/PublicInbox/HTTPD/Async.pm lib/PublicInbox/HlMod.pm lib/PublicInbox/Hval.pm lib/PublicInbox/IMAP.pm lib/PublicInbox/IMAPClient.pm lib/PublicInbox/IMAPD.pm lib/PublicInbox/IMAPTracker.pm lib/PublicInbox/IMAPsearchqp.pm lib/PublicInbox/IPC.pm lib/PublicInbox/IdxStack.pm lib/PublicInbox/Import.pm lib/PublicInbox/In2Tie.pm lib/PublicInbox/Inbox.pm lib/PublicInbox/InboxIdle.pm lib/PublicInbox/InboxWritable.pm lib/PublicInbox/InputPipe.pm lib/PublicInbox/Isearch.pm lib/PublicInbox/KQNotify.pm lib/PublicInbox/LEI.pm lib/PublicInbox/LI2Wrap.pm lib/PublicInbox/LeiALE.pm lib/PublicInbox/LeiAddExternal.pm lib/PublicInbox/LeiAddWatch.pm lib/PublicInbox/LeiAuth.pm lib/PublicInbox/LeiBlob.pm lib/PublicInbox/LeiConfig.pm lib/PublicInbox/LeiConvert.pm lib/PublicInbox/LeiCurl.pm lib/PublicInbox/LeiDedupe.pm lib/PublicInbox/LeiEditSearch.pm lib/PublicInbox/LeiExportKw.pm lib/PublicInbox/LeiExternal.pm lib/PublicInbox/LeiFinmsg.pm lib/PublicInbox/LeiForgetExternal.pm lib/PublicInbox/LeiForgetMailSync.pm lib/PublicInbox/LeiForgetSearch.pm lib/PublicInbox/LeiHelp.pm lib/PublicInbox/LeiImport.pm lib/PublicInbox/LeiImportKw.pm lib/PublicInbox/LeiIndex.pm lib/PublicInbox/LeiInit.pm lib/PublicInbox/LeiInput.pm lib/PublicInbox/LeiInspect.pm lib/PublicInbox/LeiLcat.pm lib/PublicInbox/LeiLsExternal.pm lib/PublicInbox/LeiLsLabel.pm lib/PublicInbox/LeiLsMailSource.pm lib/PublicInbox/LeiLsMailSync.pm lib/PublicInbox/LeiLsSearch.pm lib/PublicInbox/LeiLsWatch.pm lib/PublicInbox/LeiMailDiff.pm lib/PublicInbox/LeiMailSync.pm lib/PublicInbox/LeiMirror.pm lib/PublicInbox/LeiNoteEvent.pm lib/PublicInbox/LeiOverview.pm lib/PublicInbox/LeiP2q.pm lib/PublicInbox/LeiPmdir.pm lib/PublicInbox/LeiQuery.pm lib/PublicInbox/LeiRediff.pm lib/PublicInbox/LeiRefreshMailSync.pm lib/PublicInbox/LeiReindex.pm lib/PublicInbox/LeiRemote.pm lib/PublicInbox/LeiRm.pm lib/PublicInbox/LeiRmWatch.pm lib/PublicInbox/LeiSavedSearch.pm lib/PublicInbox/LeiSearch.pm lib/PublicInbox/LeiSelfSocket.pm lib/PublicInbox/LeiStore.pm lib/PublicInbox/LeiStoreErr.pm lib/PublicInbox/LeiSucks.pm lib/PublicInbox/LeiTag.pm lib/PublicInbox/LeiToMail.pm lib/PublicInbox/LeiUp.pm lib/PublicInbox/LeiViewText.pm lib/PublicInbox/LeiWatch.pm lib/PublicInbox/LeiXSearch.pm lib/PublicInbox/Linkify.pm lib/PublicInbox/Listener.pm lib/PublicInbox/Lock.pm lib/PublicInbox/MDA.pm lib/PublicInbox/MID.pm lib/PublicInbox/MIME.pm lib/PublicInbox/ManifestJsGz.pm lib/PublicInbox/Mbox.pm lib/PublicInbox/MboxGz.pm lib/PublicInbox/MboxLock.pm lib/PublicInbox/MboxReader.pm lib/PublicInbox/MdirReader.pm lib/PublicInbox/MiscIdx.pm lib/PublicInbox/MiscSearch.pm lib/PublicInbox/MsgIter.pm lib/PublicInbox/MsgTime.pm lib/PublicInbox/Msgmap.pm lib/PublicInbox/MultiGit.pm lib/PublicInbox/NNTP.pm lib/PublicInbox/NNTPD.pm lib/PublicInbox/NetNNTPSocks.pm lib/PublicInbox/NetReader.pm lib/PublicInbox/NetWriter.pm lib/PublicInbox/NewsWWW.pm lib/PublicInbox/OnDestroy.pm lib/PublicInbox/Over.pm lib/PublicInbox/OverIdx.pm lib/PublicInbox/POP3.pm lib/PublicInbox/POP3D.pm lib/PublicInbox/PktOp.pm lib/PublicInbox/ProcessPipe.pm lib/PublicInbox/Qspawn.pm lib/PublicInbox/Reply.pm lib/PublicInbox/SaPlugin/ListMirror.pm lib/PublicInbox/SaPlugin/ListMirror.pod lib/PublicInbox/Search.pm lib/PublicInbox/SearchIdx.pm lib/PublicInbox/SearchIdxShard.pm lib/PublicInbox/SearchQuery.pm lib/PublicInbox/SearchThread.pm lib/PublicInbox/SearchView.pm lib/PublicInbox/SharedKV.pm lib/PublicInbox/Sigfd.pm lib/PublicInbox/Smsg.pm lib/PublicInbox/SolverGit.pm lib/PublicInbox/Spamcheck.pm lib/PublicInbox/Spamcheck/Spamc.pm lib/PublicInbox/Spawn.pm lib/PublicInbox/SpawnPP.pm lib/PublicInbox/Syscall.pm lib/PublicInbox/TLS.pm lib/PublicInbox/TestCommon.pm lib/PublicInbox/Tmpfile.pm lib/PublicInbox/URIimap.pm lib/PublicInbox/URInntps.pm lib/PublicInbox/Unsubscribe.pm lib/PublicInbox/UserContent.pm lib/PublicInbox/V2Writable.pm lib/PublicInbox/View.pm lib/PublicInbox/ViewDiff.pm lib/PublicInbox/ViewVCS.pm lib/PublicInbox/WQBlocked.pm lib/PublicInbox/WQWorker.pm lib/PublicInbox/WWW.pm lib/PublicInbox/WWW.pod lib/PublicInbox/Watch.pm lib/PublicInbox/WwwAltId.pm lib/PublicInbox/WwwAtomStream.pm lib/PublicInbox/WwwAttach.pm lib/PublicInbox/WwwHighlight.pm lib/PublicInbox/WwwListing.pm lib/PublicInbox/WwwStatic.pm lib/PublicInbox/WwwStream.pm lib/PublicInbox/WwwText.pm lib/PublicInbox/Xapcmd.pm lib/PublicInbox/gcf2_libgit2.h sa_config/Makefile sa_config/README sa_config/root/etc/spamassassin/public-inbox.pre sa_config/user/.spamassassin/user_prefs script/lei script/public-inbox-clone script/public-inbox-compact script/public-inbox-convert script/public-inbox-edit script/public-inbox-extindex script/public-inbox-fetch script/public-inbox-httpd script/public-inbox-imapd script/public-inbox-index script/public-inbox-init script/public-inbox-learn script/public-inbox-mda script/public-inbox-netd script/public-inbox-nntpd script/public-inbox-pop3d script/public-inbox-purge script/public-inbox-watch script/public-inbox-xcpdb script/public-inbox.cgi scripts/README scripts/dc-dlvr scripts/dc-dlvr.pre scripts/dupe-finder scripts/edit-sa-prefs scripts/import_maildir scripts/import_slrnspool scripts/import_vger_from_mbox scripts/report-spam scripts/slrnspool2maildir scripts/ssoma-replay scripts/xhdr-num2mid t/.gitconfig t/address.t t/admin.t t/alt.psgi t/altid.t t/altid_v2.t t/cgi.t t/check-www-inbox.perl t/cmd_ipc.t t/config.t t/config_limiter.t t/content_hash.t t/convert-compact.t t/data-gen/.gitignore t/data/0001.patch t/data/binary.patch t/data/message_embed.eml t/dir_idle.t t/ds-kqxs.t t/ds-leak.t t/ds-poll.t t/edit.t t/emergency.t t/eml.t t/eml_content_disposition.t t/eml_content_type.t t/epoll.t t/extindex-psgi.t t/extsearch.t t/fail-bin/spamc t/fake_inotify.t t/feed.t t/filter_base-junk.eml t/filter_base-xhtml.eml t/filter_base.t t/filter_mirror.t t/filter_rubylang.t t/filter_subjecttag.t t/filter_vger.t t/gcf2.t t/gcf2_client.t t/git-http-backend.psgi t/git.fast-import-data t/git.t t/gzip_filter.t t/hl_mod.t t/home2/.gitignore t/home2/Makefile t/home2/README t/httpd-corner.psgi t/httpd-corner.t t/httpd-https.t t/httpd-unix.t t/httpd.t t/hval.t t/idx_stack.t t/imap.t t/imap_searchqp.t t/imap_tracker.t t/imapd-tls.t t/imapd.t t/import.t t/inbox.t t/inbox_idle.t t/index-git-times.t t/indexlevels-mirror-v1.t t/indexlevels-mirror.t t/init.t t/ipc.t t/iso-2202-jp.eml t/kqnotify.t t/lei-auto-watch.t t/lei-convert.t t/lei-daemon.t t/lei-export-kw.t t/lei-externals.t t/lei-import-http.t t/lei-import-imap.t t/lei-import-maildir.t t/lei-import-nntp.t t/lei-import.t t/lei-index.t t/lei-inspect.t t/lei-lcat.t t/lei-mirror.psgi t/lei-mirror.t t/lei-p2q.t t/lei-q-kw.t t/lei-q-remote-import.t t/lei-q-save.t t/lei-q-thread.t t/lei-refresh-mail-sync.t t/lei-reindex.t t/lei-sigpipe.t t/lei-tag.t t/lei-up.t t/lei-watch.t t/lei.t t/lei_dedupe.t t/lei_external.t t/lei_lcat.t t/lei_mail_sync.t t/lei_overview.t t/lei_saved_search.t t/lei_store.t t/lei_to_mail.t t/lei_xsearch.t t/linkify.t t/main-bin/spamc t/mbox_lock.t t/mbox_reader.t t/mda-mime.eml t/mda.t t/mda_filter_rubylang.t t/mdir_reader.t t/mid.t t/mime.t t/miscsearch.t t/msg_iter-nested.eml t/msg_iter-order.eml t/msg_iter.t t/msgmap.t t/msgtime.t t/multi-mid.t t/net_reader-imap.t t/netd.t t/nntp.t t/nntpd-tls.t t/nntpd-v2.t t/nntpd.t t/nodatacow.t t/nulsubject.t t/on_destroy.t t/over.t t/plack-2-txt-bodies.eml t/plack-attached-patch.eml t/plack-qp.eml t/plack.t t/pop3d.t t/precheck.t t/psgi_attach.eml t/psgi_attach.t t/psgi_bad_mids.t t/psgi_mount.t t/psgi_multipart_not.t t/psgi_scan_all.t t/psgi_search.t t/psgi_text.t t/psgi_v2-new.eml t/psgi_v2-old.eml t/psgi_v2.t t/purge.t t/qspawn.t t/reindex-time-range.t t/rename_noreplace.t t/replace.t t/reply.t t/run.perl t/search-amsg.eml t/search-thr-index.t t/search.t t/shared_kv.t t/sigfd.t t/solve/0001-simple-mod.patch t/solve/0002-rename-with-modifications.patch t/solve/bare.patch t/solver_git.t t/spamcheck_spamc.t t/spawn.t t/thread-cycle.t t/thread-index-gap.t t/time.t t/uri_imap.t t/uri_nntps.t t/utf8.eml t/v1-add-remove-add.t t/v1reindex.t t/v2-add-remove-add.t t/v2dupindex.t t/v2index-late-dupe.t t/v2mda.t t/v2mirror.t t/v2reindex.t t/v2writable.t t/view.t t/watch_filter_rubylang.t t/watch_imap.t t/watch_maildir.t t/watch_maildir_v2.t t/watch_multiple_headers.t t/www_altid.t t/www_listing.t t/www_static.t t/x-unknown-alpine.eml t/xcpdb-reshard.t version-gen.perl xt/cmp-msgstr.t xt/cmp-msgview.t xt/create-many-inboxes.t xt/eml_check_limits.t xt/eml_octet-stream.t xt/git-http-backend.t xt/git_async_cmp.t xt/httpd-async-stream.t xt/imapd-mbsync-oimap.t xt/imapd-validate.t xt/lei-auth-fail.t xt/lei-onion-convert.t xt/mem-imapd-tls.t xt/mem-msgview.t xt/mem-nntpd-tls.t xt/msgtime_cmp.t xt/net_nntp_socks.t xt/net_writer-imap.t xt/nntpd-validate.t xt/over-fsck.perl xt/perf-msgview.t xt/perf-nntpd.t xt/perf-obfuscate.t xt/perf-threading.t xt/pop3d-mpop.t xt/solver.t xt/stress-sharedkv.t public-inbox-1.9.0/Makefile.PL000066400000000000000000000210261430031475700160730ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use ExtUtils::MakeMaker; open my $m, '<', 'MANIFEST' or die "open(MANIFEST): $!\n"; chomp(my @manifest = (<$m>)); push @manifest, 'lib/PublicInbox.pm'; # generated my @EXE_FILES = grep(m!^script/!, @manifest); my $v = {}; my $t = {}; # do not sort my @RELEASES = qw(v1.9.0 v1.8.0 v1.7.0 v1.6.1 v1.6.0 v1.5.0 v1.4.0 v1.3.0 v1.2.0 v1.1.0-pre1 v1.0.0); $v->{news_deps} = [ map { "Documentation/RelNotes/$_.eml" } @RELEASES ]; $v->{txt} = [ qw(INSTALL README COPYING TODO HACKING) ]; my @dtxt = grep(m!\ADocumentation/[^/]+\.txt\z!, @manifest); push @dtxt, 'Documentation/standards.txt'; push @dtxt, 'Documentation/flow.txt'; push @dtxt, @{$v->{txt}}; for my $txt (@dtxt) { my $html = $txt; $html =~ s/\.txt\z/.html/ or $html .= '.html'; $t->{"$html : $txt"} = [ "\$(txt2pre) <$txt", "touch -r $txt \$@" ]; } $v->{t_slash_star_dot_t} = [ grep(m!\At/.*\.t\z!, @manifest) ]; my @scripts = qw(scripts/ssoma-replay); # legacy my @syn = (@EXE_FILES, grep(m!^lib/.*\.pm$!, @manifest), @scripts); @syn = grep(!/DSKQXS\.pm/, @syn) if !eval { require IO::KQueue }; @syn = grep(!/Unsubscribe\.pm/, @syn) if !eval { require Crypt::CBC }; @syn = grep(!/SaPlugin/, @syn) if !eval { require Mail::SpamAssasin }; $v->{syn_files} = \@syn; $v->{my_syntax} = [map { "$_.syntax" } @syn]; my @no_pod; $v->{-m1} = [ map { my $x = (split('/'))[-1]; my $pod = "Documentation/$x.pod"; if (-f $pod) { $x; } else { warn "W: $pod missing\n"; push @no_pod, $x; (); } } @EXE_FILES, qw( lei-add-external lei-add-watch lei-blob lei-config lei-convert lei-daemon-kill lei-daemon-pid lei-edit-search lei-export-kw lei-forget-external lei-forget-mail-sync lei-forget-search lei-import lei-index lei-init lei-inspect lei-lcat lei-ls-external lei-ls-label lei-ls-mail-source lei-ls-mail-sync lei-ls-search lei-ls-watch lei-mail-diff lei-p2q lei-q lei-rediff lei-refresh-mail-sync lei-rm lei-rm-watch lei-tag lei-up)]; $v->{-m5} = [ qw(public-inbox-config public-inbox-v1-format public-inbox-v2-format public-inbox-extindex-format lei-mail-formats lei-store-format ) ]; $v->{-m7} = [ qw(lei-mail-sync-overview lei-overview lei-security public-inbox-overview public-inbox-tuning public-inbox-glossary) ]; $v->{-m8} = [ qw(public-inbox-daemon lei-daemon) ]; my @sections = (1, 5, 7, 8); $v->{check_80} = []; $v->{manuals} = []; $v->{mantxt} = []; for my $i (@sections) { my $ary = $v->{"-m$i"}; $v->{"m$i"} = $ary; for my $m (@$ary) { my $pod = "Documentation/$m.pod"; my $txt = "Documentation/$m.txt"; $t->{"$m.$i : $pod"} = [ "\$(podman) -s$i $pod \$@" ]; $t->{"$txt : $m.$i"} = [ "\$(man2text) ./$m.$i >\$\@+", "touch -r $pod \$\@+ ./$m.$i", "mv \$\@+ \$@" ]; $t->{"Documentation/$m.html : $txt"} = [ "\$(txt2pre) <$txt", "touch -r $txt \$@" ]; $t->{".$m.cols : $m.$i"} = [ "\@echo CHECK80 $m.$i;". "COLUMNS=80 \$(MAN) ./$m.$i | \$(check_man)", '>$@' ]; $t->{".$m.lexgrog: $m.$i"} = [ "\@echo LEXGROG $m.$i;" . "\$(LEXGROG) ./$m.$i >\$\@+ && mv \$\@+ \$@" ]; } push @{$v->{check_80}}, map { ".$_.cols" } @$ary; push @{$v->{check_lexgrog}}, map { ".$_.lexgrog" } @$ary; my $manuals = $v->{"man$i"} = [ map { "$_.$i" } @$ary ]; push @{$v->{manuals}}, @$manuals; push @{$v->{mantxt}}, map { "Documentation/$_.txt" } @$ary; } $v->{docs} = [ @dtxt, 'NEWS' ]; $v->{docs_html} = [ map {; my $x = $_; $x =~ s/\.txt\z//; "$x.html" } (@{$v->{docs}}, @{$v->{mantxt}}) ]; $v->{gz_docs} = [ map { "$_.gz" } (@{$v->{docs}},@{$v->{docs_html}}) ]; $v->{rsync_docs} = [ @{$v->{gz_docs}}, @{$v->{docs}}, @{$v->{docs_html}}, qw(NEWS.atom NEWS.atom.gz)]; my $TGTS = join("\n", map {; my $tgt_prereq = $_; my $cmds = $t->{$_}; "$tgt_prereq\n".join('', map { "\t$_\n" } @$cmds); } sort keys %$t); my $VARS = join("\n", map {; my $varname = $_; join('', map { "$varname += $_\n" } sort @{$v->{$varname}}); } grep(!/^-/, sort keys %$v)); $VARS .= "\nRELEASES = ".join(' ', @RELEASES)."\n"; # Don't waste user's disk space by installing some pods from # imported code or internal use only my %man3 = map {; # semi-colon tells Perl this is a BLOCK (and not EXPR) my $base = $_; my $mod = $base; $mod =~ s!/!::!g; $mod =~ s/\.\w+\z//; "lib/PublicInbox/$_" => "blib/man3/PublicInbox::$mod.\$(MAN3EXT)" } qw(Git.pm Import.pm WWW.pod SaPlugin/ListMirror.pod); my $warn_no_pod = @no_pod ? "\n\t\@echo W: missing .pod: @no_pod\n" : ''; chomp(my $lexgrog = `which lexgrog 2>/dev/null`); my $check_lexgrog = $lexgrog ? 'check-lexgrog' : ''; WriteMakefile( NAME => 'PublicInbox', # n.b. camel-case is not our choice # XXX drop "PENDING" in .pod before updating this! VERSION => '1.9.0.PENDING', AUTHOR => 'public-inbox hackers ', ABSTRACT => 'an "archives first" approach to mailing lists', EXE_FILES => \@EXE_FILES, # DO NOT blindly put "use v5.12" in *.pm files, unicode_strings # causes known breakages. "use v5.10.1" is safe, though MIN_PERL_VERSION => '5.12.0', LICENSE => 'agpl_3', # AGPL-3.0+, CPAN::Meta::Spec doesn't have '+' PREREQ_PM => { # note: we use spamc(1), NOT the Perl modules # We also depend on git. # Keep this sorted and synced to the INSTALL document # perl-modules-5.xx or libperl5.xx in Debian-based # part of "perl5" on FreeBSD 'Compress::Raw::Zlib' => 0, 'Compress::Zlib' => 0, 'Data::Dumper' => 0, 'Digest::SHA' => 0, # rpm: perl-Digest-SHA 'Encode' => 2.35, # 2.35 shipped with 5.10.1 'IO::Compress::Gzip' => 0, 'IO::Uncompress::Gunzip' => 0, 'Storable' => 0, # rpm: perl-Storable 'Text::ParseWords' => 0, # rpm: perl-Text-ParseWords # Plack is needed for public-inbox-httpd and PublicInbox::WWW # 'Plack' => 0, 'URI' => 0, # We have more test dependencies, but do not force # users to install them. See INSTALL # All Perl installs I know about have these, but RH-based # distros make them separate even though 'perl' pulls them in 'File::Path' => 0, 'File::Temp' => '0.19', # for ->tmpdir support 'Getopt::Long' => 0, 'Exporter' => 0, # ExtUtils::MakeMaker # this file won't run w/o it... }, MAN3PODS => \%man3, clean => { FILES => 't/home*/setup* t/home*/t* t/home*/.public-inbox '. 't/data-gen/*' }, PM => { map { s[^lib/][]s; +('lib/' . $_ => '$(INST_LIB)/' . $_); } grep { # Will include *.pod and an *.h file, but so # would ExtUtils::MakeMaker. m[^lib/]; } @manifest }, ); sub MY::postamble { my $N = (`{ getconf _NPROCESSORS_ONLN || nproc; } 2>/dev/null` || 1); $N += 1; # account for sleeps in some tests (and makes an IV) <MANIFEST.gen 2>&1; then \\ diff -u MANIFEST MANIFEST.gen; fi check-manifest : MANIFEST \$(check_manifest) # the traditional way running per-*.t processes: check-each :: pure_all \$(EATMYDATA) \$(PROVE) --state=save -bvw -j\$(N) -@\$(check_manifest) # lightly-tested way to run tests, relies "--state=save" in check-each # for best performance check-run :: pure_all check-man \$(EATMYDATA) \$(PROVE) -bvw t/run.perl :: -j\$(N) -@\$(check_manifest) check :: check-each lib/PublicInbox/UserContent.pm :: contrib/css/216dark.css \$(PERL) -I lib \$@ \$? # Ensure new .pm files will always be installed by updating # the timestamp of Makefile.PL which forces Makefile to be remade Makefile.PL : MANIFEST touch -r MANIFEST \$@ \$(PERLRUN) \$@ # Install symlinks to ~/bin (which is hopefuly in PATH) which point to # this source tree. # prefix + bindir matches git.git Makefile: prefix = \$(HOME) bindir = \$(prefix)/bin symlink-install : lib/PublicInbox.pm mkdir -p \$(bindir) lei=\$\$(realpath lei.sh) && cd \$(bindir) && \\ for x in \$(EXE_FILES); do \\ ln -sf "\$\$lei" \$\$(basename "\$\$x"); \\ done pure_all :: lib/PublicInbox.pm lib/PublicInbox.pm : FORCE VERSION=\$(VERSION) \$(PERL) -w ./version-gen.perl update-copyrights : \@case '\$(GNULIB_PATH)' in '') echo >&2 GNULIB_PATH unset; false;; esac git ls-files | UPDATE_COPYRIGHT_HOLDER='all contributors' \\ UPDATE_COPYRIGHT_USE_INTERVALS=2 \\ xargs \$(GNULIB_PATH)/build-aux/update-copyright EOF } public-inbox-1.9.0/README000066400000000000000000000152221430031475700150020ustar00rootroot00000000000000public-inbox - an "archives first" approach to mailing lists ------------------------------------------------------------ public-inbox implements the sharing of an email inbox via git to complement or replace traditional mailing lists. Readers may read via NNTP, IMAP, Atom feeds or HTML archives. public-inbox spawned around three main ideas: * Publicly accessible and archived communication is essential to Free Software development. * Contributing to Free Software projects should not require the use of non-Free services or software. * Graphical user interfaces should not be required for text-based communication. Users may have broken graphics drivers, limited eyesight, or be unable to afford modern hardware. public-inbox aims to be easy-to-deploy and manage; encouraging projects to run their own instances with minimal overhead. Implementation -------------- public-inbox stores mail in git repositories as documented in https://public-inbox.org/public-inbox-v2-format.txt and https://public-inbox.org/public-inbox-v1-format.txt By storing (and optionally) exposing an inbox via git, it is fast and efficient to host and mirror public-inboxes. Traditional mailing lists use the "push" model. For readers, that requires commitment to subscribe and effort to unsubscribe. New readers may also have difficulty following existing discussions if archives do not expose Message-ID and References headers. List server admins are also burdened with delivery failures. public-inbox uses the "pull" model. Casual readers may follow the list via NNTP, IMAP, Atom feed or HTML archives. If a reader loses interest, they simply stop following. Since we use git, mirrors are easy-to-setup, and lists are easy-to-relocate to different mail addresses without losing or splitting archives. _Anybody_ may also setup a delivery-only mailing list server to replay a public-inbox git archive to subscribers via SMTP. Features -------- * anybody may participate via plain-text email * stores email in git, readers may have a complete archive of the inbox * Atom feed, IMAP, NNTP allows casual readers to follow via local tools * uses only well-documented and easy-to-implement data formats Try it out now, see https://try.public-inbox.org/ Requirements for reading: * any software capable of IMAP, NNTP or following Atom feeds Any basic web browser will do for the HTML archives. We primarily develop on w3m to maximize accessibility. Requirements (participant) -------------------------- * any MUA which may send text-only emails ("git send-email" works!) Users are strongly encouraged to use the "reply-all" feature of their mailers to reduce the impact of a public-inbox as a single point of failure. * The HTTP web interface exposes mboxrd files, and NNTP clients often feature reply-by-email functionality * participants do not need to install public-inbox, only server admins Requirements (server) --------------------- See https://public-inbox.org/INSTALL Hacking ------- AGPL source code is available via git: git clone https://public-inbox.org/public-inbox.git git clone https://repo.or.cz/public-inbox.git torsocks git clone http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git torsocks git clone http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/public-inbox See below for contact info. Contact ------- We are happy to see feedback of all types via plain-text email. public-inbox discussion is self-hosting on public-inbox.org Please send comments, user/developer discussion, patches, bug reports, and pull requests to our public-inbox address at: meta@public-inbox.org Please Cc: all recipients when replying as we do not require subscription. This also makes it easier to rope in folks of tangentially related projects we depend on (e.g. git developers on git@vger.kernel.org). The archives are readable via IMAP, NNTP or HTTP: nntps://news.public-inbox.org/inbox.comp.mail.public-inbox.meta imaps://;AUTH=ANONYMOUS@public-inbox.org/inbox.comp.mail.public-inbox.meta.0 https://public-inbox.org/meta/ AUTH=ANONYMOUS is recommended for IMAP, but any username + password works And as Tor hidden services: http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/ nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.mail.public-inbox.meta imap://;AUTH=ANONYMOUS@4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.mail.public-inbox.meta.0 You may also clone all messages via git: git clone --mirror https://public-inbox.org/meta/ torsocks git clone --mirror http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/ Anti-Spam --------- The maintainer of public-inbox has found SpamAssassin a good tool for filtering his personal mail, and it will be the default spam filtering tool in public-inbox. See https://public-inbox.org/dc-dlvr-spam-flow.html for more info. Content Filtering ----------------- To discourage phishing, trackers, exploits and other nuisances, only plain-text emails are allowed and HTML is rejected by default. This improves accessibility, and saves bandwidth and storage as mail is archived forever. As of the 2010s, successful online social networks and forums are the ones which heavily restrict users formatting options; so public-inbox aims to preserve the focus on content, and not presentation. Copyright --------- Copyright 2013-2021 all contributors License: AGPL-3.0+ This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . Additional permission under GNU GPL version 3 section 7: If you modify this program, or any covered work, by linking or combining it with the OpenSSL project's OpenSSL library (or a modified version of that library), containing parts covered by the terms of the OpenSSL or SSLeay licenses, the copyright holder(s) grants you additional permission to convey the resulting work. Corresponding Source for a non-source form of such a combination shall include the source code for the parts of OpenSSL used as well as that of the covered work. public-inbox-1.9.0/TODO000066400000000000000000000127121430031475700146130ustar00rootroot00000000000000TODO items for public-inbox (Not in any particular order, and performance, ease-of-setup, installation, maintainability, etc all need to be considered for everything we introduce) * general performance improvements, but without relying on XS or pre-built modules any more than we currently do. (Optional Inline::C and user-compiled re2c acceptable) * mailmap support (same as git) for remapping expired email addresses * support remapping of expired URLs similar to mailmap (coordinate with git.git with this?) * HTTP, IMAP, NNTP, POP3 proxy support. Allow us to be a frontend for firewalled off (or Tor-exclusive) instances. The use case is for offering a publicly accessible IP with a cheap VPS, yet storing large amounts of data on computers without a public IP behind a home Internet connection. * support HTTP(S) CONNECT proxying to IMAP/NNTP/POP3 for users with firewall problems * DHT (distributed hash table) for mapping Message-IDs to various archive locations to avoid SPOF. * optional Cache::FastMmap support so production deployments won't need Varnish (Varnish doesn't protect NNTP nor IMAP, either) * dogfood and take advantage of new kernel APIs (while maintaining portability to older Linux, free BSDs and maybe Hurd). * dogfood latest Xapian, Perl5, SQLite, git and various modules to ensure things continue working as they should (or more better) while retaining compatibility with old versions. * Support more of RFC 3977 (NNTP) Is there anything left for read-only support? * Configurable linkification for per-inbox shorthands: "$gmane/123456" could be configured to expand to the appropriate link pointing to the gmane.io list archives, likewise "[Bug #123456]" could be configured to expand to point to some project's bug tracker at http://example.com/bug/123456 * configurable synonym and spelling support in Xapian * Support optional "HTTPS Everywhere" for mapping old HTTP to HTTPS links if (and only if) the user wants to use HTTPS. We may also be able to configure redirects for expired URLs. Note: message bodies rendered as HTML themselves must NOT change, the links should point to an anchor tag within the same page, instead; giving the user options. * configurable constants (index limits, search results) * handle messages with multiple Message-IDs (done for v2, doable for v1) * handle broken double-bracketed References properly (maybe) and totally broken Message-IDs cf. https://public-inbox.org/git/20160814012706.GA18784@starla/ * improve documentation * linkify thread skeletons better https://public-inbox.org/git/6E3699DEA672430CAEA6DEFEDE6918F4@PhilipOakley/ * Further lower mail parser memory usage. We still slurp entire message bodies into memory and incur 2-3x overhead on multipart messages. Inline::C (and maybe gmime) could work. * use REQUEST_URI properly for CGI / mod_perl2 compatibility with Message-IDs which include '%' (done?) * better test cases, make faster by reusing more setup code across tests * large mbox/Maildir/MH/NNTP spool import (in lei, but not for public-facing inboxes) * MH import support (read-only, at least) * Read-only WebDAV interface to the git repo so it can be mounted via davfs2 or fusedav to avoid full clones. davfs2 needs Range: request support for this to be feasible: https://savannah.nongnu.org/bugs/?33259 https://savannah.nongnu.org/support/?107649 * Contribute something like IMAP IDLE for "git fetch". Inboxes (and any git repos) can be kept up-to-date without relying on polling. * Improve bundle support in git to make it cheaper to host/clone with dumb HTTP(S) servers. * Expose targeted reindexing of individual messages. Sometimes an indexing bug only affects a handful of messages, so it's not worth the trouble of doing a full reindex. * code repository integration (cgit: done, TODO: gitweb, etc...) * migration path to v2 (making it transparent for "git fetch" may not be possible, but "public-inbox-fetch" will handle it) * imperfect scraper importers for obfuscated list archives (e.g. obfuscated Mailman stuff, Google Groups, etc...) * improve performance and avoid head-of-line blocking on slow storage (done for most git blob retrievals, Xapian needs work) * HTTP(S) search API (likely JMAP, but GraphQL could be an option) It should support git-specific prefixes (dfpre:, dfpost:, dfn:, etc) as extensions. If JMAP, it should have HTTP(S) analogues to various IMAP extensions. * scalability to tens/hundreds of thousands of inboxes - inotify-based manifest.js.gz updates ... * lei - see %CMD in lib/PublicInbox/LEI.pm (there's a truckload here..) * make "git cat-file --batch" detect unlinked packfiles so we don't have to restart processes (very long-term) * linter to check validity of config file * linter option and WWW endpoint to graph relationships and flows between inboxes, addresses, Maildirs, coderepos, newsgroups, IMAP mailboxes, etc... * pygments support - via Python script similar to `git cat-file --batch' to avoid startup penalty. pygments.rb (Ruby) can be inspiration, too. * highlighting + linkification for "git format-patch --interdiff" output * highlighting for "git format-patch --range-diff" output (linkification is too expensive, as it requires mirroring) * support UUCP addresses for legacy archives * support pipelining as an IMAP/NNTP client for -watch + lei * expose lei contents via read/write IMAP/JMAP server for personal use * git SHA-256 migration/coexistence path public-inbox-1.9.0/certs/000077500000000000000000000000001430031475700152405ustar00rootroot00000000000000public-inbox-1.9.0/certs/.gitignore000066400000000000000000000000301430031475700172210ustar00rootroot00000000000000*.pem *.der *.enc *.p12 public-inbox-1.9.0/certs/create-certs.perl000077500000000000000000000070121430031475700205100ustar00rootroot00000000000000#!/usr/bin/perl -w # License: GPL-1.0+ or Artistic-1.0-Perl # from IO::Socket::SSL 2.063 / https://github.com/noxxi/p5-io-socket-ssl use strict; use warnings; use IO::Socket::SSL::Utils; use Net::SSLeay; my $dir = -d 'certs' && -f 'Makefile.PL' ? './certs/' : './'; my $now = time(); my $later = 0x7fffffff; # 2038 problems on 32-bit :< Net::SSLeay::SSLeay_add_ssl_algorithms(); my $sha256 = Net::SSLeay::EVP_get_digestbyname('sha256') or die; my $printfp = sub { my ($w,$cert) = @_; print $w.' sha256$'.unpack('H*',Net::SSLeay::X509_digest($cert, $sha256))."\n" }; my %time_valid = (not_before => $now, not_after => $later); my @ca = CERT_create( CA => 1, subject => { CN => 'IO::Socket::SSL Demo CA' }, %time_valid, ); save('test-ca.pem',PEM_cert2string($ca[0])); my @server = CERT_create( CA => 0, subject => { CN => 'server.local' }, purpose => 'server', issuer => \@ca, %time_valid, ); save('server-cert.pem',PEM_cert2string($server[0])); save('server-key.pem',PEM_key2string($server[1])); $printfp->(server => $server[0]); @server = CERT_create( CA => 0, subject => { CN => 'server2.local' }, purpose => 'server', issuer => \@ca, %time_valid, ); save('server2-cert.pem',PEM_cert2string($server[0])); save('server2-key.pem',PEM_key2string($server[1])); $printfp->(server2 => $server[0]); @server = CERT_create( CA => 0, subject => { CN => 'server-ecc.local' }, purpose => 'server', issuer => \@ca, key => KEY_create_ec(), %time_valid, ); save('server-ecc-cert.pem',PEM_cert2string($server[0])); save('server-ecc-key.pem',PEM_key2string($server[1])); $printfp->('server-ecc' => $server[0]); my @client = CERT_create( CA => 0, subject => { CN => 'client.local' }, purpose => 'client', issuer => \@ca, %time_valid, ); save('client-cert.pem',PEM_cert2string($client[0])); save('client-key.pem',PEM_key2string($client[1])); $printfp->(client => $client[0]); my @swc = CERT_create( CA => 0, subject => { CN => 'server.local' }, purpose => 'server', issuer => \@ca, subjectAltNames => [ [ DNS => '*.server.local' ], [ IP => '127.0.0.1' ], [ DNS => 'www*.other.local' ], [ DNS => 'smtp.mydomain.local' ], [ DNS => 'xn--lwe-sna.idntest.local' ] ], %time_valid, ); save('server-wildcard.pem',PEM_cert2string($swc[0]),PEM_key2string($swc[1])); my @subca = CERT_create( CA => 1, issuer => \@ca, subject => { CN => 'IO::Socket::SSL Demo Sub CA' }, %time_valid, ); save('test-subca.pem',PEM_cert2string($subca[0])); @server = CERT_create( CA => 0, subject => { CN => 'server.local' }, purpose => 'server', issuer => \@subca, %time_valid, ); save('sub-server.pem',PEM_cert2string($server[0]).PEM_key2string($server[1])); my @cap = CERT_create( CA => 1, subject => { CN => 'IO::Socket::SSL::Intercept' }, %time_valid, ); save('proxyca.pem',PEM_cert2string($cap[0]).PEM_key2string($cap[1])); sub save { my $file = shift; open(my $fd,'>',$dir.$file) or die $!; print $fd @_; } system(< # License: AGPL-3.0+ # Helper script for installing/uninstalling packages for CI use # Intended for use on non-production chroots or VMs since it # changes installed packages use strict; my $usage = "$0 PKG_FMT PROFILE [PROFILE_MOD]"; my $pkg_fmt = shift; @ARGV or die $usage, "\n"; my @test_essential = qw(Test::Simple); # we actually use Test::More # package profiles my $profiles = { # the smallest possible profile for testing essential => [ qw( git perl Digest::SHA Encode ExtUtils::MakeMaker IO::Compress::Gzip URI ), @test_essential ], # everything optional for normal use optional => [ qw( Date::Parse BSD::Resource DBD::SQLite DBI Inline::C Net::Server Plack Plack::Test Plack::Middleware::ReverseProxy Search::Xapian Socket6 highlight.pm xapian-compact ) ], # optional developer stuff devtest => [ qw( XML::TreePP curl w3m Plack::Test::ExternalServer ) ], }; # account for granularity differences between package systems and OSes my @precious; if ($^O eq 'freebsd') { @precious = qw(perl curl Socket6 IO::Compress::Gzip); } elsif ($pkg_fmt eq 'rpm') { @precious = qw(perl curl); } if (@precious) { my $re = join('|', map { quotemeta($_) } @precious); for my $list (values %$profiles) { @$list = grep(!/\A(?:$re)\z/, @$list); } push @{$profiles->{essential}}, @precious; } # bare minimum for v2 $profiles->{v2essential} = [ @{$profiles->{essential}}, qw(DBD::SQLite DBI) ]; # package names which can't be mapped automatically: my $non_auto = { 'perl' => { pkg => 'perl5' }, 'Date::Parse' => { deb => 'libtimedate-perl', pkg => 'p5-TimeDate', rpm => 'perl-TimeDate', }, 'Digest::SHA' => { deb => 'perl', # libperl5.XX, but the XX varies pkg => 'perl5', }, 'Encode' => { deb => 'perl', # libperl5.XX, but the XX varies pkg => 'perl5', rpm => 'perl-Encode', }, 'ExtUtils::MakeMaker' => { deb => 'perl', # perl-modules-5.xx pkg => 'perl5', rpm => 'perl-ExtUtils-MakeMaker', }, 'IO::Compress::Gzip' => { deb => 'perl', # perl-modules-5.xx pkg => 'perl5', rpm => 'perl-IO-Compress', }, 'DBD::SQLite' => { deb => 'libdbd-sqlite3-perl' }, 'Plack::Test' => { deb => 'libplack-perl', pkg => 'p5-Plack', rpm => 'perl-Plack-Test', }, 'URI' => { deb => 'liburi-perl', pkg => 'p5-URI', rpm => 'perl-URI', }, 'Test::Simple' => { deb => 'perl', # perl-modules-5.XX, but the XX varies pkg => 'perl5', rpm => 'perl-Test-Simple', }, 'highlight.pm' => { deb => 'libhighlight-perl', pkg => [], rpm => [], }, # we call xapian-compact(1) in public-inbox-compact(1) 'xapian-compact' => { deb => 'xapian-tools', pkg => 'xapian-core', rpm => 'xapian-core', # ??? }, # OS-specific 'IO::KQueue' => { deb => [], pkg => 'p5-IO-KQueue', rpm => [], }, }; my (@pkg_install, @pkg_remove, %all); for my $ary (values %$profiles) { $all{$_} = \@pkg_remove for @$ary; } if ($^O eq 'freebsd') { $all{'IO::KQueue'} = \@pkg_remove; } $profiles->{all} = [ keys %all ]; # pseudo-profile for all packages # parse the profile list from the command-line for my $profile (@ARGV) { if ($profile =~ s/-\z//) { # like apt-get, trailing "-" means remove profile2dst($profile, \@pkg_remove); } else { profile2dst($profile, \@pkg_install); } } # fill in @pkg_install and @pkg_remove: while (my ($pkg, $dst_pkg_list) = each %all) { push @$dst_pkg_list, list(pkg2ospkg($pkg, $pkg_fmt)); } my @apt_opts = qw(-o APT::Install-Recommends=false -o APT::Install-Suggests=false); # OS-specific cleanups appreciated if ($pkg_fmt eq 'deb') { my @quiet = $ENV{V} ? () : ('-q'); root('apt-get', @apt_opts, qw(install --purge -y), @quiet, @pkg_install, # apt-get lets you suffix a package with "-" to # remove it in an "install" sub-command: map { "$_-" } @pkg_remove); root('apt-get', @apt_opts, qw(autoremove --purge -y), @quiet); } elsif ($pkg_fmt eq 'pkg') { my @quiet = $ENV{V} ? () : ('-q'); # FreeBSD, maybe other *BSDs are similar? # don't remove stuff that isn't installed: exclude_uninstalled(\@pkg_remove); root(qw(pkg remove -y), @quiet, @pkg_remove) if @pkg_remove; root(qw(pkg install -y), @quiet, @pkg_install) if @pkg_install; root(qw(pkg autoremove -y), @quiet); # TODO: yum / rpm support } elsif ($pkg_fmt eq 'rpm') { my @quiet = $ENV{V} ? () : ('-q'); exclude_uninstalled(\@pkg_remove); root(qw(yum remove -y), @quiet, @pkg_remove) if @pkg_remove; root(qw(yum install -y), @quiet, @pkg_install) if @pkg_install; } else { die "unsupported package format: $pkg_fmt\n"; } exit 0; # map a generic package name to an OS package name sub pkg2ospkg { my ($pkg, $fmt) = @_; # check explicit overrides, first: if (my $ospkg = $non_auto->{$pkg}->{$fmt}) { return $ospkg; } # check common Perl module name patterns: if ($pkg =~ /::/ || $pkg =~ /\A[A-Z]/) { if ($fmt eq 'deb') { $pkg =~ s/::/-/g; $pkg =~ tr/A-Z/a-z/; return "lib$pkg-perl"; } elsif ($fmt eq 'rpm') { $pkg =~ s/::/-/g; return "perl-$pkg" } elsif ($fmt eq 'pkg') { $pkg =~ s/::/-/g; return "p5-$pkg" } else { die "unsupported package format: $fmt for $pkg\n" } } # use package name as-is (e.g. 'curl' or 'w3m') $pkg; } # maps a install profile to a package list (@pkg_remove or @pkg_install) sub profile2dst { my ($profile, $dst_pkg_list) = @_; if (my $pkg_list = $profiles->{$profile}) { $all{$_} = $dst_pkg_list for @$pkg_list; } elsif ($all{$profile}) { # $profile is just a package name $all{$profile} = $dst_pkg_list; } else { die "unrecognized profile or package: $profile\n"; } } sub exclude_uninstalled { my ($list) = @_; my %inst_check = ( pkg => sub { system(qw(pkg info -q), $_[0]) == 0 }, deb => sub { system("dpkg -s $_[0] >/dev/null 2>&1") == 0 }, rpm => sub { system("rpm -qs $_[0] >/dev/null 2>&1") == 0 }, ); my $cb = $inst_check{$pkg_fmt} || die <<""; don't know how to check install status for $pkg_fmt my @tmp; for my $pkg (@$list) { push @tmp, $pkg if $cb->($pkg); } @$list = @tmp; } sub root { print join(' ', @_), "\n"; return if $ENV{DRY_RUN}; return if system(@_) == 0; warn 'command failed: ', join(' ', @_), "\n"; exit($? >> 8); } # ensure result can be pushed into an array: sub list { my ($pkg) = @_; ref($pkg) eq 'ARRAY' ? @$pkg : $pkg; } public-inbox-1.9.0/ci/profiles.sh000077500000000000000000000030531430031475700166760ustar00rootroot00000000000000#!/bin/sh # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # Prints OS-specific package profiles to stdout (one per-newline) to use # as command-line args for ci/deps.perl. Called automatically by ci/run.sh # set by os-release(5) or similar ID= VERSION_ID= case $(uname -o) in GNU/Linux) for f in /etc/os-release /usr/lib/os-release do test -f $f || continue . $f # Debian sid (and testing) have no VERSION_ID case $ID--$VERSION_ID in debian--) case $PRETTY_NAME in */sid) VERSION_ID=sid ;; *) echo >&2 "$ID, but no VERSION_ID" echo >&2 "==> $f <==" cat >&2 $f exit 1 ;; esac ;; esac case $ID--$VERSION_ID in -|*--|--*) continue ;; *--*) break ;; esac done ;; FreeBSD) ID=freebsd VERSION_ID=$(uname -r | cut -d . -f 1) test "$VERSION_ID" -lt 11 && { echo >&2 "ID=$ID $(uname -r) too old to support"; exit 1 } esac case $ID in freebsd) PKG_FMT=pkg ;; debian|ubuntu) PKG_FMT=deb ;; centos|redhat|fedora) PKG_FMT=rpm ;; *) echo >&2 "PKG_FMT undefined for ID=$ID in $0" esac case $ID-$VERSION_ID in freebsd-11|freebsd-12) sed "s/^/$PKG_FMT /" < # License: AGPL-3.0+ set -e SUDO=${SUDO-'sudo'} PERL=${PERL-'perl'} MAKE=${MAKE-'make'} DO=${DO-''} set -x if test -f Makefile then $DO $MAKE clean fi ./ci/profiles.sh | while read args do $DO $SUDO $PERL -w ci/deps.perl $args $DO $PERL Makefile.PL $DO $MAKE $DO $MAKE check $DO $MAKE clean done public-inbox-1.9.0/contrib/000077500000000000000000000000001430031475700155605ustar00rootroot00000000000000public-inbox-1.9.0/contrib/completion/000077500000000000000000000000001430031475700177315ustar00rootroot00000000000000public-inbox-1.9.0/contrib/completion/lei-completion.bash000066400000000000000000000012101430031475700235020ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # preliminary bash completion support for lei (Local Email Interface) # Needs a lot of work, see `lei__complete' in lib/PublicInbox::LEI.pm _lei() { local wordlist="$(lei _complete ${COMP_WORDS[@]})" case $wordlist in *':'* | *'='* | '//'*) compopt -o nospace ;; *) compopt +o nospace ;; # the default esac wordlist="${wordlist//;/\\\\;}" # escape ';' for ';UIDVALIDITY' and such COMPREPLY=($(compgen -W "$wordlist" -- "${COMP_WORDS[COMP_CWORD]}")) return 0 } complete -o default -o bashdefault -F _lei lei public-inbox-1.9.0/contrib/css/000077500000000000000000000000001430031475700163505ustar00rootroot00000000000000public-inbox-1.9.0/contrib/css/216dark.css000066400000000000000000000033731430031475700202420ustar00rootroot00000000000000/* * CC0-1.0 * Dark color scheme using 216 web-safe colors, inspired * somewhat by the default color scheme in mutt. * It reduces eyestrain for me, and energy usage for all: * https://en.wikipedia.org/wiki/Light-on-dark_color_scheme */ * { font-size: 100%; font-family: monospace; background:#000; color:#ccc } pre { white-space: pre-wrap } /* * Underlined links add visual noise which make them hard-to-read. * Use colors to make them stand out, instead. */ a:link { color:#69f; text-decoration:none } a:visited { color:#96f } /* quoted text in emails gets a different color */ *.q { color:#09f } /* * these may be used with cgit , too. * (cgit uses
, public-inbox uses ) */ *.add { color:#0ff } /* diff post-image lines */ *.del { color:#f0f } /* diff pre-image lines */ *.head { color:#fff } /* diff header (metainformation) */ *.hunk { color:#c93 } /* diff hunk-header */ /* * highlight 3.x colors (tested 3.18) for displaying blobs. * This doesn't use most of the colors available, as I find too * many colors overwhelming, so the default is commented out. */ .hl.num { color:#f30 } /* number */ .hl.esc { color:#f0f } /* escape character */ .hl.str { color:#f30 } /* string */ .hl.ppc { color:#f0f } /* preprocessor */ .hl.pps { color:#f30 } /* preprocessor string */ .hl.slc { color:#09f } /* single-line comment */ .hl.com { color:#09f } /* multi-line comment */ /* .hl.opt { color:#ccc } */ /* operator */ /* .hl.ipl { color:#ccc } */ /* interpolation */ /* keyword groups kw[a-z] */ .hl.kwa { color:#ff0 } .hl.kwb { color:#0f0 } .hl.kwc { color:#ff0 } /* .hl.kwd { color:#ccc } */ /* line-number (unused by public-inbox) */ /* .hl.lin { color:#ccc } */ public-inbox-1.9.0/contrib/css/216light.css000066400000000000000000000030561430031475700204260ustar00rootroot00000000000000/* * CC0-1.0 * Light color scheme using 216 web-safe colors. * Suitable for print, and blinding people with brightness. * Haphazardly thrown together because bright colors hurt my eyes */ * { font-size: 100%; font-family: monospace; background:#fff; color:#003 } pre { white-space: pre-wrap } /* * Underlined links add visual noise which make them hard-to-read. * Use colors to make them stand out, instead. */ a:link { color:#00f; text-decoration:none } a:visited { color:#808 } /* quoted text gets a different color */ *.q { color:#006 } /* * these may be used with cgit, too * (cgit uses
, public-inbox uses ) */ *.add { color:#060 } *.del {color:#900 } *.head { color:#000 } *.hunk { color:#960 } /* * highlight 3.x colors (tested 3.18) for displaying blobs. * This doesn't use most of the colors available, as I find too * many colors overwhelming, so the default is commented out. */ .hl.num { color:#f30 } /* number */ .hl.esc { color:#f0f } /* escape character */ .hl.str { color:#f30 } /* string */ .hl.ppc { color:#c3c } /* preprocessor */ .hl.pps { color:#f30 } /* preprocessor string */ .hl.slc { color:#099 } /* single-line comment */ .hl.com { color:#099 } /* multi-line comment */ /* .hl.opt { color:#ccc } */ /* operator */ /* .hl.ipl { color:#ccc } */ /* interpolation */ /* keyword groups kw[a-z] */ .hl.kwa { color:#f90 } .hl.kwb { color:#060 } .hl.kwc { color:#f90 } /* .hl.kwd { color:#ccc } */ /* line-number (unused by public-inbox) */ /* .hl.lin { color:#ccc } */ public-inbox-1.9.0/contrib/css/README000066400000000000000000000033301430031475700172270ustar00rootroot00000000000000Example CSS for use with public-inbox. CSS::Minifier or CSS::Minifier::XS will be tried for minimizing CSS at startup if available(*). Multiple CSS files may be configured for user-selectability via the "title" attribute or for different media. Local CSS files are read into memory once at startup. If only one CSS file is given without "title", it will be inlined. Snippet from ~/.public-inbox/config, order matters to browsers. -----8<----- [publicinbox] ; Depending on the browser, the first entry is the default. ; So having "/dev/null" at the top means no colors by default. ; Using the "title" attribute enables `View -> "Page Style"' ; choices in Firefox. css = /dev/null title=default ; git-config supports backslash to continue long lines ; Attributes ('media', 'title') must use single quotes(') ; or no quotes at all, but not double-quotes, as git-config(1) ; won't preserve them: css = /path/to/public-inbox/contrib/css/216dark.css \ title=216dark \ media='screen,(prefers-color-scheme:dark)' ; for tree haters who print web pages :P css = /path/to/public-inbox/contrib/css/216light.css \ title=216light \ media='screen,print,(prefers-color-scheme:light)' ; external CSS may be specified with href. ; Using "//" (protocol-relative) URLs is allowed, as is ; "https://" or "http://" for hosts which only support one protocol. css = href=//example.com/fugly.css title=external All files in these example directory are CC0-1.0 (public domain): To the extent possible under law, Eric Wong has waived all copyright and related or neighboring rights to these examples. https://creativecommons.org/publicdomain/zero/1.0/legalcode (*) "libcss-minifier-perl" or "libcss-minifier-xs-perl" on Debian-based systems public-inbox-1.9.0/contrib/selinux/000077500000000000000000000000001430031475700172475ustar00rootroot00000000000000public-inbox-1.9.0/contrib/selinux/el7/000077500000000000000000000000001430031475700177365ustar00rootroot00000000000000public-inbox-1.9.0/contrib/selinux/el7/publicinbox.fc000066400000000000000000000013041430031475700225640ustar00rootroot00000000000000/usr/(local/)?bin/public-inbox-httpd -- gen_context(system_u:object_r:publicinbox_daemon_exec_t,s0) /usr/(local/)?bin/public-inbox-nntpd -- gen_context(system_u:object_r:publicinbox_daemon_exec_t,s0) /usr/(local/)?bin/public-inbox-watch -- gen_context(system_u:object_r:publicinbox_deliver_exec_t,s0) /usr/(local/)?bin/public-inbox-mda -- gen_context(system_u:object_r:publicinbox_deliver_exec_t,s0) /var/lib/public-inbox(/.*)? gen_context(system_u:object_r:publicinbox_var_lib_t,s0) /var/run/public-inbox(/.*)? gen_context(system_u:object_r:publicinbox_var_run_t,s0) /var/log/public-inbox(/.*)? gen_context(system_u:object_r:publicinbox_log_t,s0) public-inbox-1.9.0/contrib/selinux/el7/publicinbox.te000066400000000000000000000100371430031475700226070ustar00rootroot00000000000000################## # This policy allows running public-inbox-httpd and public-inbox-nntpd # on reasonable ports (119 for nntpd and 80/443/8080 for httpd) # # It also allows delivering mail via postfix-pipe to public-inbox-mda # # Author: Konstantin Ryabitsev # policy_module(publicinbox, 1.0.3) require { type postfix_pipe_t; type spamc_t; type spamd_t; } ################## # Declarations type publicinbox_daemon_t; type publicinbox_daemon_exec_t; init_daemon_domain(publicinbox_daemon_t, publicinbox_daemon_exec_t) type publicinbox_var_lib_t; files_type(publicinbox_var_lib_t) type publicinbox_log_t; logging_log_file(publicinbox_log_t) type publicinbox_var_run_t; files_tmp_file(publicinbox_var_run_t) type publicinbox_tmp_t; files_tmp_file(publicinbox_tmp_t) type publicinbox_deliver_t; type publicinbox_deliver_exec_t; init_daemon_domain(publicinbox_deliver_t, publicinbox_deliver_exec_t) # Uncomment to put these domains into permissive mode #permissive publicinbox_daemon_t; #permissive publicinbox_deliver_t; ################## # Daemons policy domain_use_interactive_fds(publicinbox_daemon_t) files_read_etc_files(publicinbox_daemon_t) miscfiles_read_localization(publicinbox_daemon_t) allow publicinbox_daemon_t self:tcp_socket create_stream_socket_perms; allow publicinbox_daemon_t self:tcp_socket { accept listen }; # Need to be able to manage and exec them for Inline::C manage_files_pattern(publicinbox_daemon_t, publicinbox_var_run_t, publicinbox_var_run_t) exec_files_pattern(publicinbox_daemon_t, publicinbox_var_run_t, publicinbox_var_run_t) # Logging append_files_pattern(publicinbox_daemon_t, publicinbox_log_t, publicinbox_log_t) create_files_pattern(publicinbox_daemon_t, publicinbox_log_t, publicinbox_log_t) setattr_files_pattern(publicinbox_daemon_t, publicinbox_log_t, publicinbox_log_t) logging_log_filetrans(publicinbox_daemon_t, publicinbox_log_t, { file dir }) # Run on httpd and nntp ports (called innd_port_t) corenet_tcp_bind_generic_node(publicinbox_daemon_t) corenet_tcp_bind_http_port(publicinbox_daemon_t) corenet_tcp_bind_http_cache_port(publicinbox_daemon_t) corenet_tcp_bind_innd_port(publicinbox_daemon_t) # Allow reading anything publicinbox_var_lib_t list_dirs_pattern(publicinbox_daemon_t, publicinbox_var_lib_t, publicinbox_var_lib_t) read_files_pattern(publicinbox_daemon_t, publicinbox_var_lib_t, publicinbox_var_lib_t) # The daemon doesn't need to write to this dir dontaudit publicinbox_daemon_t publicinbox_var_lib_t:file write; # Allow executing bin (for git, mostly) corecmd_exec_bin(publicinbox_daemon_t) # Manage our tmp files manage_dirs_pattern(publicinbox_daemon_t, publicinbox_tmp_t, publicinbox_tmp_t) manage_files_pattern(publicinbox_daemon_t, publicinbox_tmp_t, publicinbox_tmp_t) files_tmp_filetrans(publicinbox_daemon_t, publicinbox_tmp_t, { file dir }) ################## # mda/watch policy # # Allow transitioning to deliver_t from postfix pipe domtrans_pattern(postfix_pipe_t, publicinbox_deliver_exec_t, publicinbox_deliver_t) postfix_rw_inherited_master_pipes(publicinbox_deliver_t) postfix_read_spool_files(publicinbox_deliver_t) files_read_etc_files(publicinbox_deliver_t) # Allow managing anything in publicinbox_var_lib_t manage_dirs_pattern(publicinbox_deliver_t, publicinbox_var_lib_t, publicinbox_var_lib_t) manage_files_pattern(publicinbox_deliver_t, publicinbox_var_lib_t, publicinbox_var_lib_t) # Allow executing bin (for git, mostly) corecmd_exec_bin(publicinbox_deliver_t) # git-fast-import wants to access system state and other bits kernel_dontaudit_read_system_state(publicinbox_deliver_t) # Allow using spamc spamassassin_domtrans_client(publicinbox_deliver_t) manage_files_pattern(spamc_t, publicinbox_var_lib_t, publicinbox_var_lib_t) read_files_pattern(spamd_t, publicinbox_var_lib_t, publicinbox_var_lib_t) # Manage our tmp files manage_dirs_pattern(publicinbox_deliver_t, publicinbox_tmp_t, publicinbox_tmp_t) manage_files_pattern(publicinbox_deliver_t, publicinbox_tmp_t, publicinbox_tmp_t) files_tmp_filetrans(publicinbox_deliver_t, publicinbox_tmp_t, { file dir }) public-inbox-1.9.0/devel/000077500000000000000000000000001430031475700152175ustar00rootroot00000000000000public-inbox-1.9.0/devel/README000066400000000000000000000001011430031475700160670ustar00rootroot00000000000000scripts use for public-inbox development that don't belong in t/ public-inbox-1.9.0/devel/syscall-list000077500000000000000000000035611430031475700175750ustar00rootroot00000000000000# Copyright all contributors # License: AGPL-3.0+ # Dump syscall numbers under Linux and any other kernel which # promises stable syscall numbers. This is to maintain # PublicInbox::Syscall # DO NOT USE this for *BSDs, none of the current BSD kernels # we know about promise stable syscall numbers, we'll use # Inline::C to support them. eval 'exec perl -S $0 ${1+"$@"}' # no shebang if 0; # running under some shell use strict; use v5.10.1; use File::Temp 0.19; use POSIX qw(uname); say '$machine='.(POSIX::uname())[-1]; my $cc = $ENV{CC} // 'cc'; my @cflags = split(/\s+/, $ENV{CFLAGS} // '-Wall'); my $str = do { local $/; }; my $tmp = File::Temp->newdir('syscall-list-XXXX', TMPDIR => 1); my $f = "$tmp/sc.c"; my $x = "$tmp/sc"; open my $fh, '>', $f or die "open $f $!"; print $fh $str or die "print $f $!"; close $fh or die "close $f $!"; system($cc, '-o', $x, $f, @cflags) == 0 or die "cc failed \$?=$?"; exec($x); __DATA__ #define _GNU_SOURCE #include #include #ifdef __linux__ #include #endif #include #include #include #define D(x) printf("$" #x " = %ld;\n", (long)x) int main(void) { #ifdef __linux__ D(SYS_epoll_create1); D(SYS_epoll_ctl); #ifdef SYS_epoll_wait D(SYS_epoll_wait); #endif D(SYS_epoll_pwait); D(SYS_signalfd4); D(SYS_inotify_init1); D(SYS_inotify_add_watch); D(SYS_inotify_rm_watch); D(SYS_prctl); D(SYS_fstatfs); D(SYS_sendmsg); D(SYS_recvmsg); #ifdef FS_IOC_GETFLAGS printf("FS_IOC_GETFLAGS=%#lx\nFS_IOC_SETFLAGS=%#lx\n", (unsigned long)FS_IOC_GETFLAGS, (unsigned long)FS_IOC_SETFLAGS); #endif #ifdef SYS_renameat2 D(SYS_renameat2); #endif #endif /* Linux, any other OSes with stable syscalls? */ printf("size_t=%zu off_t=%zu pid_t=%zu\n", sizeof(size_t), sizeof(off_t), sizeof(pid_t)); return 0; } public-inbox-1.9.0/examples/000077500000000000000000000000001430031475700157365ustar00rootroot00000000000000public-inbox-1.9.0/examples/README000066400000000000000000000010041430031475700166110ustar00rootroot00000000000000Various example configuration files related to public-inbox ----------------------------------------------------------- For all server admins --------------------- public-inbox-config - configuration file, this maps configured inboxes For PSGI/Plack (HTTP) servers ----------------------------- public-inbox.psgi - starting point for PSGI/Plack users in production and dev Contact ------- Please send any related feedback to public-inbox: meta@public-inbox.org Our public-inbox is: https://public-inbox.org/meta/ public-inbox-1.9.0/examples/README.unsubscribe000066400000000000000000000031371430031475700211450ustar00rootroot00000000000000Unsubscribe endpoints for mlmmj users (and possibly Mailman, too) * examples/unsubscribe.milter filters outgoing messages and appends an HTTPS URL to the List-Unsubscribe header. This List-Unsubscribe header should point to the PSGI described below. Currently, this is only active for a whitelist of test addresses in /etc/unsubscribe-milter.whitelist with one email address per line. * examples/unsubscribe.psgi is a PSGI which needs to run as the mlmmj user with permission to run mlmmj-unsub. This depends on the PublicInbox::Unsubscribe module which may be extracted from the rest of public-inbox. It is strongly recommended to NOT run the rest of the public-inbox WWW code in the same process as this PSGI. (The public-inbox WWW code will never need write permissions to anything besides stderr). * Both the .milter and .psgi examples are bundled with systemd service and socket activation examples. AFAIK no other PSGI server besides public-inbox-httpd supports systemd socket activation. To wire up the milter for postfix, I use the following in /etc/postfix/main.cf: # Milter configuration milter_default_action = accept milter_protocol = 2 # other milters may be chained here (e.g. opendkim) # chroot users will need to adjust this path smtpd_milters = local:/var/spool/postfix/unsubscribe/unsubscribe.sock # This is not needed for mlmmj since mlmmj uses SMTP: # non_smtpd_milters = local:/var/spool/postfix/unsubscribe/unsubscribe.sock Copyright (C) 2016-2021 all contributors License: AGPL-3.0+ public-inbox-1.9.0/examples/cgit-commit-filter.lua000066400000000000000000000030401430031475700221350ustar00rootroot00000000000000-- Copyright (C) 2015-2021 all contributors -- License: GPLv2 or later -- This commit filter maps a subject line to a search URL of a public-inbox -- disclaimer: written by someone who does not know Lua. -- -- This requires cgit linked with Lua -- Usage (in your cgitrc(5) config file): -- -- commit-filter=lua:/path/to/this/script.lua -- -- Example site: https://80x24.org/public-inbox.git/ local urls = {} urls['public-inbox.git'] = 'https://public-inbox.org/meta/' -- additional URLs here... -- TODO we should be able to auto-generate this based on "coderepo" -- directives in the public-inbox config file; but keep in mind -- the mapping is M:N between inboxes and coderepos function filter_open(...) lineno = 0 buffer = "" end function filter_close() -- cgit opens and closes this filter for the commit subject -- and body separately, and we only generate the link based -- on the commit subject: if lineno == 1 and string.find(buffer, "\n") == nil then u = urls[os.getenv('CGIT_REPO_URL')] if u == nil then html(buffer) else html("') html_txt(buffer) html('') end else -- pass the body-through as-is -- TODO: optionally use WwwHighlight for linkification like -- cgit-wwwhighlight-filter.lua html(buffer) end return 0 end function filter_write(str) lineno = lineno + 1 buffer = buffer .. str end public-inbox-1.9.0/examples/cgit-wwwhighlight-filter.lua000066400000000000000000000045051430031475700233700ustar00rootroot00000000000000-- Copyright (C) 2019-2021 all contributors -- License: GPL-2.0+ -- -- This filter accesses the PublicInbox::WwwHighlight PSGI endpoint -- (see examples/highlight.psgi) -- -- Dependencies: lua-http -- -- disclaimer: written by someone who does not know Lua. -- -- This requires cgit linked with Lua -- Usage (in your cgitrc(5) config file): -- -- source-filter=lua:/path/to/this/script.lua -- about-filter=lua:/path/to/this/script.lua -- local wwwhighlight_url = 'http://127.0.0.1:9090/' local req_timeout = 10 local too_big = false -- match $PublicInbox::HTTP::MAX_REQUEST_BUFFER local max_len = 10 * 1024 * 1024 -- about-filter needs surrounding
 tags if all we do is
-- highlight and linkify
local pre = true

function filter_open(...)
	req_body = ""

	-- detect when we're used in an about-filter
	local repo_url = os.getenv('CGIT_REPO_URL')
	if repo_url then
		local path_info = os.getenv('PATH_INFO')
		rurl = path_info:match("^/(.+)/about/?$")
		pre = rurl == repo_url
	end

	-- hand filename off for language detection
	local fn = select(1, ...)
	if fn then
		local http_util = require 'http.util'
		wwwhighlight_url = wwwhighlight_url .. http_util.encodeURI(fn)
	end
end

-- try to buffer the entire source in memory
function filter_write(str)
	if too_big then
		html(str)
	elseif (req_body:len() + str:len()) > max_len then
		too_big = true
		req_body = ""
		html(req_body)
		html(str)
	else
		req_body = req_body .. str
	end
end

function fail(err)
	io.stderr:write(tostring(err), "\n")
	if pre then
		html("
")
	end
	html_txt(req_body)
	if pre then
		html("
") end return 1 end function filter_close() if too_big then return 0 end local request = require 'http.request' local req = request.new_from_uri(wwwhighlight_url) req.headers:upsert(':method', 'PUT') req:set_body(req_body) -- don't wait for 100-Continue message from the PSGI app req.headers:delete('expect') local headers, stream = req:go(req_timeout) if headers == nil then return fail(stream) end local status = headers:get(':status') if status ~= '200' then return fail('status ' .. status) end local body, err = stream:get_body_as_string() if not body and err then return fail(err) end if pre then html("
")
	end
	html(body)
	if pre then
		html("
") end return 0 end public-inbox-1.9.0/examples/cgit.psgi000066400000000000000000000012551430031475700175530ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2019-2021 all contributors # License: GPL-3.0+ # # PublicInbox::Cgit may be used independently of WWW. # # Usage (development, with auto-reload): # plackup -I lib -o 127.0.0.1 -R lib -r examples/cgit.psgi # # Usage (production, with public-inbox-httpd(1)): # public-inbox-httpd [OPTIONS] /path/to/examples/cgit.psgi use strict; use warnings; use Plack::Builder; use PublicInbox::Cgit; use PublicInbox::Config; my $pi_cfg = PublicInbox::Config->new; my $cgit = PublicInbox::Cgit->new($pi_cfg); builder { eval { enable 'ReverseProxy' }; enable 'Head'; sub { $cgit->call($_[0]) } } public-inbox-1.9.0/examples/grok-pull.post_update_hook.sh000077500000000000000000000101161430031475700235560ustar00rootroot00000000000000#!/bin/sh # use flock(1) from util-linux to avoid seek contention on slow HDDs # when using multiple `pull_threads' with grok-pull: # [ "${FLOCKER}" != "$0" ] && exec env FLOCKER="$0" flock "$0" "$0" "$@" || : # post_update_hook for repos.conf as used by grok-pull, takes a full # git repo path as it's first and only arg. full_git_dir="$1" url_base=http://127.0.0.1:8080/ # same default as other public-inbox-* tools PI_CONFIG=${PI_CONFIG-~/.public-inbox/config} # FreeBSD expr(1) only supports BRE, so no '+' EPOCH2MAIN='\(..*\)/git/[0-9][0-9]*\.git' # see if it's v2 or v1 based on tree contents, since somebody could # theoretically name a v1 inbox with a path that looks like a v2 epoch if git --git-dir="$full_git_dir" ls-tree --name-only HEAD | \ grep -E '^(m|d)$' >/dev/null then inbox_fmt=2 inbox_dir=$(expr "$full_git_dir" : "$EPOCH2MAIN") inbox_name=$(basename "$inbox_dir") msgmap="$inbox_dir"/msgmap.sqlite3 inbox_lock="$inbox_dir"/inbox.lock else inbox_fmt=1 inbox_dir="$full_git_dir" inbox_name=$(basename "$inbox_dir" .git) msgmap="$inbox_dir"/public-inbox/msgmap.sqlite3 inbox_lock="$inbox_dir"/ssoma.lock fi # run public-inbox-init iff unconfigured cfg_dir=$(git config -f "$PI_CONFIG" publicinbox."$inbox_name".inboxdir) # check legacy name for "inboxdir" case $cfg_dir in '') cfg_dir=$(git config -f "$PI_CONFIG" publicinbox."$inbox_name".mainrepo) ;; esac case $cfg_dir in '') remote_git_url=$(git --git-dir="$full_git_dir" config remote.origin.url) case $remote_git_url in '') echo >&2 "remote.origin.url unset in $full_git_dir/config" exit 1 ;; esac case $inbox_fmt in 1) remote_inbox_url="$remote_git_url" ;; 2) remote_inbox_url=$(expr "$remote_git_url" : "$EPOCH2MAIN") ;; esac config_url="$remote_inbox_url"/_/text/config/raw remote_config="$inbox_dir"/remote.config.$$ infourls= trap 'rm -f "$remote_config"' EXIT if curl --compressed -sSf -v "$config_url" >"$remote_config" then # n.b. inbox_name on the remote may not match our local # inbox_name, so we match all addresses in the remote config addresses=$(git config -f "$remote_config" -l | \ sed -ne 's/^publicinbox\..\+\.address=//p') case $addresses in '') echo >&2 'unable to extract address(es) from ' \ "$remote_config" exit 1 ;; esac newsgroups=$(git config -f "$remote_config" -l | \ sed -ne 's/^publicinbox\..\+\.newsgroup=//p') infourls=$(git config -f "$remote_config" -l | \ sed -ne 's/^publicinbox\..\+.infourl=//p') else newsgroups= addresses="$inbox_name@$$.$(hostname).example.com" echo >&2 "E: curl $config_url failed" echo >&2 "E: using bogus <$addresses> for $inbox_dir" fi local_url="$url_base$inbox_name" public-inbox-init -V$inbox_fmt "$inbox_name" \ "$inbox_dir" "$local_url" $addresses if test $? -ne 0 then echo >&2 "E: public-inbox-init failed on $inbox_dir" exit 1 fi for ng in $newsgroups do git config -f "$PI_CONFIG" \ "publicinbox.$inbox_name.newsgroup" "$ng" # only one newsgroup per inbox break done for url in $infourls do git config -f "$PI_CONFIG" \ "publicinbox.$inbox_name.infourl" "$url" done curl -sSfv "$remote_inbox_url"/description >"$inbox_dir"/description echo "I: $inbox_name at $inbox_dir ($addresses) $local_url" ;; esac # only run public-inbox-index if an index exists and has messages, # since epochs may be cloned out-of-order by grokmirror and we also # don't know what indexlevel a user wants if test -f "$msgmap" then # We need to use flock(1) (from util-linux) to avoid timeouts # and SQLite locking problems. # FreeBSD has a similar lockf(1) utility, but it unlinks by # default so we use `-k' to keep the lock on the FS. FLOCK=flock case $(uname -s) in FreeBSD) FLOCK='lockf -k' ;; # ... other OSes here esac n=$(echo 'SELECT COUNT(*) FROM msgmap' | \ $FLOCK $inbox_lock sqlite3 -readonly "$msgmap") case $n in 0|'') : v2 inboxes may be init-ed with an empty msgmap ;; *) # if on HDD and limited RAM, add `--sequential-shard' # and possibly a large `--batch-size' if you have much # memory in public-inbox 1.6.0+ $EATMYDATA public-inbox-index -v "$inbox_dir" ;; esac fi public-inbox-1.9.0/examples/highlight.psgi000066400000000000000000000011661430031475700205750ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # # Usage: plackup [OPTIONS] /path/to/this/file # A startup command for development which monitors changes: # plackup -I lib -o 127.0.0.1 -R lib -r examples/highlight.psgi # # .psgi paths may also be passed to public-inbox-httpd(1) for # production deployments: # public-inbox-httpd [OPTIONS] /path/to/examples/highlight.psgi use strict; use warnings; use PublicInbox::WwwHighlight; use Plack::Builder; my $hl = PublicInbox::WwwHighlight->new; builder { sub { $hl->call(@_) }; } public-inbox-1.9.0/examples/lib/000077500000000000000000000000001430031475700165045ustar00rootroot00000000000000public-inbox-1.9.0/examples/lib/.gitignore000066400000000000000000000002511430031475700204720ustar00rootroot00000000000000# empty directory to placate newer versions of plackup -r/--reload # (or dependent modules) which fail on missing "lib" directory # relative to the .psgi file being run public-inbox-1.9.0/examples/logrotate.conf000066400000000000000000000012751430031475700206120ustar00rootroot00000000000000# ==> /etc/logrotate.d/public-inbox <== # # See the logrotate(8) manpage for more information: # http://linux.die.net/man/8/logrotate /var/log/public-inbox/*.log { weekly missingok rotate 52 compress delaycompress notifempty sharedscripts dateext # note the lack of the racy "copytruncate" option in this # config. public-inbox-*d supports the USR1 signal and # we send it as our "lastaction": lastaction # systemd users do not need PID files, # only signal the @1 process since the @2 is short-lived # For systemd users, assuming you use two services systemctl kill -s SIGUSR1 public-inbox-httpd@1.service systemctl kill -s SIGUSR1 public-inbox-nntpd@1.service endscript } public-inbox-1.9.0/examples/newswww.psgi000066400000000000000000000025371430031475700203520ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2019-2021 all contributors # License: GPL-3.0+ # # NewsWWW may be used independently of WWW. This can be useful # for mapping HTTP/HTTPS requests to the hostname of an NNTP server # to redirect users to the proper HTTP/HTTPS endpoint for a given # inbox. NewsWWW exists because people (or software) can mishandle # "nntp://" or "news://" URLs as "http://" (or "https://") # # Usage (development, with auto-reload): # plackup -I lib -o 127.0.0.1 -R lib -r examples/newswww.psgi # # Usage (production, with public-inbox-httpd(1)): # public-inbox-httpd [OPTIONS] /path/to/examples/newsww.psgi use strict; use warnings; use Plack::Builder; use PublicInbox::WWW; use PublicInbox::NewsWWW; my $newswww = PublicInbox::NewsWWW->new; # Optional, (you may drop the "mount '/'" section below) my $www = PublicInbox::WWW->new; $www->preload; builder { # HTTP/1.1 requests to "Host: news.example.com" will hit this: mount 'http://news.example.com/' => builder { enable 'Head'; sub { $newswww->call($_[0]) }; }; # rest of requests will hit this (optional) part for the # regular PublicInbox::WWW code: # see comments in examples/public-inbox.psgi for more info: mount '/' => builder { eval { enable 'ReverseProxy' }; enable 'Head'; sub { $www->call($_[0]) } }; } public-inbox-1.9.0/examples/nginx_proxy000066400000000000000000000013711430031475700202470ustar00rootroot00000000000000# Example NGINX configuration to proxy-pass requests # to public-inbox-httpd or to a standalone PSGI/Plack server. # The daemon is assumed to be running locally on port 8001. # Adjust ssl certificate paths if you use any, or remove # the ssl configuration directives if you don't. server { server_name _; listen 80; access_log /var/log/nginx/public-inbox-httpd_access.log; error_log /var/log/nginx/public-inbox-httpd_error.log; location ~* ^/(.*)$ { proxy_set_header HOST $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-Proto $scheme; proxy_pass http://127.0.0.1:8001$request_uri; } listen 443 ssl; ssl_certificate /path/to/certificate.pem; ssl_certificate_key /path/to/certificate_key.pem; } public-inbox-1.9.0/examples/public-inbox-config000066400000000000000000000010471430031475700215210ustar00rootroot00000000000000# this usually in ~/.public-inbox/config and parseable with git-config(1) # update t/config.t if changing this, that test relies on this [publicinbox "test"] address = try@public-inbox.org address = sandbox@public-inbox.org address = test@public-inbox.org ; note: "mainrepo" is the old name for "inboxdir", both ; remain supported for backwards compatibility. inboxdir = /home/pi/test-main.git url = http://example.com/test [publicinbox "meta"] address = meta@public-inbox.org inboxdir = /home/pi/meta-main.git url = http://example.com/meta public-inbox-1.9.0/examples/public-inbox-httpd.socket000066400000000000000000000005161430031475700226660ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-httpd.socket <== # Consider looking at public-inbox-netd.socket instead of this file # to simplify management when serving multiple protocols. [Unit] Description = public-inbox-httpd socket [Socket] ListenStream = 80 Service = public-inbox-httpd@1.service [Install] WantedBy = sockets.target public-inbox-1.9.0/examples/public-inbox-httpd@.service000066400000000000000000000025651430031475700231440ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-httpd@.service <== # Consider looking at public-inbox-netd@.service instead of this file # to simplify management when serving multiple protocols. # # Since SIGUSR2 upgrades do not work under systemd, this service file # allows starting two simultaneous services during upgrade time # (e.g. public-inbox-httpd@1 public-inbox-httpd@2) with the intention # that they take turns running in-between upgrades. This should # allow upgrading without downtime. # For servers expecting visitors from multiple timezones, TZ=UTC # is needed to ensure a consistent approxidate experience with search. [Unit] Description = public-inbox PSGI server %i Wants = public-inbox-httpd.socket After = public-inbox-httpd.socket [Service] Environment = PI_CONFIG=/home/pi/.public-inbox/config \ PATH=/usr/local/bin:/usr/bin:/bin \ TZ=UTC \ PERL_INLINE_DIRECTORY=/tmp/.pub-inline LimitNOFILE = 30000 ExecStartPre = /bin/mkdir -p -m 1777 /tmp/.pub-inline ExecStart = /usr/local/bin/public-inbox-httpd \ -1 /var/log/public-inbox/httpd.out.log StandardError = syslog # NonBlocking is REQUIRED to avoid a race condition if running # simultaneous services NonBlocking = true Sockets = public-inbox-httpd.socket KillSignal = SIGQUIT User = nobody Group = nogroup ExecReload = /bin/kill -HUP $MAINPID TimeoutStopSec = 86400 KillMode = process [Install] WantedBy = multi-user.target public-inbox-1.9.0/examples/public-inbox-imapd.socket000066400000000000000000000016141430031475700226350ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-imapd.socket <== # Consider looking at public-inbox-netd.socket instead of this file # to simplify management when serving multiple protocols. # # This contains 5 sockets for an public-inbox-imapd instance. # The TCP ports are well-known ports registered in /etc/services. # The /run/imapd.onion.sock entry is meant for the Tor hidden service # enabled by the following line in the torrc(5) file: # HiddenServicePort 143 unix:/run/imapd.onion.sock [Unit] Description = public-inbox-imapd sockets [Socket] ListenStream = 0.0.0.0:143 ListenStream = 0.0.0.0:993 ListenStream = /run/imapd.onion.sock # Separating IPv4 from IPv6 listeners makes for nicer output # of IPv4 addresses in various reporting/monitoring tools BindIPv6Only = ipv6-only ListenStream = [::]:143 ListenStream = [::]:993 Service = public-inbox-imapd@1.service [Install] WantedBy = sockets.target public-inbox-1.9.0/examples/public-inbox-imapd@.service000066400000000000000000000025131430031475700231040ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-imapd@.service <== # Consider looking at public-inbox-netd@.service instead of this file # to simplify management when serving multiple protocols. # # Since SIGUSR2 upgrades do not work under systemd, this service file # allows starting two simultaneous services during upgrade time # (e.g. public-inbox-imapd@1 public-inbox-imapd@2) with the intention # that they take turns running in-between upgrades. This should # allow upgrading without downtime. [Unit] Description = public-inbox-imapd IMAP server %i Wants = public-inbox-imapd.socket After = public-inbox-imapd.socket [Service] Environment = PI_CONFIG=/home/pi/.public-inbox/config \ PATH=/usr/local/bin:/usr/bin:/bin \ PERL_INLINE_DIRECTORY=/tmp/.pub-inline LimitNOFILE = 30000 ExecStartPre = /bin/mkdir -p -m 1777 /tmp/.pub-inline ExecStart = /usr/local/bin/public-inbox-imapd -W0 \ -1 /var/log/public-inbox/imapd.out.log \ --cert /etc/ssl/certs/news.example.com.pem \ --key /etc/ssl/private/news.example.com.key StandardError = syslog # NonBlocking is REQUIRED to avoid a race condition if running # simultaneous services NonBlocking = true Sockets = public-inbox-imapd.socket KillSignal = SIGQUIT User = nobody Group = ssl-cert ExecReload = /bin/kill -HUP $MAINPID TimeoutStopSec = 86400 KillMode = process [Install] WantedBy = multi-user.target public-inbox-1.9.0/examples/public-inbox-netd.socket000066400000000000000000000024521430031475700224760ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-netd.socket <== # This contains all the services that public-inbox-netd can run; # allowing it to replace (or run in parallel to) any existing -httpd, # -imapd, -nntpd, or -pop3d instances. # # The TCP ports are well-known ports registered in /etc/services. # The /run/*.sock entries are meant for the Tor hidden service # enabled by the following lines in the torrc(5) file: # HiddenServicePort 110 unix:/run/pop3.sock # HiddenServicePort 119 unix:/run/nntp.sock # HiddenServicePort 143 unix:/run/imap.sock [Unit] Description = public-inbox-netd sockets [Socket] # for tor (see torrc(5)) ListenStream = /run/imap.sock ListenStream = /run/pop3.sock ListenStream = /run/nntp.sock # this is for varnish: ListenStream = 127.0.0.1:280 # public facing ListenStream = 0.0.0.0:110 ListenStream = 0.0.0.0:119 ListenStream = 0.0.0.0:143 ListenStream = 0.0.0.0:563 ListenStream = 0.0.0.0:993 ListenStream = 0.0.0.0:995 # Separating IPv4 from IPv6 listeners makes for nicer output # of IPv4 addresses in various reporting/monitoring tools BindIPv6Only = ipv6-only ListenStream = [::]:110 ListenStream = [::]:119 ListenStream = [::]:143 ListenStream = [::]:563 ListenStream = [::]:993 ListenStream = [::]:995 Service = public-inbox-netd@1.service [Install] WantedBy = sockets.target public-inbox-1.9.0/examples/public-inbox-netd@.service000066400000000000000000000051371430031475700227510ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-netd@.service <== # Since SIGUSR2 upgrades do not work under systemd, this service file # allows starting two simultaneous services during upgrade time # (e.g. public-inbox-netd@1 public-inbox-netd@2) with the intention # that they take turns running in-between upgrades. This should # allow upgrading without downtime. # For servers expecting visitors from multiple timezones, TZ=UTC # is needed to ensure a consistent approxidate experience with search. [Unit] Description = public-inbox-netd server %i Wants = public-inbox-netd.socket After = public-inbox-netd.socket [Service] Environment = PI_CONFIG=/home/pi/.public-inbox/config \ PATH=/usr/local/bin:/usr/bin:/bin \ TZ=UTC \ PERL_INLINE_DIRECTORY=/tmp/.netd-inline LimitNOFILE = 30000 LimitCORE = infinity ExecStartPre = /bin/mkdir -p -m 1777 /tmp/.netd-inline # The '-l' args below map each socket in public-inbox-netd.socket to # the appropriate IANA service name: ExecStart = /usr/local/bin/public-inbox-netd -W0 \ -1 /var/log/netd/stdout.out.log \ --cert /etc/ssl/certs/news.example.com.pem \ --key /etc/ssl/private/news.example.com.key -l imap:///run/imap.sock?out=/var/log/netd/imap.out,err=/var/log/netd/imap.err \ -l nntp:///run/nntp.sock?out=/var/log/netd/nntp.out,err=/var/log/netd/nntp.err \ -l pop3:///run/pop3.sock?out=/var/log/netd/pop3.out,err=/var/log/netd/pop3.err \ -l imap://0.0.0.0/?out=/var/log/netd/imap.out,err=/var/log/netd/imap.err \ -l nntp://0.0.0.0/?out=/var/log/netd/nntp.out,err=/var/log/netd/nntp.err \ -l pop3://0.0.0.0/?out=/var/log/netd/pop3.out,err=/var/log/netd/pop3.err \ -l imap://[::]/?out=/var/log/netd/imap.out,err=/var/log/netd/imap.err \ -l nntp://[::]/?out=/var/log/netd/nntp.out,err=/var/log/netd/nntp.err \ -l pop3://[::]/?out=/var/log/netd/pop3.out,err=/var/log/netd/pop3.err \ -l imaps://0.0.0.0/?out=/var/log/netd/imap.out,err=/var/log/netd/imap.err \ -l nntps://0.0.0.0/?out=/var/log/netd/nntp.out,err=/var/log/netd/nntp.err \ -l pop3s://0.0.0.0/?out=/var/log/netd/pop3.out,err=/var/log/netd/pop3.err \ -l imaps://[::]/?out=/var/log/netd/imap.out,err=/var/log/netd/imap.err \ -l nntps://[::]/?out=/var/log/netd/nntp.out,err=/var/log/netd/nntp.err \ -l pop3s://[::]/?out=/var/log/netd/pop3.out,err=/var/log/netd/pop3.err \ -l http://127.0.0.1:280/?psgi=/etc/public.psgi,err=/var/log/netd/http.err # NonBlocking is REQUIRED to avoid a race condition if running # simultaneous services NonBlocking = true Sockets = public-inbox-netd.socket KillSignal = SIGQUIT User = news Group = ssl-cert ExecReload = /bin/kill -HUP $MAINPID TimeoutStopSec = 30 KillMode = process [Install] WantedBy = multi-user.target public-inbox-1.9.0/examples/public-inbox-nntpd.socket000066400000000000000000000016141430031475700226660ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-nntpd.socket <== # Consider looking at public-inbox-netd.socket instead of this file # to simplify management when serving multiple protocols. # # This contains 5 sockets for an public-inbox-nntpd instance. # The TCP ports are well-known ports registered in /etc/services. # The /run/nntpd.onion.sock entry is meant for the Tor hidden service # enabled by the following line in the torrc(5) file: # HiddenServicePort 119 unix:/run/nntpd.onion.sock [Unit] Description = public-inbox-nntpd sockets [Socket] ListenStream = 0.0.0.0:119 ListenStream = 0.0.0.0:563 ListenStream = /run/nntpd.onion.sock # Separating IPv4 from IPv6 listeners makes for nicer output # of IPv4 addresses in various reporting/monitoring tools BindIPv6Only = ipv6-only ListenStream = [::]:119 ListenStream = [::]:563 Service = public-inbox-nntpd@1.service [Install] WantedBy = sockets.target public-inbox-1.9.0/examples/public-inbox-nntpd@.service000066400000000000000000000025011430031475700231320ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-nntpd@.service <== # Consider looking at public-inbox-netd@.service instead of this file # to simplify management when serving multiple protocols. # # Since SIGUSR2 upgrades do not work under systemd, this service file # allows starting two simultaneous services during upgrade time # (e.g. public-inbox-nntpd@1 public-inbox-nntpd@2) with the intention # that they take turns running in-between upgrades. This should # allow upgrading without downtime. [Unit] Description = public-inbox NNTP server %i Wants = public-inbox-nntpd.socket After = public-inbox-nntpd.socket [Service] Environment = PI_CONFIG=/home/pi/.public-inbox/config \ PATH=/usr/local/bin:/usr/bin:/bin \ PERL_INLINE_DIRECTORY=/tmp/.pub-inline LimitNOFILE = 30000 ExecStartPre = /bin/mkdir -p -m 1777 /tmp/.pub-inline ExecStart = /usr/local/bin/public-inbox-nntpd \ -1 /var/log/public-inbox/nntpd.out.log \ --cert /etc/ssl/certs/news.example.com.pem \ --key /etc/ssl/private/news.example.com.key StandardError = syslog # NonBlocking is REQUIRED to avoid a race condition if running # simultaneous services NonBlocking = true Sockets = public-inbox-nntpd.socket KillSignal = SIGQUIT User = nobody Group = ssl-cert ExecReload = /bin/kill -HUP $MAINPID TimeoutStopSec = 86400 KillMode = process [Install] WantedBy = multi-user.target public-inbox-1.9.0/examples/public-inbox-watch.service000066400000000000000000000007551430031475700230260ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-watch.service <== [Unit] Description = public-inbox Maildir watch After = spamassassin.service [Service] Environment = PI_CONFIG=/home/pi/.public-inbox/config \ PATH=/usr/local/bin:/usr/bin:/bin ExecStart = /usr/local/bin/public-inbox-watch StandardOutput = syslog StandardError = syslog ExecReload = /bin/kill -HUP $MAINPID # this user must have read access to Maildirs it watches User = pi KillMode = process [Install] WantedBy = multi-user.target public-inbox-1.9.0/examples/public-inbox.psgi000066400000000000000000000034411430031475700212170ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2014-2021 all contributors # License: GPL-3.0+ # Note: this is part of our test suite, update t/plack.t if this changes # Usage: plackup [OPTIONS] /path/to/this/file # # A startup command for development which monitors changes: # plackup -I lib -o 127.0.0.1 -R lib -r examples/public-inbox.psgi # # .psgi paths may also be passed to public-inbox-httpd(1) for # production deployments: # public-inbox-httpd [OPTIONS] /path/to/examples/public-inbox.psgi use strict; use warnings; use PublicInbox::WWW; use Plack::Builder; my $www = PublicInbox::WWW->new; $www->preload; # share the public-inbox code itself: my $src = $ENV{SRC_GIT_DIR}; # '/path/to/public-inbox.git' $src = PublicInbox::Git->new($src) if defined $src; builder { # Enable to ensure redirects and Atom feed URLs are generated # properly when running behind a reverse proxy server which # sets the X-Forwarded-Proto request header. # See Plack::Middleware::ReverseProxy documentation for details eval { enable 'ReverseProxy' }; $@ and warn "Plack::Middleware::ReverseProxy missing,\n", "URL generation for redirects may be wrong if behind a reverse proxy\n"; # Optional: Log timing information for requests to track performance. # Logging to STDOUT is recommended since public-inbox-httpd knows # how to reopen it via SIGUSR1 after log rotation. # enable 'AccessLog::Timed', # logger => sub { syswrite(STDOUT, $_[0]) }, # format => '%t "%r" %>s %b %D'; enable 'Head'; sub { my ($env) = @_; # share public-inbox.git code! if ($src && $env->{PATH_INFO} =~ m!\A/(?:public-inbox(?:\.git)?/)? ($PublicInbox::GitHTTPBackend::ANY)\z!xo) { PublicInbox::GitHTTPBackend::serve($env, $src, $1); } else { $www->call($env); } }; } public-inbox-1.9.0/examples/unsubscribe-milter.socket000066400000000000000000000003741430031475700227720ustar00rootroot00000000000000# ==> /etc/systemd/system/unsubscribe-milter.socket <== [Unit] Description = unsubscribe.milter socket [Socket] ListenStream = /var/spool/postfix/unsubscribe/unsubscribe.sock Service = unsubscribe-milter@1.service [Install] WantedBy = sockets.target public-inbox-1.9.0/examples/unsubscribe-milter@.service000066400000000000000000000020051430031475700232330ustar00rootroot00000000000000# ==> /etc/systemd/system/unsubscribe-milter@.service <== # The '@' is to allow multiple simultaneous services to start # and share the same socket so new code can be cycled in # without downtime [Unit] Description = unsubscribe milter %i Wants = unsubscribe-milter.socket After = unsubscribe-milter.socket [Service] # First 8 bytes is for the key, next 8 bytes is for the IV # using Blowfish. We want as short URLs as possible to avoid # copy+paste errors # umask 077 && dd if=/dev/urandom bs=16 count=1 of=.unsubscribe.key ExecStart = /usr/local/sbin/unsubscribe.milter /home/mlmmj/.unsubscribe.key # UNIQUE_MAILTO makes the List-Unsubscribe mailto: header unique # so unsubcribing becomes one-step (requires MDA/MTA configuration, # see the bottom of examples/unsubscribe.milter # Environment = UNIQUE_MAILTO=1 Sockets = unsubscribe-milter.socket # the corresponding PSGI app needs permissions to modify the # mlmmj spool, so we might as well use the same user since User = mlmmj [Install] WantedBy = multi-user.target public-inbox-1.9.0/examples/unsubscribe-psgi.socket000066400000000000000000000004161430031475700224350ustar00rootroot00000000000000# ==> /etc/systemd/system/unsubscribe-psgi.socket <== [Unit] Description = unsubscribe PSGI socket [Socket] # Forward to the PSGI using nginx or similar ListenStream = /run/unsubscribe-psgi.sock Service = unsubscribe-psgi@1.service [Install] WantedBy = sockets.target public-inbox-1.9.0/examples/unsubscribe-psgi@.service000066400000000000000000000013461430031475700227100ustar00rootroot00000000000000# ==> /etc/systemd/system/unsubscribe-psgi@.service <== # The '@' is to allow multiple simultaneous services to start # and share the same socket so new code can be cycled in # without downtime [Unit] Description = unsubscribe PSGI %i Wants = unsubscribe-psgi.socket After = unsubscribe-psgi.socket [Service] # any PSGI server ought to work, # but public-inbox-httpd supports socket activation like unsubscribe.milter ExecStart = /usr/local/bin/public-inbox-httpd -W0 /etc/unsubscribe.psgi # NonBlocking is REQUIRED to avoid a race condition if running # simultaneous services NonBlocking = true Sockets = unsubscribe-psgi.socket # we need to modify the mlmmj spool User = mlmmj KillMode = process [Install] WantedBy = multi-user.target public-inbox-1.9.0/examples/unsubscribe.milter000066400000000000000000000107361430031475700215070ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use Sendmail::PMilter qw(:all); use IO::Socket; use Crypt::CBC; use MIME::Base64 qw(encode_base64url); my $key_file = shift @ARGV or die "Usage: $0 KEY_FILE\n"; open my $fh, '<', $key_file or die "failed to open $key_file\n"; my ($key, $iv); if (read($fh, $key, 8) != 8 || read($fh, $iv, 8) != 8 || read($fh, my $end, 8) != 0) { die "KEY_FILE must be 16 bytes\n"; } # optionally support unique mailto: subject in List-Unsubscribe, # requires a custom rule in front of mlmmj, see __END__ my $unique_mailto = $ENV{UNIQUE_MAILTO}; # these parameters were chosen to generate shorter parameters # to reduce the possibility of copy+paste errors my $crypt = Crypt::CBC->new(-key => $key, -iv => $iv, -header => 'none', -cipher => 'Blowfish'); $fh = $iv = $key = undef; my %cbs; $cbs{connect} = sub { my ($ctx) = @_; eval { $ctx->setpriv({ header => {}, envrcpt => {} }) }; warn $@ if $@; SMFIS_CONTINUE; }; $cbs{envrcpt} = sub { my ($ctx, $addr) = @_; eval { $addr =~ tr!<>!!d; $ctx->getpriv->{envrcpt}->{$addr} = 1; }; warn $@ if $@; SMFIS_CONTINUE; }; $cbs{header} = sub { my ($ctx, $k, $v) = @_; eval { my $k_ = lc $k; if ($k_ eq 'list-unsubscribe') { my $header = $ctx->getpriv->{header} ||= {}; my $ary = $header->{$k_} ||= []; # we create placeholders in case there are # multiple headers of the same name my $cur = []; push @$ary, $cur; # This relies on mlmmj convention: # $LIST+unsubscribe@$DOMAIN if ($v =~ /\A]+)>\z/) { @$cur = ($k, $v, $1, $2); # Mailman convention: # $LIST-request@$DOMAIN?subject=unsubscribe } elsif ($v =~ /\A\z/x) { # @$cur = ($k, $v, $1, $2); } } }; warn $@ if $@; SMFIS_CONTINUE; }; # We don't want people unsubscribing archivers: sub archive_addr { my ($addr) = @_; return 1 if ($addr =~ /\@m\.gmane(?:-mx)?\.org\z/); return 1 if ($addr eq 'archive@mail-archive.com'); 0 } $cbs{eom} = sub { my ($ctx) = @_; eval { my $priv = $ctx->getpriv; $ctx->setpriv({ header => {}, envrcpt => {} }); my @rcpt = keys %{$priv->{envrcpt}}; # one recipient, one unique HTTP(S) URL return SMFIS_CONTINUE if @rcpt != 1; return SMFIS_CONTINUE if archive_addr(lc($rcpt[0])); my $unsub = $priv->{header}->{'list-unsubscribe'} || []; my $n = 0; my $added; foreach my $u (@$unsub) { # Milter indices are 1-based, # not 0-based like Perl arrays my $index = ++$n; my ($k, $v, $list, $domain) = @$u; next unless $k && $v && $list && $domain; my $u = $crypt->encrypt($rcpt[0]); $u = encode_base64url($u); if ($unique_mailto) { # $u needs to be in the Subject: header since # +$EXTENSION is case-insensitive my $s = "subject=$u"; $v = ""; } $v .= ",\n "; $ctx->chgheader($k, $index, $v); $added = 1; } # RFC 8058 $added and $ctx->addheader('List-Unsubscribe-Post', 'List-Unsubscribe=One-Click'); }; warn $@ if $@; SMFIS_CONTINUE; }; my $milter = Sendmail::PMilter->new; # Try to inherit a socket from systemd or similar: my $fds = $ENV{LISTEN_FDS}; if ($fds && (($ENV{LISTEN_PID} || 0) == $$)) { die "$0 can only listen on one FD\n" if $fds != 1; my $start_fd = 3; my $s = IO::Socket->new_from_fd($start_fd, 'r') or die "inherited bad FD from LISTEN_FDS: $!\n"; $milter->set_socket($s); } else { # fall back to binding a socket: my $sock = 'unix:/var/spool/postfix/unsubscribe/unsubscribe.sock'; $milter->set_listen(1024); my $umask = umask 0000; $milter->setconn($sock); umask $umask; } $milter->register('unsubscribe', \%cbs, SMFI_CURR_ACTS); $milter->main(); __END__ # TMPMSG comes from dc-dlvr, it's populated before the above runs: # TMPMSG=$(mktemp -t dc-dlvr.orig.$USER.XXXXXX || exit 1) # cat >$TMPMSG # I use something like this in front of mlmmj for UNIQUE_MAILTO # $EXTENSION and $ORIGINAL_RECIPIENT are set by postfix, $list # is a local mapping of addresses to mailing list names. case $ORIGINAL_RECIPIENT in foo+*) list=foo ;; # ... esac case $EXTENSION in unique-unsub) u="$(formail -z -c -x Subject <$TMPMSG)" d=$(expr "$ORIGINAL_RECIPIENT" : '^.*@\(.*\)') # forward this to the unsubscribe.psgi service curl -sSf https://$d/u/$u/$list >/dev/null exit ;; esac /usr/bin/mlmmj-receive -L /path/to/mlmmj-spool/$list <"$TMPMSG" exit public-inbox-1.9.0/examples/unsubscribe.psgi000066400000000000000000000043541430031475700211540ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2016-2021 all contributors # License: GPL-3.0+ # This should not require any other PublicInbox code, but may use # PublicInbox::Config if ~/.public-inbox/config exists or # PI_CONFIG is pointed to an appropriate location use strict; use Plack::Builder; use PublicInbox::Unsubscribe; my $app = PublicInbox::Unsubscribe->new( pi_config => eval { # optional, for pointing out archives require PublicInbox::Config; # uses ~/.public-inbox/config by default, # can override with PI_CONFIG or here since # I run this .psgi as the mlmmj user while the # public-inbox-mda code which actually writes to # the archives runs as a different user. PublicInbox::Config->new('/home/pi/.public-inbox/config') }, # change if you fork code_url => 'https://public-inbox.org/public-inbox.git', owner_email => 'BOFH@example.com', confirm => 0, # First 8 bytes is for the key, next 8 bytes is for the IV # using Blowfish. We want as short URLs as possible to avoid # copy+paste errors # umask 077 && dd if=/dev/urandom bs=16 count=1 of=.unsubscribe.key key_file => '/home/mlmmj/.unsubscribe.key', # this runs as whatever user has perms to run /usr/bin/mlmmj-unsub # users of other mailing lists. Returns '' on success. unsubscribe => sub { my ($user_addr, $list_addr) = @_; # map list_addr to mlmmj spool, I use: # /home/mlmmj/spool/$LIST here my ($list, $domain) = split('@', $list_addr, 2); my $spool = "/home/mlmmj/spool/$list"; return "Invalid list: $list" unless -d $spool; # -c to send a confirmation email, -s is important # in case a user is click-happy and clicks twice. my @cmd = (qw(/usr/bin/mlmmj-unsub -c -s), '-L', $spool, '-a', $user_addr); # we don't know which version they're subscribed to, # try both non-digest and digest my $normal = system(@cmd); my $digest = system(@cmd, '-d'); # success if either succeeds: return '' if ($normal == 0 || $digest == 0); # missing executable or FS error, # otherwise -s always succeeds, right? return 'Unknown error, contact admin'; }, ); builder { mount '/u' => builder { eval { enable 'ReverseProxy' }; # optional enable 'Head'; sub { $app->call(@_) }; }; }; public-inbox-1.9.0/examples/varnish-4.vcl000066400000000000000000000035351430031475700202650ustar00rootroot00000000000000# Example VCL for Varnish 4.0 with public-inbox WWW code # This is based on what shipped for 3.x a long time ago (I think) # and I'm hardly an expert in VCL (nor should we expect anybody # who maintains a public-inbox HTTP interface to be). # # It seems to work for providing some protection from traffic # bursts; but perhaps the public-inbox WWW interface can someday # provide enough out-of-the-box performance that configuration # of an extra component is pointless. vcl 4.0; backend default { # this is where public-inbox-httpd listens .host = "127.0.0.1"; .port = "280"; } sub vcl_recv { /* pipe POST and any other weird methods directly to backend */ if (req.method != "GET" && req.method != "HEAD") { return (pipe); } if (req.http.Authorization || req.http.Cookie) { /* Not cacheable by default */ return (pass); } return (hash); } sub vcl_pipe { # By default Connection: close is set on all piped requests by varnish, # but public-inbox-httpd supports persistent connections well :) unset bereq.http.connection; return (pipe); } sub vcl_hash { hash_data(req.url); if (req.http.host) { hash_data(req.http.host); } else { hash_data(server.ip); } /* we generate fully-qualified URLs for Atom feeds and redirects */ if (req.http.X-Forwarded-Proto) { hash_data(req.http.X-Forwarded-Proto); } return (lookup); } sub vcl_backend_response { set beresp.grace = 60s; set beresp.do_stream = true; if (beresp.ttl <= 0s || /* no point in caching stuff git already stores on disk */ beresp.http.Content-Type ~ "application/x-git" || beresp.http.Set-Cookie || beresp.http.Vary == "*") { /* Mark as "Hit-For-Pass" for the next 2 minutes */ set beresp.ttl = 120 s; set beresp.uncacheable = true; return (deliver); } else { /* short TTL for up-to-dateness, our PSGI is not that slow */ set beresp.ttl = 10s; } return (deliver); } public-inbox-1.9.0/lei.sh000077500000000000000000000006071430031475700152330ustar00rootroot00000000000000#!/bin/sh -e # symlink this file to a directory in PATH to run lei (or anything in script/*) # without needing perms to install globally. Used by "make symlink-install" p=$(realpath "$0" || readlink "$0") # neither is POSIX, but common p=$(dirname "$p") c=$(basename "$0") # both are POSIX exec ${PERL-perl} -w -I"$p"/lib "$p"/script/"${c%.sh}" "$@" : this script is too short to copyright public-inbox-1.9.0/lib/000077500000000000000000000000001430031475700146665ustar00rootroot00000000000000public-inbox-1.9.0/lib/PublicInbox/000077500000000000000000000000001430031475700171045ustar00rootroot00000000000000public-inbox-1.9.0/lib/PublicInbox/Address.pm000066400000000000000000000017721430031475700210360ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ package PublicInbox::Address; use strict; use v5.10.1; use parent 'Exporter'; our @EXPORT_OK = qw(pairs); sub xs_emails { grep { defined } map { $_->address() } parse_email_addresses($_[0]) } sub xs_names { grep { defined } map { my $n = $_->name; my $addr = $_->address; $n = $_->user if defined($addr) && $n eq $addr; $n; } parse_email_addresses($_[0]); } sub xs_pairs { # for JMAP, RFC 8621 section 4.1.2.3 [ map { # LHS (name) may be undef [ $_->phrase // $_->comment, $_->address ] } parse_email_addresses($_[0]) ]; } eval { require Email::Address::XS; Email::Address::XS->import(qw(parse_email_addresses)); *emails = \&xs_emails; *names = \&xs_names; *pairs = \&xs_pairs; }; if ($@) { require PublicInbox::AddressPP; *emails = \&PublicInbox::AddressPP::emails; *names = \&PublicInbox::AddressPP::names; *pairs = \&PublicInbox::AddressPP::pairs; } 1; public-inbox-1.9.0/lib/PublicInbox/AddressPP.pm000066400000000000000000000030361430031475700212710ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ package PublicInbox::AddressPP; use strict; # very loose regexes, here. We don't need RFC-compliance, # just enough to make thing sanely displayable and pass to git # We favor Email::Address::XS for conformance if available sub emails { ($_[0] =~ /([\w\.\+=\?"\(\)\-!#\$%&'\*\/\^\`\|\{\}~]+\@[\w\.\-\(\)]+) (?:\s[^>]*)?>?\s*(?:\(.*?\))?(?:,\s*|\z)/gx) } sub names { # split by address and post-address comment my @p = split(/]+)\@[\w\.\-]+>?\s*(\(.*?\))?(?:,\s*|\z)/, $_[0]); my @ret; for (my $i = 0; $i <= $#p;) { my $phrase = $p[$i++]; $phrase =~ tr/\r\n\t / /s; $phrase =~ s/\A['"\s]*//; $phrase =~ s/['"\s]*\z//; my $user = $p[$i++] // ''; my $comment = $p[$i++] // ''; if ($phrase =~ /\S/) { $phrase =~ s/\@\S+\z//; push @ret, $phrase; } elsif ($comment =~ /\A\((.*?)\)\z/) { push @ret, $1; } else { push @ret, $user; } } @ret; } sub pairs { # for JMAP, RFC 8621 section 4.1.2.3 my ($s) = @_; [ map { my $addr = $_; if ($s =~ s/\A\s*(.*?)\s*<\Q$addr\E>\s*(.*?)\s*(?:,|\z)// || $s =~ s/\A\s*(.*?)\s*\Q$addr\E\s*(.*?)\s*(?:,|\z)//) { my ($phrase, $comment) = ($1, $2); $phrase =~ tr/\r\n\t / /s; $phrase =~ s/\A['"\s]*//; $phrase =~ s/['"\s]*\z//; $phrase =~ s/\s*<*\s*\z//; $phrase = undef if $phrase !~ /\S/; $comment = ($comment =~ /\((.*?)\)/) ? $1 : undef; [ $phrase // $comment, $addr ] } else { (); } } emails($s) ]; } 1; public-inbox-1.9.0/lib/PublicInbox/Admin.pm000066400000000000000000000235251430031475700205010ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # common stuff for administrative command-line tools # Unstable internal API package PublicInbox::Admin; use strict; use parent qw(Exporter); our @EXPORT_OK = qw(setup_signals); use PublicInbox::Config; use PublicInbox::Inbox; use PublicInbox::Spawn qw(popen_rd); use PublicInbox::Eml; *rel2abs_collapsed = \&PublicInbox::Config::rel2abs_collapsed; sub setup_signals { my ($cb, $arg) = @_; # optional require POSIX; # we call exit() here instead of _exit() so DESTROY methods # get called (e.g. File::Temp::Dir and PublicInbox::Msgmap) $SIG{INT} = $SIG{HUP} = $SIG{PIPE} = $SIG{TERM} = sub { my ($sig) = @_; # https://www.tldp.org/LDP/abs/html/exitcodes.html eval { $cb->($sig, $arg) } if $cb; $sig = 'SIG'.$sig; exit(128 + POSIX->$sig); }; } sub resolve_eidxdir { my ($cd) = @_; my $try = $cd // '.'; my $root_dev_ino; while (1) { # favor v2, first if (-f "$try/ei.lock") { return rel2abs_collapsed($try); } elsif (-d $try) { my @try = stat _; $root_dev_ino //= do { my @root = stat('/') or die "stat /: $!\n"; "$root[0]\0$root[1]"; }; return undef if "$try[0]\0$try[1]" eq $root_dev_ino; $try .= '/..'; # continue, cd up } else { die "`$try' is not a directory\n"; } } } sub resolve_inboxdir { my ($cd, $ver) = @_; my $try = $cd // '.'; my $root_dev_ino; while (1) { # favor v2, first if (-f "$try/inbox.lock") { $$ver = 2 if $ver; return rel2abs_collapsed($try); } elsif (-d $try) { my @try = stat _; $root_dev_ino //= do { my @root = stat('/') or die "stat /: $!\n"; "$root[0]\0$root[1]"; }; last if "$try[0]\0$try[1]" eq $root_dev_ino; $try .= '/..'; # continue, cd up } else { die "`$try' is not a directory\n"; } } # try v1 bare git dirs my $cmd = [ qw(git rev-parse --git-dir) ]; my $fh = popen_rd($cmd, undef, {-C => $cd}); my $dir = do { local $/; <$fh> }; close $fh or die "error in @$cmd (cwd:${\($cd // '.')}): $!\n"; chomp $dir; $$ver = 1 if $ver; rel2abs_collapsed($dir eq '.' ? ($cd // $dir) : $dir); } # for unconfigured inboxes sub detect_indexlevel ($) { my ($ibx) = @_; my $over = $ibx->over; my $srch = $ibx->search; delete @$ibx{qw(over search)}; # don't leave open FDs lying around # brand new or never before indexed inboxes default to full return 'full' unless $over; my $l = 'basic'; return $l unless $srch; if (my $xdb = $srch->xdb) { $l = 'full'; my $m = $xdb->get_metadata('indexlevel'); if ($m eq 'medium') { $l = $m; } elsif ($m ne '') { warn <<""; $ibx->{inboxdir} has unexpected indexlevel in Xapian: $m } $ibx->{-skip_docdata} = 1 if $xdb->get_metadata('skip_docdata'); } $l; } sub unconfigured_ibx ($$) { my ($dir, $i) = @_; my $name = "unconfigured-$i"; PublicInbox::Inbox->new({ name => $name, address => [ "$name\@example.com" ], inboxdir => $dir, # consumers (-convert) warn on this: -unconfigured => 1, }); } sub resolve_inboxes ($;$$) { my ($argv, $opt, $cfg) = @_; $opt ||= {}; $cfg //= PublicInbox::Config->new; if ($opt->{all}) { my $cfgfile = PublicInbox::Config::default_file(); $cfg or die "--all specified, but $cfgfile not readable\n"; @$argv and die "--all specified, but directories specified\n"; } my (@old, @ibxs, @eidx); if ($opt->{-eidx_ok}) { require PublicInbox::ExtSearchIdx; my $i = -1; @$argv = grep { $i++; if (defined(my $ei = resolve_eidxdir($_))) { $ei = PublicInbox::ExtSearchIdx->new($ei, $opt); push @eidx, $ei; undef; } else { 1; } } @$argv; } my $min_ver = $opt->{-min_inbox_version} || 0; # lookup inboxes by st_dev + st_ino instead of {inboxdir} pathnames, # pathnames are not unique due to symlinks and bind mounts if ($opt->{all}) { $cfg->each_inbox(sub { my ($ibx) = @_; if (-e $ibx->{inboxdir}) { push(@ibxs, $ibx) if $ibx->version >= $min_ver; } else { warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n"; } }); } else { # directories specified on the command-line my @dirs = @$argv; push @dirs, '.' if !@dirs && $opt->{-use_cwd}; my %s2i; # "st_dev\0st_ino" => array index for (my $i = 0; $i <= $#dirs; $i++) { my $dir = $dirs[$i]; my @st = stat($dir) or die "stat($dir): $!\n"; $dir = $dirs[$i] = resolve_inboxdir($dir, \(my $ver)); if ($ver >= $min_ver) { $s2i{"$st[0]\0$st[1]"} //= $i; } else { push @old, $dir; } } my $done = \'done'; eval { $cfg->each_inbox(sub { my ($ibx) = @_; return if $ibx->version < $min_ver; my $dir = $ibx->{inboxdir}; if (my @s = stat $dir) { my $i = delete($s2i{"$s[0]\0$s[1]"}) // return; $ibxs[$i] = $ibx; die $done if !keys(%s2i); } else { warn "W: $ibx->{name} $dir: $!\n"; } }); }; die $@ if $@ && $@ ne $done; for my $i (sort { $a <=> $b } values %s2i) { $ibxs[$i] = unconfigured_ibx($dirs[$i], $i); } @ibxs = grep { defined } @ibxs; # duplicates are undef } if (@old) { die "-V$min_ver inboxes not supported by $0\n\t", join("\n\t", @old), "\n"; } $opt->{-eidx_ok} ? (\@ibxs, \@eidx) : @ibxs; } my @base_mod = (); my @over_mod = qw(DBD::SQLite DBI); my %mod_groups = ( -index => [ @base_mod, @over_mod ], -base => \@base_mod, -search => [ @base_mod, @over_mod, 'Search::Xapian' ], ); sub scan_ibx_modules ($$) { my ($mods, $ibx) = @_; if (!$ibx->{indexlevel} || $ibx->{indexlevel} ne 'basic') { $mods->{'Search::Xapian'} = 1; } else { $mods->{$_} = 1 foreach @over_mod; } } sub check_require { my (@mods) = @_; my $err = {}; while (my $mod = shift @mods) { if (my $groups = $mod_groups{$mod}) { push @mods, @$groups; } elsif ($mod eq 'Search::Xapian') { require PublicInbox::Search; PublicInbox::Search::load_xapian() or $err->{'Search::Xapian || Xapian'} = $@; } else { eval "require $mod"; $err->{$mod} = $@ if $@; } } scalar keys %$err ? $err : undef; } sub missing_mod_msg { my ($err) = @_; my @mods = map { "`$_'" } sort keys %$err; my $last = pop @mods; @mods ? (join(', ', @mods)."' and $last") : $last } sub require_or_die { my $err = check_require(@_) or return; die missing_mod_msg($err)." required for $0\n"; } sub indexlevel_ok_or_die ($) { my ($indexlevel) = @_; my $req; if ($indexlevel eq 'basic') { $req = '-index'; } elsif ($indexlevel =~ /\A(?:medium|full)\z/) { $req = '-search'; } else { die <<""; invalid indexlevel=$indexlevel (must be `basic', `medium', or `full') } my $err = check_require($req) or return; die missing_mod_msg($err) ." required for indexlevel=$indexlevel\n"; } sub index_terminate { my (undef, $ibx) = @_; # $_[0] = signal name $ibx->git->cleanup; } sub index_inbox { my ($ibx, $im, $opt) = @_; require PublicInbox::InboxWritable; my $jobs = delete $opt->{jobs} if $opt; if (my $pr = $opt->{-progress}) { $pr->("indexing $ibx->{inboxdir} ...\n"); } local @SIG{keys %SIG} = values %SIG; setup_signals(\&index_terminate, $ibx); my $idx = { current_info => $ibx->{inboxdir} }; local $SIG{__WARN__} = sub { return if PublicInbox::Eml::warn_ignore(@_); warn($idx->{current_info}, ': ', @_); }; if ($ibx->version == 2) { eval { require PublicInbox::V2Writable }; die "v2 requirements not met: $@\n" if $@; $ibx->{-creat_opt}->{nproc} = $jobs; my $v2w = $im // $ibx->importer($opt->{reindex} // $jobs); if (defined $jobs) { if ($jobs == 0) { $v2w->{parallel} = 0; } else { my $n = $v2w->{shards}; if ($jobs < ($n + 1) && !$opt->{reshard}) { warn <new($ibx, 1); } $idx->index_sync($opt); $idx->{nidx} // 0; # returns number processed } sub progress_prepare ($;$) { my ($opt, $dst) = @_; # public-inbox-index defaults to quiet, -xcpdb and -compact do not if (defined($opt->{quiet}) && $opt->{quiet} < 0) { $opt->{quiet} = !$opt->{verbose}; } if ($opt->{quiet}) { open my $null, '>', '/dev/null' or die "failed to open /dev/null: $!\n"; $opt->{1} = $null; # suitable for spawn() redirect } else { $opt->{verbose} ||= 1; $dst //= *STDERR{GLOB}; $opt->{-progress} = sub { print $dst '# ', @_ }; } } # same unit factors as git: sub parse_unsigned ($) { my ($val) = @_; $$val =~ /\A([0-9]+)([kmg])?\z/i or return; my ($n, $unit_factor) = ($1, $2 // ''); my %u = ( k => 1024, m => 1024**2, g => 1024**3 ); $$val = $n * ($u{lc($unit_factor)} // 1); 1; } sub index_prepare ($$) { my ($opt, $cfg) = @_; my $env; if ($opt->{compact}) { require PublicInbox::Xapcmd; PublicInbox::Xapcmd::check_compact(); $opt->{compact_opt} = { -coarse_lock => 1, compact => 1 }; if (defined(my $jobs = $opt->{jobs})) { $opt->{compact_opt}->{jobs} = $jobs; } } for my $k (qw(max_size batch_size)) { my $git_key = "publicInbox.index".ucfirst($k); $git_key =~ s/_([a-z])/\U$1/g; defined(my $v = $opt->{$k} // $cfg->{lc($git_key)}) or next; parse_unsigned(\$v) or die "`$git_key=$v' not parsed\n"; $v > 0 or die "`$git_key=$v' must be positive\n"; $opt->{$k} = $v; } # out-of-the-box builds of Xapian 1.4.x are still limited to 32-bit # https://getting-started-with-xapian.readthedocs.io/en/latest/concepts/indexing/limitations.html $opt->{batch_size} and $env = { XAPIAN_FLUSH_THRESHOLD => '4294967295' }; for my $k (qw(sequential-shard)) { my $git_key = "publicInbox.index".ucfirst($k); $git_key =~ s/-([a-z])/\U$1/g; defined(my $s = $opt->{$k} // $cfg->{lc($git_key)}) or next; defined(my $v = $cfg->git_bool($s)) or die "`$git_key=$s' not boolean\n"; $opt->{$k} = $v; } for my $k (qw(since until)) { my $v = $opt->{$k} // next; $opt->{reindex} or die "--$k=$v requires --reindex\n"; } $env; } sub do_chdir ($) { my $chdir = $_[0] // return; for my $d (@$chdir) { next if $d eq ''; # same as git(1) chdir $d or die "cd $d: $!"; } } 1; public-inbox-1.9.0/lib/PublicInbox/AdminEdit.pm000066400000000000000000000034011430031475700212760ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # common stuff between -edit, -purge (and maybe -learn in the future) package PublicInbox::AdminEdit; use strict; use warnings; use PublicInbox::Admin; our @OPT = qw(all force|f verbose|v! help|h); sub check_editable ($) { my ($ibxs) = @_; foreach my $ibx (@$ibxs) { my $lvl = $ibx->{indexlevel}; if (defined $lvl) { PublicInbox::Admin::indexlevel_ok_or_die($lvl); next; } # Undefined indexlevel, so `full'... # Search::Xapian exists and the DB can be read, at least, fine $ibx->search and next; # it's possible for a Xapian directory to exist, # but Search::Xapian to go missing/broken. # Make sure it's purged in that case: $ibx->over or die "no over.sqlite3 in $ibx->{inboxdir}\n"; require PublicInbox::Search; my $xdir_ro = PublicInbox::Search->new($ibx)->xdir(1); my $nshard = 0; foreach my $shard (<$xdir_ro/*>) { if (-d $shard && $shard =~ m!/[0-9]+\z!) { my $bytes = 0; $bytes += -s $_ foreach glob("$shard/*"); $nshard++ if $bytes; } } if ($nshard) { PublicInbox::Admin::require_or_die('-search'); } else { # somebody could "rm -r" all the Xapian directories; # let them purge the overview, at least $ibx->{indexlevel} ||= 'basic'; } } } # takes the output of V2Writable::purge and V2Writable::replace # $rewrites = [ array commits keyed by epoch ] sub show_rewrites ($$$) { my ($fh, $ibx, $rewrites) = @_; print $fh "$ibx->{inboxdir}:"; if (scalar @$rewrites) { my $epoch = -1; my @out = map {; ++$epoch; "$epoch.git: ".(defined($_) ? $_ : '(unchanged)') } @$rewrites; print $fh join("\n\t", '', @out), "\n"; } else { print $fh " NONE\n"; } } 1; public-inbox-1.9.0/lib/PublicInbox/AltId.pm000066400000000000000000000034021430031475700204360ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # Used for giving serial numbers to messages. This can be tied to # the msgmap for live updates to living lists (see # PublicInbox::Filters::RubyLang), or kept separate for imports # of defunct NNTP groups (e.g. scripts/xhdr-num2mid) # # Introducing NEW uses of serial numbers is discouraged because of # it leads to reliance on centralization. However, being able # to use existing serial numbers is beneficial. package PublicInbox::AltId; use strict; use warnings; use URI::Escape qw(uri_unescape); use PublicInbox::Msgmap; # spec: TYPE:PREFIX:param1=value1¶m2=value2&... # The PREFIX will be a searchable boolean prefix in Xapian # Example: serial:gmane:file=/path/to/altmsgmap.sqlite3 sub new { my ($class, $ibx, $spec, $writable) = @_; my ($type, $prefix, $query) = split(/:/, $spec, 3); $type eq 'serial' or die "non-serial not supported, yet\n"; $prefix =~ /\A\w+\z/ or warn "non-word prefix not searchable\n"; my %params = map { my ($k, $v) = split(/=/, uri_unescape($_), 2); $v = '' unless defined $v; ($k, $v); } split(/[&;]/, $query); my $f = $params{file} or die "file: required for $type spec $spec\n"; unless (index($f, '/') == 0) { if ($ibx->version == 1) { $f = "$ibx->{inboxdir}/public-inbox/$f"; } else { $f = "$ibx->{inboxdir}/$f"; } } bless { filename => $f, writable => $writable, prefix => $prefix, xprefix => 'X'.uc($prefix), }, $class; } sub mm_alt { my ($self) = @_; $self->{mm_alt} ||= eval { my $f = $self->{filename}; my $writable = $self->{writable}; PublicInbox::Msgmap->new_file($f, $writable); }; } sub mid2alt { my ($self, $mid) = @_; $self->mm_alt->num_for($mid); } 1; public-inbox-1.9.0/lib/PublicInbox/AutoReap.pm000066400000000000000000000014261430031475700211650ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # automatically kill + reap children when this goes out-of-scope package PublicInbox::AutoReap; use v5.10.1; use strict; sub new { my (undef, $pid, $cb) = @_; bless { pid => $pid, cb => $cb, owner => $$ }, __PACKAGE__ } sub kill { my ($self, $sig) = @_; CORE::kill($sig // 'TERM', $self->{pid}); } sub join { my ($self, $sig) = @_; my $pid = delete $self->{pid} or return; $self->{cb}->() if defined $self->{cb}; CORE::kill($sig, $pid) if defined $sig; my $ret = waitpid($pid, 0) // die "waitpid($pid): $!"; $ret == $pid or die "BUG: waitpid($pid) != $ret"; } sub DESTROY { my ($self) = @_; return if $self->{owner} != $$; $self->join('TERM'); } 1; public-inbox-1.9.0/lib/PublicInbox/Cgit.pm000066400000000000000000000072241430031475700203350ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # wrapper for cgit(1) and git-http-backend(1) for browsing and # serving git code repositories. Requires 'publicinbox.cgitrc' # directive to be set in the public-inbox config file. package PublicInbox::Cgit; use strict; use PublicInbox::GitHTTPBackend; use PublicInbox::Git; # not bothering with Exporter for a one-off *input_prepare = \&PublicInbox::GitHTTPBackend::input_prepare; *serve = \&PublicInbox::GitHTTPBackend::serve; use PublicInbox::Qspawn; use PublicInbox::WwwStatic qw(r); sub locate_cgit ($) { my ($pi_cfg) = @_; my $cgit_bin = $pi_cfg->{'publicinbox.cgitbin'}; my $cgit_data = $pi_cfg->{'publicinbox.cgitdata'}; # /var/www/htdocs/cgit is the default install path from cgit.git # /usr/{lib,share}/cgit is where Debian puts cgit # TODO: check other distros for common paths unless (defined $cgit_bin) { foreach (qw(/var/www/htdocs/cgit /usr/lib/cgit)) { my $x = "$_/cgit.cgi"; next unless -x $x; $cgit_bin = $x; last; } } unless (defined $cgit_data) { my @dirs = qw(/var/www/htdocs/cgit /usr/share/cgit); # local installs of cgit from source have # CGIT_SCRIPT_PATH==CGIT_DATA_PATH by default, # so we can usually infer the cgit_data path from cgit_bin if (defined($cgit_bin) && $cgit_bin =~ m!\A(.+?)/[^/]+\z!) { unshift @dirs, $1 if -d $1; } foreach my $d (@dirs) { my $f = "$d/cgit.css"; next unless -f $f; $cgit_data = $d; last; } } ($cgit_bin, $cgit_data); } sub new { my ($class, $pi_cfg) = @_; my ($cgit_bin, $cgit_data) = locate_cgit($pi_cfg); # TODO: support gitweb and other repository viewers? if (defined(my $cgitrc = $pi_cfg->{-cgitrc_unparsed})) { $pi_cfg->parse_cgitrc($cgitrc, 0); } my $self = bless { cmd => [ $cgit_bin ], cgit_data => $cgit_data, pi_cfg => $pi_cfg, }, $class; # some cgit repos may not be mapped to inboxes, so ensure those exist: my $code_repos = $pi_cfg->{-code_repos}; foreach my $k (keys %$pi_cfg) { $k =~ /\Acoderepo\.(.+)\.dir\z/ or next; my $dir = $pi_cfg->{$k}; $code_repos->{$1} ||= $pi_cfg->fill_code_repo($1); } while (my ($nick, $repo) = each %$code_repos) { $self->{"\0$nick"} = $repo; } my $cgit_static = $pi_cfg->{-cgit_static}; my $static = join('|', map { quotemeta $_ } keys %$cgit_static); $self->{static} = qr/\A($static)\z/; $self; } # only what cgit cares about: my @PASS_ENV = qw( HTTP_HOST QUERY_STRING REQUEST_METHOD SCRIPT_NAME SERVER_NAME SERVER_PORT HTTP_COOKIE HTTP_REFERER CONTENT_LENGTH ); # XXX: cgit filters may care about more variables... my $parse_cgi_headers = \&PublicInbox::GitHTTPBackend::parse_cgi_headers; sub call { my ($self, $env) = @_; my $path_info = $env->{PATH_INFO}; my $cgit_data; # handle requests without spawning cgit iff possible: if ($path_info =~ m!\A/(.+?)/($PublicInbox::GitHTTPBackend::ANY)\z!ox) { my ($nick, $path) = ($1, $2); if (my PublicInbox::Git $git = $self->{"\0$nick"}) { return serve($env, $git, $path); } } elsif ($path_info =~ m!$self->{static}! && defined($cgit_data = $self->{cgit_data})) { my $f = $cgit_data.$1; # {static} only matches leading slash return PublicInbox::WwwStatic::response($env, [], $f); } my $cgi_env = { PATH_INFO => $path_info }; foreach (@PASS_ENV) { defined(my $v = $env->{$_}) or next; $cgi_env->{$_} = $v; } $cgi_env->{'HTTPS'} = 'on' if $env->{'psgi.url_scheme'} eq 'https'; my $rdr = input_prepare($env) or return r(500); my $qsp = PublicInbox::Qspawn->new($self->{cmd}, $cgi_env, $rdr); my $limiter = $self->{pi_cfg}->limiter('-cgit'); $qsp->psgi_return($env, $limiter, $parse_cgi_headers); } 1; public-inbox-1.9.0/lib/PublicInbox/CmdIPC4.pm000066400000000000000000000026071430031475700205720ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # callers should use PublicInbox::CmdIPC4->can('send_cmd4') (or recv_cmd4) # first choice for script/lei front-end and 2nd choice for lei backend # libsocket-msghdr-perl is in Debian but not many other distros as of 2021. package PublicInbox::CmdIPC4; use v5.12; use Socket qw(SOL_SOCKET SCM_RIGHTS); BEGIN { eval { require Socket::MsgHdr; # XS no warnings 'once'; # any number of FDs per-sendmsg(2) + buffer *send_cmd4 = sub ($$$$) { # (sock, fds, buf, flags) = @_; my ($sock, $fds, undef, $flags) = @_; my $mh = Socket::MsgHdr->new(buf => $_[2]); $mh->cmsghdr(SOL_SOCKET, SCM_RIGHTS, pack('i' x scalar(@$fds), @$fds)); my $s; my $try = 0; do { $s = Socket::MsgHdr::sendmsg($sock, $mh, $flags); } while (!defined($s) && ($!{ENOBUFS} || $!{ENOMEM} || $!{ETOOMANYREFS}) && (++$try < 50) && warn "sleeping on sendmsg: $! (#$try)\n" && select(undef, undef, undef, 0.1) == 0); $s; }; *recv_cmd4 = sub ($$$) { my ($s, undef, $len) = @_; # $_[1] = destination buffer my $mh = Socket::MsgHdr->new(buflen => $len, controllen => 256); my $r = Socket::MsgHdr::recvmsg($s, $mh, 0) // return (undef); $_[1] = $mh->buf; return () if $r == 0; my (undef, undef, $data) = $mh->cmsghdr; defined($data) ? unpack('i' x (length($data) / 4), $data) : (); }; } } # /eval /BEGIN 1; public-inbox-1.9.0/lib/PublicInbox/CompressNoop.pm000066400000000000000000000007761430031475700221030ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # Provide the same methods as Compress::Raw::Zlib::Deflate but # does no transformation of outgoing data package PublicInbox::CompressNoop; use strict; use Compress::Raw::Zlib qw(Z_OK); sub new { bless \(my $self), __PACKAGE__ } sub deflate { # ($self, $input, $output) $_[2] .= $_[1]; Z_OK; } sub flush { # ($self, $output, $flags = Z_FINISH) $_[1] //= ''; Z_OK; } 1; public-inbox-1.9.0/lib/PublicInbox/Config.pm000066400000000000000000000347211430031475700206560ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # # Used throughout the project for reading configuration # # Note: I hate camelCase; but git-config(1) uses it, but it's better # than alllowercasewithoutunderscores, so use lc('configKey') where # applicable for readability package PublicInbox::Config; use strict; use v5.10.1; use PublicInbox::Inbox; use PublicInbox::Spawn qw(popen_rd); our $LD_PRELOAD = $ENV{LD_PRELOAD}; # only valid at startup our $DEDUPE; # set to {} to dedupe or clear cache sub _array ($) { ref($_[0]) eq 'ARRAY' ? $_[0] : [ $_[0] ] } # returns key-value pairs of config directives in a hash # if keys may be multi-value, the value is an array ref containing all values sub new { my ($class, $file, $errfh) = @_; $file //= default_file(); my $self; my $set_dedupe; if (ref($file) eq 'SCALAR') { # used by some tests open my $fh, '<', $file or die; # PerlIO::scalar $self = config_fh_parse($fh, "\n", '='); bless $self, $class; } else { if (-f $file && $DEDUPE) { $file = rel2abs_collapsed($file); $self = $DEDUPE->{$file} and return $self; $set_dedupe = 1; } $self = git_config_dump($class, $file, $errfh); $self->{'-f'} = $file; } # caches $self->{-by_addr} = {}; $self->{-by_list_id} = {}; $self->{-by_name} = {}; $self->{-by_newsgroup} = {}; $self->{-by_eidx_key} = {}; $self->{-no_obfuscate} = {}; $self->{-limiters} = {}; $self->{-code_repos} = {}; # nick => PublicInbox::Git object $self->{-cgitrc_unparsed} = $self->{'publicinbox.cgitrc'}; if (my $no = delete $self->{'publicinbox.noobfuscate'}) { $no = _array($no); my @domains; foreach my $n (@$no) { my @n = split(/\s+/, $n); foreach (@n) { if (/\S+@\S+/) { # full address $self->{-no_obfuscate}->{lc $_} = 1; } else { # allow "example.com" or "@example.com" s/\A@//; push @domains, quotemeta($_); } } } my $nod = join('|', @domains); $self->{-no_obfuscate_re} = qr/(?:$nod)\z/i; } if (my $css = delete $self->{'publicinbox.css'}) { $self->{css} = _array($css); } $DEDUPE->{$file} = $self if $set_dedupe; $self; } sub noop {} sub fill_all ($) { each_inbox($_[0], \&noop) } sub _lookup_fill ($$$) { my ($self, $cache, $key) = @_; $self->{$cache}->{$key} // do { fill_all($self); $self->{$cache}->{$key}; } } sub lookup { my ($self, $recipient) = @_; _lookup_fill($self, '-by_addr', lc($recipient)); } sub lookup_list_id { my ($self, $list_id) = @_; _lookup_fill($self, '-by_list_id', lc($list_id)); } sub lookup_name ($$) { my ($self, $name) = @_; $self->{-by_name}->{$name} // _fill_ibx($self, $name); } sub lookup_ei { my ($self, $name) = @_; $self->{-ei_by_name}->{$name} //= _fill_ei($self, $name); } sub lookup_eidx_key { my ($self, $eidx_key) = @_; _lookup_fill($self, '-by_eidx_key', $eidx_key); } # special case for [extindex "all"] sub ALL { lookup_ei($_[0], 'all') } sub each_inbox { my ($self, $cb, @arg) = @_; # may auto-vivify if config file is non-existent: foreach my $section (@{$self->{-section_order}}) { next if $section !~ m!\Apublicinbox\.([^/]+)\z!; my $ibx = lookup_name($self, $1) or next; $cb->($ibx, @arg); } } sub lookup_newsgroup { my ($self, $ng) = @_; _lookup_fill($self, '-by_newsgroup', lc($ng)); } sub limiter { my ($self, $name) = @_; $self->{-limiters}->{$name} //= do { require PublicInbox::Qspawn; my $max = $self->{"publicinboxlimiter.$name.max"} || 1; my $limiter = PublicInbox::Qspawn::Limiter->new($max); $limiter->setup_rlimit($name, $self); $limiter; }; } sub config_dir { $ENV{PI_DIR} // "$ENV{HOME}/.public-inbox" } sub default_file { $ENV{PI_CONFIG} // (config_dir() . '/config'); } sub config_fh_parse ($$$) { my ($fh, $rs, $fs) = @_; my (%rv, %seen, @section_order, $line, $k, $v, $section, $cur, $i); local $/ = $rs; while (defined($line = <$fh>)) { # perf critical with giant configs $i = index($line, $fs); $k = substr($line, 0, $i); $v = substr($line, $i + 1, -1); # chop off $fs $section = substr($k, 0, rindex($k, '.')); $seen{$section} //= push(@section_order, $section); if (defined($cur = $rv{$k})) { if (ref($cur) eq "ARRAY") { push @$cur, $v; } else { $rv{$k} = [ $cur, $v ]; } } else { $rv{$k} = $v; } } $rv{-section_order} = \@section_order; \%rv; } sub git_config_dump { my ($class, $file, $errfh) = @_; return bless {}, $class unless -e $file; my $cmd = [ qw(git config -z -l --includes), "--file=$file" ]; my $fh = popen_rd($cmd, undef, { 2 => $errfh // 2 }); my $rv = config_fh_parse($fh, "\0", "\n"); close $fh or die "@$cmd failed: \$?=$?\n"; bless $rv, $class; } sub valid_foo_name ($;$) { my ($name, $pfx) = @_; # Similar rules found in git.git/remote.c::valid_remote_nick # and git.git/refs.c::check_refname_component # We don't reject /\.lock\z/, however, since we don't lock refs if ($name eq '' || $name =~ /\@\{/ || $name =~ /\.\./ || $name =~ m![/:\?\[\]\^~\s\f[:cntrl:]\*]! || $name =~ /\A\./ || $name =~ /\.\z/) { warn "invalid $pfx name: `$name'\n" if $pfx; return 0; } # Note: we allow URL-unfriendly characters; users may configure # non-HTTP-accessible inboxes 1; } # XXX needs testing for cgit compatibility # cf. cgit/scan-tree.c::add_repo sub cgit_repo_merge ($$$) { my ($self, $base, $repo) = @_; my $path = $repo->{dir}; if (defined(my $se = $self->{-cgit_strict_export})) { return unless -e "$path/$se"; } return if -e "$path/noweb"; # this comes from the cgit config, and AFAIK cgit only allows # repos to have one URL, but that's just the PATH_INFO component, # not the Host: portion # $repo = { url => 'foo.git', dir => '/path/to/foo.git' } my $rel = $repo->{url}; unless (defined $rel) { my $off = index($path, $base, 0); if ($off != 0) { $rel = $path; } else { $rel = substr($path, length($base) + 1); } $rel =~ s!/\.git\z!! or $rel =~ s!/+\z!!; $self->{-cgit_remove_suffix} and $rel =~ s!/?\.git\z!!; } $self->{"coderepo.$rel.dir"} //= $path; $self->{"coderepo.$rel.cgiturl"} //= _array($rel); } sub is_git_dir ($) { my ($git_dir) = @_; -d "$git_dir/objects" && -f "$git_dir/HEAD"; } # XXX needs testing for cgit compatibility sub scan_path_coderepo { my ($self, $base, $path) = @_; opendir(my $dh, $path) or do { warn "error opening directory: $path\n"; return }; my $git_dir = $path; if (is_git_dir($git_dir) || is_git_dir($git_dir .= '/.git')) { my $repo = { dir => $git_dir }; cgit_repo_merge($self, $base, $repo); return; } while (defined(my $dn = readdir $dh)) { next if $dn eq '.' || $dn eq '..'; if (index($dn, '.') == 0 && !$self->{-cgit_scan_hidden_path}) { next; } my $dir = "$path/$dn"; scan_path_coderepo($self, $base, $dir) if -d $dir; } } sub scan_tree_coderepo ($$) { my ($self, $path) = @_; scan_path_coderepo($self, $path, $path); } sub scan_projects_coderepo ($$$) { my ($self, $list, $path) = @_; open my $fh, '<', $list or do { warn "failed to open cgit projectlist=$list: $!\n"; return; }; while (<$fh>) { chomp; scan_path_coderepo($self, $path, "$path/$_"); } } sub parse_cgitrc { my ($self, $cgitrc, $nesting) = @_; if ($nesting == 0) { # defaults: my %s = map { $_ => 1 } qw(/cgit.css /cgit.png /favicon.ico /robots.txt); $self->{-cgit_static} = \%s; } # same limit as cgit/configfile.c::parse_configfile return if $nesting > 8; open my $fh, '<', $cgitrc or do { warn "failed to open cgitrc=$cgitrc: $!\n"; return; }; # FIXME: this doesn't support macro expansion via $VARS, yet my $repo; while (<$fh>) { chomp; if (m!\Arepo\.url=(.+?)/*\z!) { my $nick = $1; cgit_repo_merge($self, $repo->{dir}, $repo) if $repo; $repo = { url => $nick }; } elsif (m!\Arepo\.path=(.+)\z!) { if (defined $repo) { $repo->{dir} = $1; } else { warn "$_ without repo.url\n"; } } elsif (m!\Ainclude=(.+)\z!) { parse_cgitrc($self, $1, $nesting + 1); } elsif (m!\A(scan-hidden-path|remove-suffix)=([0-9]+)\z!) { my ($k, $v) = ($1, $2); $k =~ tr/-/_/; $self->{"-cgit_$k"} = $v; } elsif (m!\A(project-list|strict-export)=(.+)\z!) { my ($k, $v) = ($1, $2); $k =~ tr/-/_/; $self->{"-cgit_$k"} = $v; } elsif (m!\Ascan-path=(.+)\z!) { if (defined(my $list = $self->{-cgit_project_list})) { scan_projects_coderepo($self, $list, $1); } else { scan_tree_coderepo($self, $1); } } elsif (m!\A(?:css|favicon|logo|repo\.logo)=(/.+)\z!) { # absolute paths for static files via PublicInbox::Cgit $self->{-cgit_static}->{$1} = 1; } } cgit_repo_merge($self, $repo->{dir}, $repo) if $repo; } # parse a code repo, only git is supported at the moment sub fill_code_repo { my ($self, $nick) = @_; my $pfx = "coderepo.$nick"; my $dir = $self->{"$pfx.dir"} // do { # aka "GIT_DIR" warn "$pfx.dir unset\n"; return; }; my $git = PublicInbox::Git->new($dir); if (defined(my $cgits = $self->{"$pfx.cgiturl"})) { $git->{cgit_url} = $cgits = _array($cgits); $self->{"$pfx.cgiturl"} = $cgits; } $git; } sub get_all { my ($self, $key) = @_; my $v = $self->{$key} // return; _array($v); } sub git_bool { my ($val) = $_[-1]; # $_[0] may be $self, or $val if ($val =~ /\A(?:false|no|off|[\-\+]?(?:0x)?0+)\z/i) { 0; } elsif ($val =~ /\A(?:true|yes|on|[\-\+]?(?:0x)?[0-9]+)\z/i) { 1; } else { undef; } } # abs_path resolves symlinks, so we want to avoid it if rel2abs # is sufficient and doesn't leave "/.." or "/../" sub rel2abs_collapsed { require File::Spec; my $p = File::Spec->rel2abs($_[-1]); return $p if substr($p, -3, 3) ne '/..' && index($p, '/../') < 0; require Cwd; Cwd::abs_path($p); } sub get_1 { my ($self, $key) = @_; my $v = $self->{$key}; return $v if !ref($v); warn "W: $key has multiple values, only using `$v->[-1]'\n"; $v->[-1]; } sub repo_objs { my ($self, $ibxish) = @_; my $ibx_code_repos = $ibxish->{coderepo} // return; $ibxish->{-repo_objs} // do { my $code_repos = $self->{-code_repos}; my @repo_objs; for my $nick (@$ibx_code_repos) { my @parts = split(m!/!, $nick); for (@parts) { @parts = () unless valid_foo_name($_); } unless (@parts) { warn "invalid coderepo name: `$nick'\n"; next; } my $repo = $code_repos->{$nick} //= fill_code_repo($self, $nick); push @repo_objs, $repo if $repo; } if (scalar @repo_objs) { $ibxish ->{-repo_objs} = \@repo_objs; } else { delete $ibxish->{coderepo}; } } } sub _fill_ibx { my ($self, $name) = @_; my $pfx = "publicinbox.$name"; my $ibx = {}; for my $k (qw(watch)) { my $v = $self->{"$pfx.$k"}; $ibx->{$k} = $v if defined $v; } for my $k (qw(filter inboxdir newsgroup replyto httpbackendmax feedmax indexlevel indexsequentialshard boost)) { my $v = get_1($self, "$pfx.$k") // next; $ibx->{$k} = $v; } # "mainrepo" is backwards compatibility: my $dir = $ibx->{inboxdir} //= $self->{"$pfx.mainrepo"} // return; if (index($dir, "\n") >= 0) { warn "E: `$dir' must not contain `\\n'\n"; return; } for my $k (qw(obfuscate)) { my $v = $self->{"$pfx.$k"} // next; if (defined(my $bval = git_bool($v))) { $ibx->{$k} = $bval; } else { warn "Ignoring $pfx.$k=$v in config, not boolean\n"; } } # TODO: more arrays, we should support multi-value for # more things to encourage decentralization for my $k (qw(address altid nntpmirror imapmirror coderepo hide listid url infourl watchheader nntpserver imapserver pop3server)) { my $v = $self->{"$pfx.$k"} // next; $ibx->{$k} = _array($v); } return unless valid_foo_name($name, 'publicinbox'); $ibx->{name} = $name; $ibx->{-pi_cfg} = $self; $ibx = PublicInbox::Inbox->new($ibx); foreach (@{$ibx->{address}}) { my $lc_addr = lc($_); $self->{-by_addr}->{$lc_addr} = $ibx; $self->{-no_obfuscate}->{$lc_addr} = 1; } if (my $listids = $ibx->{listid}) { # RFC2919 section 6 stipulates "case insensitive equality" foreach my $list_id (@$listids) { $self->{-by_list_id}->{lc($list_id)} = $ibx; } } if (defined(my $ngname = $ibx->{newsgroup})) { if (ref($ngname)) { delete $ibx->{newsgroup}; warn 'multiple newsgroups not supported: '. join(', ', @$ngname). "\n"; # Newsgroup name needs to be compatible with RFC 3977 # wildmat-exact and RFC 3501 (IMAP) ATOM-CHAR. # Leave out a few chars likely to cause problems or conflicts: # '|', '<', '>', ';', '#', '$', '&', } elsif ($ngname =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]! || $ngname eq '') { delete $ibx->{newsgroup}; warn "newsgroup name invalid: `$ngname'\n"; } else { # PublicInbox::NNTPD does stricter ->nntp_usable # checks, keep this lean for startup speed $self->{-by_newsgroup}->{$ngname} = $ibx; } } unless (defined $ibx->{newsgroup}) { # for ->eidx_key my $abs = rel2abs_collapsed($dir); if ($abs ne $dir) { warn "W: `$dir' canonicalized to `$abs'\n"; $ibx->{inboxdir} = $abs; } } $self->{-by_name}->{$name} = $ibx; if ($ibx->{obfuscate}) { $ibx->{-no_obfuscate} = $self->{-no_obfuscate}; $ibx->{-no_obfuscate_re} = $self->{-no_obfuscate_re}; fill_all($self); # noop to populate -no_obfuscate } if (my $es = ALL($self)) { require PublicInbox::Isearch; $ibx->{isrch} = PublicInbox::Isearch->new($ibx, $es); } $self->{-by_eidx_key}->{$ibx->eidx_key} = $ibx; } sub _fill_ei ($$) { my ($self, $name) = @_; eval { require PublicInbox::ExtSearch } or return; my $pfx = "extindex.$name"; my $d = $self->{"$pfx.topdir"} // return; -d $d or return; if (index($d, "\n") >= 0) { warn "E: `$d' must not contain `\\n'\n"; return; } my $es = PublicInbox::ExtSearch->new($d); for my $k (qw(indexlevel indexsequentialshard)) { my $v = get_1($self, "$pfx.$k") // next; $es->{$k} = $v; } for my $k (qw(coderepo hide url infourl)) { my $v = $self->{"$pfx.$k"} // next; $es->{$k} = _array($v); } return unless valid_foo_name($name, 'extindex'); $es->{name} = $name; $es; } sub urlmatch { my ($self, $key, $url) = @_; state $urlmatch_broken; # requires git 1.8.5 return if $urlmatch_broken; my $file = $self->{'-f'} // default_file(); my $cmd = [qw/git config -z --includes --get-urlmatch/, "--file=$file", $key, $url ]; my $fh = popen_rd($cmd); local $/ = "\0"; my $val = <$fh>; if (close($fh)) { chomp($val); $val; } else { $urlmatch_broken = 1 if (($? >> 8) != 1); undef; } } sub json { state $json; $json //= do { for my $mod (qw(Cpanel::JSON::XS JSON::MaybeXS JSON JSON::PP)) { eval "require $mod" or next; # ->ascii encodes non-ASCII to "\uXXXX" $json = $mod->new->ascii(1) and last; } $json; }; } sub squote_maybe ($) { my ($val) = @_; if ($val =~ m{([^\w@\./,\%\+\-])}) { $val =~ s/(['!])/'\\$1'/g; # '!' for csh return "'$val'"; } $val; } 1; public-inbox-1.9.0/lib/PublicInbox/ConfigIter.pm000066400000000000000000000022521430031475700214740ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # Intended for PublicInbox::DS::event_loop in read-only daemons # to avoid each_inbox() monopolizing the event loop when hundreds/thousands # of inboxes are in play. package PublicInbox::ConfigIter; use strict; use v5.10.1; sub new { my ($class, $pi_cfg, $cb, @args) = @_; my $i = 0; bless [ $pi_cfg, \$i, $cb, @args ], __PACKAGE__; } # for PublicInbox::DS::next_tick, we only call this is if # PublicInbox::DS is already loaded sub event_step { my $self = shift; my ($pi_cfg, $i, $cb, @arg) = @$self; my $section = $pi_cfg->{-section_order}->[$$i++]; eval { $cb->($pi_cfg, $section, @arg) }; warn "E: $@ in ${self}::event_step" if $@; PublicInbox::DS::requeue($self) if defined($section); } # for generic PSGI servers sub each_section { my $self = shift; my ($pi_cfg, $i, $cb, @arg) = @$self; while (defined(my $section = $pi_cfg->{-section_order}->[$$i++])) { eval { $cb->($pi_cfg, $section, @arg) }; warn "E: $@ in ${self}::each_section" if $@; } eval { $cb->($pi_cfg, undef, @arg) }; warn "E: $@ in ${self}::each_section" if $@; } 1; public-inbox-1.9.0/lib/PublicInbox/ContentHash.pm000066400000000000000000000054411430031475700216640ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ # Unstable internal API. # Used for on-the-fly duplicate detection in V2 inboxes. # This is not stored in any database anywhere and may change # as changes in duplicate detection are needed. # See L manpage for more details. package PublicInbox::ContentHash; use strict; use v5.10.1; use parent qw(Exporter); our @EXPORT_OK = qw(content_hash content_digest git_sha); use PublicInbox::MID qw(mids references); use PublicInbox::MsgIter; # not sure if less-widely supported hash families are worth bothering with use Digest::SHA; sub digest_addr ($$$) { my ($dig, $h, $v) = @_; $v =~ tr/"//d; $v =~ tr/\r\n\t / /s; $v =~ s/@([a-z0-9\_\.\-\(\)]*([A-Z])\S*)/'@'.lc($1)/ge; utf8::encode($v); $dig->add("$h\0$v\0"); } sub content_dig_i { my ($dig) = $_[1]; my ($part, $depth, @idx) = @{$_[0]}; $dig->add("\0$depth:".join('.', @idx)."\0"); my $fn = $part->filename; if (defined $fn) { utf8::encode($fn); $dig->add("fn\0$fn\0"); } my @d = $part->header('Content-Description'); foreach my $d (@d) { utf8::encode($d); $dig->add("d\0$d\0"); } $dig->add("b\0"); my $ct = $part->content_type || 'text/plain'; my ($s, undef) = msg_part_text($part, $ct); if (defined $s) { $s =~ s/\r\n/\n/gs; $s =~ s/\s*\z//s; utf8::encode($s); } else { $s = $part->body; } $dig->add($s); } sub content_digest ($;$) { my ($eml, $dig) = @_; $dig //= Digest::SHA->new(256); # References: and In-Reply-To: get used interchangeably # in some "duplicates" in LKML. We treat them the same # in SearchIdx, so treat them the same for this: # do NOT consider the Message-ID as part of the content_hash # if we got here, we've already got Message-ID reuse my %seen = map { $_ => 1 } @{mids($eml)}; foreach my $mid (@{references($eml)}) { $dig->add("ref\0$mid\0") unless $seen{$mid}++; } # Only use Sender: if From is not present foreach my $h (qw(From Sender)) { my @v = $eml->header($h) or next; digest_addr($dig, $h, $_) foreach @v; last; } foreach my $h (qw(Subject Date)) { my @v = $eml->header($h); foreach my $v (@v) { utf8::encode($v); $dig->add("$h\0$v\0"); } } # Some mail processors will add " to unquoted names that were # not in the original message. For the purposes of deduplication, # do not take it into account: foreach my $h (qw(To Cc)) { my @v = $eml->header($h); digest_addr($dig, $h, $_) foreach @v; } msg_iter($eml, \&content_dig_i, $dig); $dig; } sub content_hash ($) { content_digest($_[0])->digest; } sub git_sha ($$) { my ($n, $eml) = @_; my $dig = Digest::SHA->new($n); my $bref = ref($eml) eq 'SCALAR' ? $eml : \($eml->as_string); $dig->add('blob '.length($$bref)."\0"); $dig->add($$bref); $dig; } 1; public-inbox-1.9.0/lib/PublicInbox/DS.pm000066400000000000000000000536601430031475700177620ustar00rootroot00000000000000# This library is free software; you can redistribute it and/or modify # it under the same terms as Perl itself. # # This license differs from the rest of public-inbox # # This is a fork of the unmaintained Danga::Socket (1.61) with # significant changes. See Documentation/technical/ds.txt in our # source for details. # # Do not expect this to be a stable API like Danga::Socket, # but it will evolve to suite our needs and to take advantage of # newer Linux and *BSD features. # Bugs encountered were reported to bug-Danga-Socket@rt.cpan.org, # fixed in Danga::Socket 1.62 and visible at: # https://rt.cpan.org/Public/Dist/Display.html?Name=Danga-Socket # # fields: # sock: underlying socket # rbuf: scalarref, usually undef # wbuf: arrayref of coderefs or tmpio (autovivified)) # (tmpio = [ GLOB, offset, [ length ] ]) package PublicInbox::DS; use strict; use v5.10.1; use parent qw(Exporter); use bytes qw(length substr); # FIXME(?): needed for PublicInbox::NNTP use POSIX qw(WNOHANG sigprocmask SIG_SETMASK); use Fcntl qw(SEEK_SET :DEFAULT O_APPEND); use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC); use Scalar::Util qw(blessed); use PublicInbox::Syscall qw(:epoll); use PublicInbox::Tmpfile; use Errno qw(EAGAIN EINVAL); use Carp qw(carp croak); our @EXPORT_OK = qw(now msg_more dwaitpid add_timer add_uniq_timer); my %Stack; my $nextq; # queue for next_tick my $wait_pids; # list of [ pid, callback, callback_arg ] my $reap_armed; my $ToClose; # sockets to close when event loop is done our ( %DescriptorMap, # fd (num) -> PublicInbox::DS object $Epoll, # Global epoll fd (or DSKQXS ref) $ep_io, # IO::Handle for Epoll $PostLoopCallback, # subref to call at the end of each loop, if defined (global) $LoopTimeout, # timeout of event loop in milliseconds @Timers, # timers %UniqTimer, $in_loop, ); Reset(); ##################################################################### ### C L A S S M E T H O D S ##################################################################### =head2 C<< CLASS->Reset() >> Reset all state =cut sub Reset { do { $in_loop = undef; # first in case DESTROY callbacks use this %DescriptorMap = (); @Timers = (); %UniqTimer = (); $PostLoopCallback = undef; # we may be iterating inside one of these on our stack my @q = delete @Stack{keys %Stack}; for my $q (@q) { @$q = () } $wait_pids = $nextq = $ToClose = undef; $ep_io = undef; # closes real $Epoll FD $Epoll = undef; # may call DSKQXS::DESTROY } while (@Timers || keys(%Stack) || $nextq || $wait_pids || $ToClose || keys(%DescriptorMap) || $PostLoopCallback || keys(%UniqTimer)); $reap_armed = undef; $LoopTimeout = -1; # no timeout by default } =head2 C<< CLASS->SetLoopTimeout( $timeout ) >> Set the loop timeout for the event loop to some value in milliseconds. A timeout of 0 (zero) means poll forever. A timeout of -1 means poll and return immediately. =cut sub SetLoopTimeout { $LoopTimeout = $_[1] + 0 } sub _add_named_timer { my ($name, $secs, $coderef, @args) = @_; my $fire_time = now() + $secs; my $timer = [$fire_time, $name, $coderef, @args]; if (!@Timers || $fire_time >= $Timers[-1][0]) { push @Timers, $timer; return $timer; } # Now, where do we insert? (NOTE: this appears slow, algorithm-wise, # but it was compared against calendar queues, heaps, naive push/sort, # and a bunch of other versions, and found to be fastest with a large # variety of datasets.) for (my $i = 0; $i < @Timers; $i++) { if ($Timers[$i][0] > $fire_time) { splice(@Timers, $i, 0, $timer); return $timer; } } die "Shouldn't get here."; } sub add_timer { _add_named_timer(undef, @_) } sub add_uniq_timer { # ($name, $secs, $coderef, @args) = @_; $UniqTimer{$_[0]} //= _add_named_timer(@_); } # caller sets return value to $Epoll sub _InitPoller () { if (PublicInbox::Syscall::epoll_defined()) { my $fd = epoll_create(); die "epoll_create: $!" if $fd < 0; open($ep_io, '+<&=', $fd) or return; my $fl = fcntl($ep_io, F_GETFD, 0); fcntl($ep_io, F_SETFD, $fl | FD_CLOEXEC); $fd; } else { my $cls; for (qw(DSKQXS DSPoll)) { $cls = "PublicInbox::$_"; last if eval "require $cls"; } $cls->import(qw(epoll_ctl epoll_wait)); $cls->new; } } sub now () { clock_gettime(CLOCK_MONOTONIC) } sub next_tick () { my $q = $nextq or return; $nextq = undef; $Stack{cur_runq} = $q; for my $obj (@$q) { # avoid "ref" on blessed refs to workaround a Perl 5.16.3 leak: # https://rt.perl.org/Public/Bug/Display.html?id=114340 if (blessed($obj)) { $obj->event_step; } else { $obj->(); } } delete $Stack{cur_runq}; } # runs timers and returns milliseconds for next one, or next event loop sub RunTimers { next_tick(); return (($nextq || $ToClose) ? 0 : $LoopTimeout) unless @Timers; my $now = now(); # Run expired timers while (@Timers && $Timers[0][0] <= $now) { my $to_run = shift(@Timers); delete $UniqTimer{$to_run->[1] // ''}; $to_run->[2]->(@$to_run[3..$#$to_run]); } # timers may enqueue into nextq: return 0 if ($nextq || $ToClose); return $LoopTimeout unless @Timers; # convert time to an even number of milliseconds, adding 1 # extra, otherwise floating point fun can occur and we'll # call RunTimers like 20-30 times, each returning a timeout # of 0.0000212 seconds my $timeout = int(($Timers[0][0] - $now) * 1000) + 1; # -1 is an infinite timeout, so prefer a real timeout ($LoopTimeout < 0 || $LoopTimeout >= $timeout) ? $timeout : $LoopTimeout } sub sig_setmask { sigprocmask(SIG_SETMASK, @_) or die "sigprocmask: $!" } sub block_signals () { my $oldset = POSIX::SigSet->new; my $newset = POSIX::SigSet->new; $newset->fillset or die "fillset: $!"; sig_setmask($newset, $oldset); $oldset; } # We can't use waitpid(-1) safely here since it can hit ``, system(), # and other things. So we scan the $wait_pids list, which is hopefully # not too big. We keep $wait_pids small by not calling dwaitpid() # until we've hit EOF when reading the stdout of the child. sub reap_pids { $reap_armed = undef; my $tmp = $wait_pids or return; $wait_pids = undef; $Stack{reap_runq} = $tmp; my $oldset = block_signals(); foreach my $ary (@$tmp) { my ($pid, $cb, $arg) = @$ary; my $ret = waitpid($pid, WNOHANG); if ($ret == 0) { push @$wait_pids, $ary; # autovivifies @$wait_pids } elsif ($ret == $pid) { if ($cb) { eval { $cb->($arg, $pid) }; warn "E: dwaitpid($pid) in_loop: $@" if $@; } } else { warn "waitpid($pid, WNOHANG) = $ret, \$!=$!, \$?=$?"; } } sig_setmask($oldset); delete $Stack{reap_runq}; } # reentrant SIGCHLD handler (since reap_pids is not reentrant) sub enqueue_reap () { $reap_armed //= requeue(\&reap_pids) } sub in_loop () { $in_loop } # Internal function: run the post-event callback, send read events # for pushed-back data, and close pending connections. returns 1 # if event loop should continue, or 0 to shut it all down. sub PostEventLoop () { # now we can close sockets that wanted to close during our event # processing. (we didn't want to close them during the loop, as we # didn't want fd numbers being reused and confused during the event # loop) if (my $close_now = $ToClose) { $ToClose = undef; # will be autovivified on push @$close_now = map { fileno($_) } @$close_now; # ->DESTROY methods may populate ToClose delete @DescriptorMap{@$close_now}; } # by default we keep running, unless a postloop callback cancels it $PostLoopCallback ? $PostLoopCallback->(\%DescriptorMap) : 1; } # Start processing IO events. In most daemon programs this never exits. See # C for how to exit the loop. sub event_loop (;$$) { my ($sig, $oldset) = @_; $Epoll //= _InitPoller(); require PublicInbox::Sigfd if $sig; my $sigfd = PublicInbox::Sigfd->new($sig, 1) if $sig; local @SIG{keys %$sig} = values(%$sig) if $sig && !$sigfd; local $SIG{PIPE} = 'IGNORE'; if (!$sigfd && $sig) { # wake up every second to accept signals if we don't # have signalfd or IO::KQueue: sig_setmask($oldset); PublicInbox::DS->SetLoopTimeout(1000); } $_[0] = $sigfd = $sig = undef; # $_[0] == sig local $in_loop = 1; my @events; do { my $timeout = RunTimers(); # get up to 1000 events epoll_wait($Epoll, 1000, $timeout, \@events); for my $fd (@events) { # it's possible epoll_wait returned many events, # including some at the end that ones in the front # triggered unregister-interest actions. if we can't # find the %sock entry, it's because we're no longer # interested in that event. # guard stack-not-refcounted w/ Carp + @DB::args my $obj = $DescriptorMap{$fd}; $obj->event_step; } } while (PostEventLoop()); } =head2 C<< CLASS->SetPostLoopCallback( CODEREF ) >> Sets post loop callback function. Pass a subref and it will be called every time the event loop finishes. Return 1 (or any true value) from the sub to make the loop continue, 0 or false and it will exit. The callback function will be passed two parameters: \%DescriptorMap =cut sub SetPostLoopCallback { my ($class, $ref) = @_; # global callback $PostLoopCallback = (defined $ref && ref $ref eq 'CODE') ? $ref : undef; } ##################################################################### ### PublicInbox::DS-the-object code ##################################################################### =head2 OBJECT METHODS =head2 C<< CLASS->new( $socket ) >> Create a new PublicInbox::DS subclass object for the given I which will react to events on it during the C. This is normally (always?) called from your subclass via: $class->SUPER::new($socket); =cut sub new { my ($self, $sock, $ev) = @_; $self->{sock} = $sock; my $fd = fileno($sock); $Epoll //= _InitPoller(); retry: if (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $ev)) { if ($! == EINVAL && ($ev & EPOLLEXCLUSIVE)) { $ev &= ~EPOLLEXCLUSIVE; goto retry; } die "EPOLL_CTL_ADD $self/$sock/$fd: $!"; } croak("FD:$fd in use by $DescriptorMap{$fd} (for $self/$sock)") if defined($DescriptorMap{$fd}); $DescriptorMap{$fd} = $self; } # for IMAP, NNTP, and POP3 which greet clients upon connect sub greet { my ($self, $sock) = @_; my $ev = EPOLLIN; my $wbuf; if ($sock->can('accept_SSL') && !$sock->accept_SSL) { return CORE::close($sock) if $! != EAGAIN; $ev = PublicInbox::TLS::epollbit() or return CORE::close($sock); $wbuf = [ \&accept_tls_step, $self->can('do_greet')]; } new($self, $sock, $ev | EPOLLONESHOT); if ($wbuf) { $self->{wbuf} = $wbuf; } else { $self->do_greet; } $self; } ##################################################################### ### I N S T A N C E M E T H O D S ##################################################################### sub requeue ($) { push @$nextq, $_[0] } # autovivifies =head2 C<< $obj->close >> Close the socket. =cut sub close { my ($self) = @_; my $sock = delete $self->{sock} or return; # we need to flush our write buffer, as there may # be self-referential closures (sub { $client->close }) # preventing the object from being destroyed delete $self->{wbuf}; # if we're using epoll, we have to remove this from our epoll fd so we stop getting # notifications about it my $fd = fileno($sock); epoll_ctl($Epoll, EPOLL_CTL_DEL, $fd, 0) and croak("EPOLL_CTL_DEL($self/$sock): $!"); # we explicitly don't delete from DescriptorMap here until we # actually close the socket, as we might be in the middle of # processing an epoll_wait/etc that returned hundreds of fds, one # of which is not yet processed and is what we're closing. if we # keep it in DescriptorMap, then the event harnesses can just # looked at $pob->{sock} == undef and ignore it. but if it's an # un-accounted for fd, then it (understandably) freak out a bit # and emit warnings, thinking their state got off. # defer closing the actual socket until the event loop is done # processing this round of events. (otherwise we might reuse fds) push @$ToClose, $sock; # autovivifies $ToClose return 0; } # portable, non-thread-safe sendfile emulation (no pread, yet) sub send_tmpio ($$) { my ($sock, $tmpio) = @_; sysseek($tmpio->[0], $tmpio->[1], SEEK_SET) or return; my $n = $tmpio->[2] // 65536; $n = 65536 if $n > 65536; defined(my $to_write = sysread($tmpio->[0], my $buf, $n)) or return; my $written = 0; while ($to_write > 0) { if (defined(my $w = syswrite($sock, $buf, $to_write, $written))) { $written += $w; $to_write -= $w; } else { return if $written == 0; last; } } $tmpio->[1] += $written; # offset $tmpio->[2] -= $written if defined($tmpio->[2]); # length $written; } sub epbit ($$) { # (sock, default) $_[0]->can('stop_SSL') ? PublicInbox::TLS::epollbit() : $_[1]; } # returns 1 if done, 0 if incomplete sub flush_write ($) { my ($self) = @_; my $sock = $self->{sock} or return; my $wbuf = $self->{wbuf} or return 1; next_buf: while (my $bref = $wbuf->[0]) { if (ref($bref) ne 'CODE') { while ($sock) { my $w = send_tmpio($sock, $bref); # bref is tmpio if (defined $w) { if ($w == 0) { shift @$wbuf; goto next_buf; } } elsif ($! == EAGAIN) { my $ev = epbit($sock, EPOLLOUT) or return $self->close; epwait($sock, $ev | EPOLLONESHOT); return 0; } else { return $self->close; } } } else { #(ref($bref) eq 'CODE') { shift @$wbuf; my $before = scalar(@$wbuf); $bref->($self); # bref may be enqueueing more CODE to call (see accept_tls_step) return 0 if (scalar(@$wbuf) > $before); } } # while @$wbuf delete $self->{wbuf}; 1; # all done } sub rbuf_idle ($$) { my ($self, $rbuf) = @_; if ($$rbuf eq '') { # who knows how long till we can read again delete $self->{rbuf}; } else { $self->{rbuf} = $rbuf; } } sub do_read ($$$;$) { my ($self, $rbuf, $len, $off) = @_; my $r = sysread(my $sock = $self->{sock}, $$rbuf, $len, $off // 0); return ($r == 0 ? $self->close : $r) if defined $r; # common for clients to break connections without warning, # would be too noisy to log here: if ($! == EAGAIN) { my $ev = epbit($sock, EPOLLIN) or return $self->close; epwait($sock, $ev | EPOLLONESHOT); rbuf_idle($self, $rbuf); 0; } else { $self->close; } } # drop the socket if we hit unrecoverable errors on our system which # require BOFH attention: ENOSPC, EFBIG, EIO, EMFILE, ENFILE... sub drop { my $self = shift; carp(@_); $self->close; } sub tmpio ($$$) { my ($self, $bref, $off) = @_; my $fh = tmpfile('wbuf', $self->{sock}, O_APPEND) or return drop($self, "tmpfile $!"); $fh->autoflush(1); my $len = length($$bref) - $off; my $n = syswrite($fh, $$bref, $len, $off) // return drop($self, "write ($len): $!"); $n == $len or return drop($self, "wrote $n < $len bytes"); [ $fh, 0 ] # [1] = offset, [2] = length, not set by us } =head2 C<< $obj->write( $data ) >> Write the specified data to the underlying handle. I may be scalar, scalar ref, code ref (to run when there). Returns 1 if writes all went through, or 0 if there are writes in queue. If it returns 1, caller should stop waiting for 'writable' events) =cut sub write { my ($self, $data) = @_; # nobody should be writing to closed sockets, but caller code can # do two writes within an event, have the first fail and # disconnect the other side (whose destructor then closes the # calling object, but it's still in a method), and then the # now-dead object does its second write. that is this case. we # just lie and say it worked. it'll be dead soon and won't be # hurt by this lie. my $sock = $self->{sock} or return 1; my $ref = ref $data; my $bref = $ref ? $data : \$data; my $wbuf = $self->{wbuf}; if ($wbuf && scalar(@$wbuf)) { # already buffering, can't write more... if ($ref eq 'CODE') { push @$wbuf, $bref; } else { my $tmpio = $wbuf->[-1]; if ($tmpio && !defined($tmpio->[2])) { # append to tmp file buffer $tmpio->[0]->print($$bref) or return drop($self, "print: $!"); } else { my $tmpio = tmpio($self, $bref, 0) or return 0; push @$wbuf, $tmpio; } } return 0; } elsif ($ref eq 'CODE') { $bref->($self); return 1; } else { my $to_write = length($$bref); my $written = syswrite($sock, $$bref, $to_write); if (defined $written) { return 1 if $written == $to_write; requeue($self); # runs: event_step -> flush_write } elsif ($! == EAGAIN) { my $ev = epbit($sock, EPOLLOUT) or return $self->close; epwait($sock, $ev | EPOLLONESHOT); $written = 0; } else { return $self->close; } # deal with EAGAIN or partial write: my $tmpio = tmpio($self, $bref, $written) or return 0; # wbuf may be an empty array if we're being called inside # ->flush_write via CODE bref: push @{$self->{wbuf}}, $tmpio; # autovivifies return 0; } } use constant MSG_MORE => ($^O eq 'linux') ? 0x8000 : 0; sub msg_more ($$) { my $self = $_[0]; my $sock = $self->{sock} or return 1; my $wbuf = $self->{wbuf}; if (MSG_MORE && (!defined($wbuf) || !scalar(@$wbuf)) && !$sock->can('stop_SSL')) { my $n = send($sock, $_[1], MSG_MORE); if (defined $n) { my $nlen = length($_[1]) - $n; return 1 if $nlen == 0; # all done! # queue up the unwritten substring: my $tmpio = tmpio($self, \($_[1]), $n) or return 0; push @{$self->{wbuf}}, $tmpio; # autovivifies epwait($sock, EPOLLOUT|EPOLLONESHOT); return 0; } } # don't redispatch into NNTPdeflate::write PublicInbox::DS::write($self, \($_[1])); } sub epwait ($$) { my ($sock, $ev) = @_; epoll_ctl($Epoll, EPOLL_CTL_MOD, fileno($sock), $ev) and croak("EPOLL_CTL_MOD($sock): $!"); } # return true if complete, false if incomplete (or failure) sub accept_tls_step ($) { my ($self) = @_; my $sock = $self->{sock} or return; return 1 if $sock->accept_SSL; return $self->close if $! != EAGAIN; my $ev = PublicInbox::TLS::epollbit() or return $self->close; epwait($sock, $ev | EPOLLONESHOT); unshift(@{$self->{wbuf}}, \&accept_tls_step); # autovivifies 0; } # return true if complete, false if incomplete (or failure) sub shutdn_tls_step ($) { my ($self) = @_; my $sock = $self->{sock} or return; return $self->close if $sock->stop_SSL(SSL_fast_shutdown => 1); return $self->close if $! != EAGAIN; my $ev = PublicInbox::TLS::epollbit() or return $self->close; epwait($sock, $ev | EPOLLONESHOT); unshift(@{$self->{wbuf}}, \&shutdn_tls_step); # autovivifies 0; } # don't bother with shutdown($sock, 2), we don't fork+exec w/o CLOEXEC # or fork w/o exec, so no inadvertent socket sharing sub shutdn ($) { my ($self) = @_; my $sock = $self->{sock} or return; if ($sock->can('stop_SSL')) { shutdn_tls_step($self); } else { $self->close; } } sub dflush {} # overridden by DSdeflate sub compressed {} # overridden by DSdeflate sub long_response_done {} # overridden by Net::NNTP sub long_step { my ($self) = @_; # wbuf is unset or empty, here; {long} may add to it my ($fd, $cb, $t0, @args) = @{$self->{long_cb}}; my $more = eval { $cb->($self, @args) }; if ($@ || !$self->{sock}) { # something bad happened... delete $self->{long_cb}; my $elapsed = now() - $t0; $@ and warn("$@ during long response[$fd] - ", sprintf('%0.6f', $elapsed),"\n"); $self->out(" deferred[$fd] aborted - %0.6f", $elapsed); $self->close; } elsif ($more) { # $self->{wbuf}: # control passed to ibx_async_cat if $more == \undef requeue_once($self) if !ref($more); } else { # all done! delete $self->{long_cb}; $self->long_response_done; my $elapsed = now() - $t0; my $fd = fileno($self->{sock}); $self->out(" deferred[$fd] done - %0.6f", $elapsed); my $wbuf = $self->{wbuf}; # do NOT autovivify requeue($self) unless $wbuf && @$wbuf; } } sub requeue_once { my ($self) = @_; # COMPRESS users all share the same DEFLATE context. # Flush it here to ensure clients don't see each other's data $self->dflush; # no recursion, schedule another call ASAP, # but only after all pending writes are done. # autovivify wbuf. wbuf may be populated by $cb, # no need to rearm if so: (push returns new size of array) $self->requeue if push(@{$self->{wbuf}}, \&long_step) == 1; } sub long_response ($$;@) { my ($self, $cb, @args) = @_; # cb returns true if more, false if done my $sock = $self->{sock} or return; # make sure we disable reading during a long response, # clients should not be sending us stuff and making us do more # work while we are stream a response to them $self->{long_cb} = [ fileno($sock), $cb, now(), @args ]; long_step($self); # kick off! undef; } sub dwaitpid ($;$$) { my ($pid, $cb, $arg) = @_; if ($in_loop) { push @$wait_pids, [ $pid, $cb, $arg ]; # We could've just missed our SIGCHLD, cover it, here: enqueue_reap(); } else { my $ret = waitpid($pid, 0); if ($ret == $pid) { if ($cb) { eval { $cb->($arg, $pid) }; carp "E: dwaitpid($pid) !in_loop: $@" if $@; } } else { carp "waitpid($pid, 0) = $ret, \$!=$!, \$?=$?"; } } } 1; =head1 AUTHORS (Danga::Socket) Brad Fitzpatrick - author Michael Granger - docs, testing Mark Smith - contributor, heavy user, testing Matt Sergeant - kqueue support, docs, timers, other bits public-inbox-1.9.0/lib/PublicInbox/DSKQXS.pm000066400000000000000000000101361430031475700204600ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # Licensed the same as Danga::Socket (and Perl5) # License: GPL-1.0+ or Artistic-1.0-Perl # # # # kqueue support via IO::KQueue XS module. This makes kqueue look # like epoll to simplify the code in DS.pm. This is NOT meant to be # an all encompassing emulation of epoll via IO::KQueue, but just to # support cases public-inbox-nntpd/httpd care about. # # It also implements signalfd(2) emulation via "tie". package PublicInbox::DSKQXS; use strict; use warnings; use parent qw(Exporter); use Symbol qw(gensym); use IO::KQueue; use Errno qw(EAGAIN); use PublicInbox::Syscall qw(EPOLLONESHOT EPOLLIN EPOLLOUT EPOLLET EPOLL_CTL_ADD EPOLL_CTL_MOD EPOLL_CTL_DEL); our @EXPORT_OK = qw(epoll_ctl epoll_wait); sub EV_DISPATCH () { 0x0080 } # map EPOLL* bits to kqueue EV_* flags for EV_SET sub kq_flag ($$) { my ($bit, $ev) = @_; if ($ev & $bit) { my $fl = EV_ENABLE; $fl |= EV_CLEAR if $fl & EPOLLET; # EV_DISPATCH matches EPOLLONESHOT semantics more closely # than EV_ONESHOT, in that EV_ADD is not required to # re-enable a disabled watch. ($ev & EPOLLONESHOT) ? ($fl | EV_DISPATCH) : $fl; } else { EV_DISABLE; } } sub new { my ($class) = @_; bless { kq => IO::KQueue->new, owner_pid => $$ }, $class; } # returns a new instance which behaves like signalfd on Linux. # It's wasteful in that it uses another FD, but it simplifies # our epoll-oriented code. sub signalfd { my ($class, $signo, $nonblock) = @_; my $sym = gensym; tie *$sym, $class, $signo, $nonblock; # calls TIEHANDLE $sym } sub TIEHANDLE { # similar to signalfd() my ($class, $signo, $nonblock) = @_; my $self = $class->new; $self->{timeout} = $nonblock ? 0 : -1; my $kq = $self->{kq}; $kq->EV_SET($_, EVFILT_SIGNAL, EV_ADD) for @$signo; $self; } sub READ { # called by sysread() for signalfd compatibility my ($self, undef, $len, $off) = @_; # $_[1] = buf die "bad args for signalfd read" if ($len % 128) // defined($off); my $timeout = $self->{timeout}; my $sigbuf = $self->{sigbuf} //= []; my $nr = $len / 128; my $r = 0; $_[1] = ''; do { while ($nr--) { my $signo = shift(@$sigbuf) or last; # caller only cares about signalfd_siginfo.ssi_signo: $_[1] .= pack('L', $signo) . ("\0" x 124); $r += 128; } return $r if $r; my @events = eval { $self->{kq}->kevent($timeout) }; # workaround https://rt.cpan.org/Ticket/Display.html?id=116615 if ($@) { next if $@ =~ /Interrupted system call/; die; } if (!scalar(@events) && $timeout == 0) { $! = EAGAIN; return; } # Grab the kevent.ident (signal number). The kevent.data # field shows coalesced signals, and maybe we'll use it # in the future... @$sigbuf = map { $_->[0] } @events; } while (1); } # for fileno() calls in PublicInbox::DS sub FILENO { ${$_[0]->{kq}} } sub epoll_ctl { my ($self, $op, $fd, $ev) = @_; my $kq = $self->{kq}; if ($op == EPOLL_CTL_MOD) { $kq->EV_SET($fd, EVFILT_READ, kq_flag(EPOLLIN, $ev)); eval { $kq->EV_SET($fd, EVFILT_WRITE, kq_flag(EPOLLOUT, $ev)) }; } elsif ($op == EPOLL_CTL_DEL) { $kq->EV_SET($fd, EVFILT_READ, EV_DISABLE); eval { $kq->EV_SET($fd, EVFILT_WRITE, EV_DISABLE) }; } else { # EPOLL_CTL_ADD $kq->EV_SET($fd, EVFILT_READ, EV_ADD|kq_flag(EPOLLIN, $ev)); # we call this blindly for read-only FDs such as tied # DSKQXS (signalfd emulation) and Listeners eval { $kq->EV_SET($fd, EVFILT_WRITE, EV_ADD | kq_flag(EPOLLOUT, $ev)); }; } 0; } sub epoll_wait { my ($self, $maxevents, $timeout_msec, $events) = @_; @$events = eval { $self->{kq}->kevent($timeout_msec) }; if (my $err = $@) { # workaround https://rt.cpan.org/Ticket/Display.html?id=116615 if ($err =~ /Interrupted system call/) { @$events = (); } else { die $err; } } # caller only cares for $events[$i]->[0] $_ = $_->[0] for @$events; } # kqueue is close-on-fork (not exec), so we must not close it # in forked processes: sub DESTROY { my ($self) = @_; my $kq = delete $self->{kq} or return; if (delete($self->{owner_pid}) == $$) { POSIX::close($$kq); } } 1; public-inbox-1.9.0/lib/PublicInbox/DSPoll.pm000066400000000000000000000030501430031475700205750ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # Licensed the same as Danga::Socket (and Perl5) # License: GPL-1.0+ or Artistic-1.0-Perl # # # # poll(2) via IO::Poll core module. This makes poll look # like epoll to simplify the code in DS.pm. This is NOT meant to be # an all encompassing emulation of epoll via IO::Poll, but just to # support cases public-inbox-nntpd/httpd care about. package PublicInbox::DSPoll; use strict; use warnings; use parent qw(Exporter); use IO::Poll; use PublicInbox::Syscall qw(EPOLLONESHOT EPOLLIN EPOLLOUT EPOLL_CTL_DEL); our @EXPORT_OK = qw(epoll_ctl epoll_wait); sub new { bless {}, $_[0] } # fd => events sub epoll_ctl { my ($self, $op, $fd, $ev) = @_; # not wasting time on error checking if ($op != EPOLL_CTL_DEL) { $self->{$fd} = $ev; } else { delete $self->{$fd}; } 0; } sub epoll_wait { my ($self, $maxevents, $timeout_msec, $events) = @_; my @pset; while (my ($fd, $events) = each %$self) { my $pevents = $events & EPOLLIN ? POLLIN : 0; $pevents |= $events & EPOLLOUT ? POLLOUT : 0; push(@pset, $fd, $pevents); } @$events = (); my $n = IO::Poll::_poll($timeout_msec, @pset); if ($n >= 0) { for (my $i = 0; $i < @pset; ) { my $fd = $pset[$i++]; my $revents = $pset[$i++] or next; delete($self->{$fd}) if $self->{$fd} & EPOLLONESHOT; push @$events, $fd; } my $nevents = scalar @$events; if ($n != $nevents) { warn "BUG? poll() returned $n, but got $nevents"; } } } 1; public-inbox-1.9.0/lib/PublicInbox/DSdeflate.pm000066400000000000000000000071461430031475700213050ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # RFC 8054 NNTP COMPRESS DEFLATE implementation # RFC 4978 IMAP COMPRESS=DEFLATE extension # # RSS usage for 10K idle-but-did-something NNTP clients on 64-bit: # TLS + DEFLATE[a] : 1.8 GB (MemLevel=9, 1.2 GB with MemLevel=8) # TLS + DEFLATE[b] : ~300MB # TLS only : <200MB # plain : <50MB # # [a] - initial implementation using per-client Deflate contexts and buffer # # [b] - memory-optimized implementation using a global deflate context. # It's less efficient in terms of compression, but way more # efficient in terms of server memory usage. package PublicInbox::DSdeflate; use strict; use v5.10.1; use Compress::Raw::Zlib; my %IN_OPT = ( -Bufsize => 1024, -WindowBits => -15, # RFC 1951 -AppendOutput => 1, ); # global deflate context and buffer my ($zout, $zbuf); { my $err; $zbuf = \(my $initial = ''); # replaced by $next in dflush/write ($zout, $err) = Compress::Raw::Zlib::Deflate->new( # nnrpd (INN) and Compress::Raw::Zlib favor MemLevel=9, # the zlib C library and git use MemLevel=8 as the default # -MemLevel => 9, -Bufsize => 65536, # same as nnrpd -WindowBits => -15, # RFC 1951 -AppendOutput => 1, ); $err == Z_OK or die "Failed to initialize zlib deflate stream: $err"; } sub enable { my ($class, $self) = @_; my ($in, $err) = Compress::Raw::Zlib::Inflate->new(%IN_OPT); if ($err != Z_OK) { warn("Inflate->new failed: $err\n"); return; } bless $self, $class; $self->{zin} = $in; } # overrides PublicInbox::DS::compressed sub compressed { 1 } sub do_read ($$$$) { my ($self, $rbuf, $len, $off) = @_; my $zin = $self->{zin} or return; # closed my $doff; my $dbuf = delete($self->{dbuf}) // ''; $doff = length($dbuf); my $r = PublicInbox::DS::do_read($self, \$dbuf, $len, $doff) or return; # Workaround inflate bug appending to OOK scalars: # # We only have $off if the client is pipelining, and pipelining # is where our substr() OOK optimization in event_step makes sense. if ($off) { my $copy = $$rbuf; undef $$rbuf; $$rbuf = $copy; } # assert(length($$rbuf) == $off) as far as NNTP.pm is concerned # -ConsumeInput is true, so $dbuf is automatically emptied my $err = $zin->inflate($dbuf, $rbuf); if ($err == Z_OK) { $self->{dbuf} = $dbuf if $dbuf ne ''; $r = length($$rbuf) and return $r; # nothing ready, yet, get more, later $self->requeue; } else { delete $self->{zin}; $self->close; } 0; } # override PublicInbox::DS::msg_more sub msg_more ($$) { my $self = $_[0]; # $_[1] may be a reference or not for ->deflate my $err = $zout->deflate($_[1], $zbuf); $err == Z_OK or die "->deflate failed $err"; 1; } sub dflush ($) { my ($self) = @_; my $deflated = $zbuf; $zbuf = \(my $next = ''); my $err = $zout->flush($deflated, Z_FULL_FLUSH); $err == Z_OK or die "->flush failed $err"; # We can still let the lower socket layer do buffering: PublicInbox::DS::msg_more($self, $$deflated); } # compatible with PublicInbox::DS::write, so $_[1] may be a reference or not sub write ($$) { my $self = $_[0]; return PublicInbox::DS::write($self, $_[1]) if ref($_[1]) eq 'CODE'; my $deflated = $zbuf; $zbuf = \(my $next = ''); # $_[1] may be a reference or not for ->deflate my $err = $zout->deflate($_[1], $deflated); $err == Z_OK or die "->deflate failed $err"; $err = $zout->flush($deflated, Z_FULL_FLUSH); $err == Z_OK or die "->flush failed $err"; # We can still let the socket layer do buffering: PublicInbox::DS::write($self, $deflated); } 1; public-inbox-1.9.0/lib/PublicInbox/Daemon.pm000066400000000000000000000504171430031475700206540ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # # Contains common daemon code for the httpd, imapd, and nntpd servers # and designed for handling thousands of untrusted clients over slow # and/or lossy connections. package PublicInbox::Daemon; use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use IO::Handle; # ->autoflush use IO::Socket; use File::Spec; use POSIX qw(WNOHANG :signal_h); use Socket qw(IPPROTO_TCP SOL_SOCKET); STDOUT->autoflush(1); STDERR->autoflush(1); use PublicInbox::DS qw(now); use PublicInbox::Listener; use PublicInbox::EOFpipe; use PublicInbox::Sigfd; use PublicInbox::Git; use PublicInbox::GitAsyncCat; use PublicInbox::Eml; use PublicInbox::Config; our $SO_ACCEPTFILTER = 0x1000; my @CMD; my ($set_user, $oldset); my (@cfg_listen, $stdout, $stderr, $group, $user, $pid_file, $daemonize); my $worker_processes = 1; my @listeners; my (%pids, %logs); my %tls_opt; # scheme://sockname => args for IO::Socket::SSL::SSL_Context->new my $reexec_pid; my ($uid, $gid); my ($default_cert, $default_key); my %KNOWN_TLS = (443 => 'https', 563 => 'nntps', 993 => 'imaps', 995 =>'pop3s'); my %KNOWN_STARTTLS = (110 => 'pop3', 119 => 'nntp', 143 => 'imap'); my %SCHEME2PORT = map { $KNOWN_TLS{$_} => $_ + 0 } keys %KNOWN_TLS; for (keys %KNOWN_STARTTLS) { $SCHEME2PORT{$KNOWN_STARTTLS{$_}} = $_ + 0 } $SCHEME2PORT{http} = 80; sub listener_opt ($) { my ($str) = @_; # opt1=val1,opt2=val2 (opt may repeat for multi-value) my $o = {}; # allow ',' as delimiter since '&' is shell-unfriendly for (split(/[,&]/, $str)) { my ($k, $v) = split(/=/, $_, 2); push @{$o->{$k}}, $v; } # key may be a part of cert. At least # p5-io-socket-ssl/example/ssl_server.pl has this fallback: $o->{cert} //= [ $default_cert ] if defined($default_cert); $o->{key} //= defined($default_key) ? [ $default_key ] : $o->{cert}; $o; } sub check_absolute ($$) { my ($var, $val) = @_; die <{cert}); require PublicInbox::TLS; my @ctx_opt; # parse out hostname:/path/to/ mappings: for my $k (qw(cert key)) { $o->{$k} // next; push(@ctx_opt, "SSL_${k}_file", {}); foreach my $path (@{$o->{$k}}) { my $host = ''; $path =~ s/\A([^:]+):// and $host = $1; $ctx_opt[-1]->{$host} = $path; check_absolute($k, $path) if $daemonize; } } \@ctx_opt; } sub do_chown ($) { $uid // return; my ($path) = @_; chown($uid, $gid, $path) or warn "chown $path: $!\n"; } sub open_log_path ($$) { # my ($fh, $path) = @_; # $_[0] is modified open $_[0], '>>', $_[1] or die "open(>> $_[1]): $!"; $_[0]->autoflush(1); do_chown($_[1]); $_[0]; } sub load_mod ($;$$) { my ($scheme, $opt, $addr) = @_; my $modc = "PublicInbox::\U$scheme"; $modc =~ s/S\z//; my $mod = $modc.'D'; eval "require $mod"; # IMAPD|HTTPD|NNTPD|POP3D die $@ if $@; my %xn; my $tlsd = $xn{tlsd} = $mod->new; my %env = map { substr($_, length('env.')) => $opt->{$_}->[-1]; } grep(/\Aenv\./, keys %$opt); $xn{refresh} = sub { my ($sig) = @_; local @ENV{keys %env} = values %env; $tlsd->refresh_groups($sig); }; $xn{post_accept} = $tlsd->can('post_accept_cb') ? $tlsd->post_accept_cb : sub { $modc->new($_[0], $tlsd) }; my @paths = qw(out err); if ($modc eq 'PublicInbox::HTTP') { @paths = qw(err); $xn{af_default} = 'httpready'; if (my $p = $opt->{psgi}) { die "multiple psgi= options specified\n" if @$p > 1; check_absolute('psgi=', $p->[0]) if $daemonize; $tlsd->{psgi} = $p->[0]; warn "# $scheme://$addr psgi=$p->[0]\n"; } } for my $f (@paths) { my $p = $opt->{$f} or next; die "multiple $f= options specified\n" if @$p > 1; check_absolute("$f=", $p->[0]) if $daemonize; $p = File::Spec->canonpath($p->[0]); $tlsd->{$f} = $logs{$p} //= open_log_path(my $fh, $p); warn "# $scheme://$addr $f=$p\n"; } my $err = $tlsd->{err}; $tlsd->{warn_cb} = sub { print $err @_ }; # for local $SIG{__WARN__} \%xn; } sub daemon_prepare ($$) { my ($default_listen, $xnetd) = @_; my $listener_names = {}; # sockname => IO::Handle $oldset = PublicInbox::DS::block_signals(); @CMD = ($0, @ARGV); my ($prog) = ($CMD[0] =~ m!([^/]+)\z!g); my $dh = defined($default_listen) ? " (default: $default_listen)" : ''; my $help = < \@cfg_listen, '1|stdout=s' => \$stdout, '2|stderr=s' => \$stderr, 'W|worker-processes=i' => \$worker_processes, 'P|pid-file=s' => \$pid_file, 'u|user=s' => \$user, 'g|group=s' => \$group, 'D|daemonize' => \$daemonize, 'cert=s' => \$default_cert, 'key=s' => \$default_key, 'help|h' => \(my $show_help), ); GetOptions(%opt) or die $help; if ($show_help) { print $help; exit 0 }; $_ = File::Spec->canonpath($_ // next) for ($stdout, $stderr); if (defined $pid_file && $pid_file =~ /\.oldbin\z/) { die "--pid-file cannot end with '.oldbin'\n"; } @listeners = inherit($listener_names); my @inherited_names = keys(%$listener_names); # ignore daemonize when inheriting $daemonize = undef if scalar @listeners; unless (@listeners || @cfg_listen) { $default_listen // die "no listeners specified\n"; push @cfg_listen, $default_listen } my ($default_scheme) = (($default_listen // '') =~ m!\A([^:]+)://!); foreach my $l (@cfg_listen) { my $orig = $l; my ($scheme, $port, $opt); $l =~ s!\A([a-z0-9]+)://!! and $scheme = $1; $scheme //= $default_scheme; if ($l =~ /\A(?:\[[^\]]+\]|[^:]+):([0-9]+)/) { $port = $1 + 0; $scheme //= $KNOWN_TLS{$port} // $KNOWN_STARTTLS{$port}; } $scheme // die "unable to determine URL scheme of $orig\n"; if (!defined($port) && index($l, '/') != 0) { # AF_UNIX socket $port = $SCHEME2PORT{$scheme} // die "no port in listen=$orig\n"; $l =~ s!\A([^/]+)!$1:$port! or die "unable to add port=$port to $l\n"; } $l =~ s!/\z!!; # chop one trailing slash if ($l =~ s!/?\?(.+)\z!!) { $opt = listener_opt($1); $tls_opt{"$scheme://$l"} = accept_tls_opt($opt); } elsif (defined($default_cert)) { $tls_opt{"$scheme://$l"} = accept_tls_opt(''); } elsif ($scheme =~ /\A(?:https|imaps|nntps|pop3s)\z/) { die "$orig specified w/o cert=\n"; } if ($listener_names->{$l}) { # already inherited $xnetd->{$l} = load_mod($scheme, $opt, $l); next; } my (%o, $sock_pkg); if (index($l, '/') == 0) { $sock_pkg = 'IO::Socket::UNIX'; eval "use $sock_pkg"; die $@ if $@; %o = (Type => SOCK_STREAM, Peer => $l); if (-S $l) { my $c = $sock_pkg->new(%o); if (!defined($c) && $!{ECONNREFUSED}) { unlink $l or die "failed to unlink stale socket=$l: $!\n"; } # else: let the bind fail } $o{Local} = delete $o{Peer}; } else { # both work for IPv4, too for (qw(IO::Socket::IP IO::Socket::INET6)) { $sock_pkg = $_; eval "use $sock_pkg"; $@ or last; } die $@ if $@; %o = (LocalAddr => $l, ReuseAddr => 1, Proto => 'tcp'); } $o{Listen} = 1024; my $prev = umask 0000; my $s = eval { $sock_pkg->new(%o) } or warn "error binding $l: $! ($@)\n"; umask $prev; $s // next; $s->blocking(0); my $sockname = sockname($s); warn "# bound $scheme://$sockname\n"; $xnetd->{$sockname} //= load_mod($scheme); $listener_names->{$sockname} = $s; push @listeners, $s; } # cert/key options in @cfg_listen takes precedence when inheriting, # but map well-known inherited ports if --listen isn't specified # at all. This allows socket-activation users to set certs once # and not have to configure each socket: if (defined $default_cert) { my ($stls) = (($default_scheme // '') =~ /\A(pop3|nntp|imap)/); for my $x (@inherited_names) { $x =~ /:([0-9]+)\z/ or next; # no TLS for AF_UNIX if (my $scheme = $KNOWN_TLS{$1}) { $xnetd->{$x} //= load_mod($scheme); $tls_opt{"$scheme://$x"} ||= accept_tls_opt(''); } elsif (($scheme = $KNOWN_STARTTLS{$1})) { $xnetd->{$x} //= load_mod($scheme); $tls_opt{"$scheme://$x"} ||= accept_tls_opt(''); } elsif (defined $stls) { $tls_opt{"$stls://$x"} ||= accept_tls_opt(''); } } } if (defined $default_scheme) { for my $x (@inherited_names) { $xnetd->{$x} //= load_mod($default_scheme); } } die "No listeners bound\n" unless @listeners; } sub daemonize () { if ($daemonize) { require Cwd; foreach my $i (0..$#ARGV) { my $arg = $ARGV[$i]; next unless -e $arg; $ARGV[$i] = Cwd::abs_path($arg); } check_absolute('--stdout', $stdout); check_absolute('--stderr', $stderr); check_absolute('--pid-file', $pid_file); check_absolute('--cert', $default_cert); check_absolute('--key', $default_key); chdir '/' or die "chdir failed: $!"; } if (defined($pid_file) || defined($group) || defined($user)) { eval { require Net::Server::Daemonize; 1 } // die <&STDIN' or die "redirect stdout failed: $!\n"; open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n"; POSIX::setsid(); $pid = fork // die "fork: $!"; exit if $pid; } return unless defined $pid_file; write_pid($pid_file); # for ->DESTROY: bless { pid => $$, pid_file => \$pid_file }, __PACKAGE__; } sub worker_quit { # $_[0] = signal name or number (unused) # killing again terminates immediately: exit unless @listeners; $_->close foreach @listeners; # call PublicInbox::DS::close @listeners = (); my $proc_name; my $warn = 0; # drop idle connections and try to quit gracefully PublicInbox::DS->SetPostLoopCallback(sub { my ($dmap, undef) = @_; my $n = 0; my $now = now(); for my $s (values %$dmap) { $s->can('busy') or next; if ($s->busy) { ++$n; } else { # close as much as possible, early as possible $s->close; } } if ($n) { if (($warn + 5) < now()) { warn "$$ quitting, $n client(s) left\n"; $warn = now(); } unless (defined $proc_name) { $proc_name = (split(/\s+/, $0))[0]; $proc_name =~ s!\A.*?([^/]+)\z!$1!; } $0 = "$proc_name quitting, $n client(s) left"; } $n; # true: loop continues, false: loop breaks }); } sub reopen_logs { $logs{$stdout} //= \*STDOUT if defined $stdout; $logs{$stderr} //= \*STDERR if defined $stderr; while (my ($p, $fh) = each %logs) { open_log_path($fh, $p) } } sub sockname ($) { my ($s) = @_; my $addr = getsockname($s) or return; my ($host, $port) = host_with_port($addr); if ($port == 0 && $host eq '127.0.0.1') { my ($path) = Socket::sockaddr_un($addr); return $path; } "$host:$port"; } sub unpack_ipv6 ($) { my ($addr) = @_; my ($port, $host); # Socket.pm in Perl 5.14+ supports IPv6: eval { ($port, $host) = Socket::unpack_sockaddr_in6($addr); $host = Socket::inet_ntop(Socket::AF_INET6(), $host); }; if ($@) { # Perl 5.12 or earlier? SpamAssassin and Net::Server use # Socket6, so it may be installed on our system, already # (otherwise die here): require Socket6; ($port, $host) = Socket6::unpack_sockaddr_in6($addr); $host = Socket6::inet_ntop(Socket6::AF_INET6(), $host); } ($host, $port); } sub host_with_port ($) { my ($addr) = @_; my ($port, $host); # this eval will die on Unix sockets: eval { if (length($addr) >= 28) { ($host, $port) = unpack_ipv6($addr); $host = "[$host]"; } else { ($port, $host) = Socket::sockaddr_in($addr); $host = Socket::inet_ntoa($host); } }; $@ ? ('127.0.0.1', 0) : ($host, $port); } sub inherit ($) { my ($listener_names) = @_; return () if ($ENV{LISTEN_PID} || 0) != $$; my $fds = $ENV{LISTEN_FDS} or return (); my $end = $fds + 2; # LISTEN_FDS_START - 1 my @rv = (); foreach my $fd (3..$end) { open(my $s, '<&=', $fd) or warn "fdopen fd=$fd: $!"; if (my $k = sockname($s)) { my $prev_was_blocking = $s->blocking(0); warn <<"" if $prev_was_blocking; Inherited socket ($k fd=$fd) is blocking, making it non-blocking. Set 'NonBlocking = true' in the systemd.service unit to avoid stalled processes when multiple service instances start. $listener_names->{$k} = $s; warn "# inherited $k fd=$fd\n"; push @rv, $s; } else { warn "failed to inherit fd=$fd (LISTEN_FDS=$fds)"; } } @rv } sub upgrade { # $_[0] = signal name or number (unused) if ($reexec_pid) { warn "upgrade in-progress: $reexec_pid\n"; return; } if (defined $pid_file) { if ($pid_file =~ /\.oldbin\z/) { warn "BUG: .oldbin suffix exists: $pid_file\n"; return; } unlink_pid_file_safe_ish($$, $pid_file); $pid_file .= '.oldbin'; write_pid($pid_file); } my $pid = fork; unless (defined $pid) { warn "fork failed: $!\n"; return; } if ($pid == 0) { use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD); $ENV{LISTEN_FDS} = scalar @listeners; $ENV{LISTEN_PID} = $$; foreach my $s (@listeners) { # @listeners are globs with workers, PI::L w/o workers $s = $s->{sock} if ref($s) eq 'PublicInbox::Listener'; my $fl = fcntl($s, F_GETFD, 0); fcntl($s, F_SETFD, $fl &= ~FD_CLOEXEC); } exec @CMD; die "Failed to exec: $!\n"; } $reexec_pid = $pid; } sub kill_workers ($) { my ($sig) = @_; kill $sig, keys(%pids); } sub upgrade_aborted ($) { my ($p) = @_; warn "reexec PID($p) died with: $?\n"; $reexec_pid = undef; return unless $pid_file; my $file = $pid_file; $file =~ s/\.oldbin\z// or die "BUG: no '.oldbin' suffix in $file"; unlink_pid_file_safe_ish($$, $pid_file); $pid_file = $file; eval { write_pid($pid_file) }; warn $@, "\n" if $@; } sub reap_children { # $_[0] = 'CHLD' or POSIX::SIGCHLD() while (1) { my $p = waitpid(-1, WNOHANG) or return; if (defined $reexec_pid && $p == $reexec_pid) { upgrade_aborted($p); } elsif (defined(my $id = delete $pids{$p})) { warn "worker[$id] PID($p) died with: $?\n"; } elsif ($p > 0) { warn "unknown PID($p) reaped: $?\n"; } else { return; } } } sub unlink_pid_file_safe_ish ($$) { my ($unlink_pid, $file) = @_; return unless defined $unlink_pid && $unlink_pid == $$; open my $fh, '<', $file or return; local $/ = "\n"; defined(my $read_pid = <$fh>) or return; chomp $read_pid; if ($read_pid == $unlink_pid) { Net::Server::Daemonize::unlink_pid_file($file); } } sub master_quit ($) { exit unless @listeners; @listeners = (); kill_workers($_[0]); } sub master_loop { pipe(my ($p0, $p1)) or die "failed to create parent-pipe: $!"; my $set_workers = $worker_processes; reopen_logs(); my $ignore_winch; my $sig = { USR1 => sub { reopen_logs(); kill_workers($_[0]); }, USR2 => \&upgrade, QUIT => \&master_quit, INT => \&master_quit, TERM => \&master_quit, WINCH => sub { return if $ignore_winch || !@listeners; if (-t STDIN || -t STDOUT || -t STDERR) { $ignore_winch = 1; warn < sub { return unless @listeners; $worker_processes = $set_workers; kill_workers($_[0]); }, TTIN => sub { return unless @listeners; if ($set_workers > $worker_processes) { ++$worker_processes; } else { $worker_processes = ++$set_workers; } }, TTOU => sub { $worker_processes = --$set_workers if $set_workers > 0; }, CHLD => \&reap_children, }; my $sigfd = PublicInbox::Sigfd->new($sig); local @SIG{keys %$sig} = values(%$sig) unless $sigfd; PublicInbox::DS::sig_setmask($oldset) if !$sigfd; while (1) { # main loop my $n = scalar keys %pids; unless (@listeners) { exit if $n == 0; $set_workers = $worker_processes = $n = 0; } if ($n > $worker_processes) { while (my ($k, $v) = each %pids) { kill('TERM', $k) if $v >= $worker_processes; } $n = $worker_processes; } my $want = $worker_processes - 1; if ($n <= $want) { PublicInbox::DS::block_signals() if !$sigfd; for my $i ($n..$want) { my $seed = rand(0xffffffff); my $pid = fork; if (!defined $pid) { warn "failed to fork worker[$i]: $!\n"; } elsif ($pid == 0) { srand($seed); eval { Net::SSLeay::randomize() }; $set_user->() if $set_user; return $p0; # run normal work code } else { warn "PID=$pid is worker[$i]\n"; $pids{$pid} = $i; } } PublicInbox::DS::sig_setmask($oldset) if !$sigfd; } if ($sigfd) { # Linux and IO::KQueue users: $sigfd->wait_once; } else { # wake up every second sleep(1); } } exit # never gets here, just for documentation } sub tls_cb { my ($post_accept, $tlsd) = @_; sub { my ($io, $addr, $srv) = @_; $post_accept->(PublicInbox::TLS::start($io, $tlsd), $addr, $srv) } } sub defer_accept ($$) { my ($s, $af_name) = @_; return unless defined $af_name; if ($^O eq 'linux') { my $TCP_DEFER_ACCEPT = 9; # Socket::TCP_DEFER_ACCEPT is in 5.14+ my $x = getsockopt($s, IPPROTO_TCP, $TCP_DEFER_ACCEPT); return unless defined $x; # may be Unix socket my $sec = unpack('i', $x); return if $sec > 0; # systemd users may set a higher value setsockopt($s, IPPROTO_TCP, $TCP_DEFER_ACCEPT, 1); } elsif ($^O eq 'freebsd') { my $x = getsockopt($s, SOL_SOCKET, $SO_ACCEPTFILTER); return if defined $x; # don't change if set my $accf_arg = pack('a16a240', $af_name, ''); setsockopt($s, SOL_SOCKET, $SO_ACCEPTFILTER, $accf_arg); } } sub daemon_loop ($) { my ($xnetd) = @_; local $PublicInbox::Config::DEDUPE = {}; # enable dedupe cache my $refresh = sub { my ($sig) = @_; %$PublicInbox::Config::DEDUPE = (); # clear cache for my $xn (values %$xnetd) { delete $xn->{tlsd}->{ssl_ctx}; # PublicInbox::TLS::start eval { $xn->{refresh}->($sig) }; warn "refresh $@\n" if $@; } }; my %post_accept; while (my ($k, $ctx_opt) = each %tls_opt) { $ctx_opt // next; my ($scheme, $l) = split(m!://!, $k, 2); my $xn = $xnetd->{$l} // die "BUG: no xnetd for $k"; $xn->{tlsd}->{ssl_ctx_opt} //= $ctx_opt; $scheme =~ m!\A(?:https|imaps|nntps|pop3s)! and $post_accept{$l} = tls_cb(@$xn{qw(post_accept tlsd)}); } undef %tls_opt; my $sig = { HUP => $refresh, INT => \&worker_quit, QUIT => \&worker_quit, TERM => \&worker_quit, TTIN => 'IGNORE', TTOU => 'IGNORE', USR1 => \&reopen_logs, USR2 => 'IGNORE', WINCH => 'IGNORE', CHLD => \&PublicInbox::DS::enqueue_reap, }; if ($worker_processes > 0) { $refresh->(); # preload by default my $fh = master_loop(); # returns if in child process PublicInbox::EOFpipe->new($fh, \&worker_quit, undef); } else { reopen_logs(); $set_user->() if $set_user; $sig->{USR2} = sub { worker_quit() if upgrade() }; $refresh->(); } $uid = $gid = undef; reopen_logs(); @listeners = map {; my $l = sockname($_); my $tls_cb = $post_accept{$l}; my $xn = $xnetd->{$l} // die "BUG: no xnetd for $l"; # NNTPS, HTTPS, HTTP, IMAPS and POP3S are client-first traffic # IMAP, NNTP and POP3 are server-first defer_accept($_, $tls_cb ? 'dataready' : $xn->{af_default}); # this calls epoll_create: PublicInbox::Listener->new($_, $tls_cb || $xn->{post_accept}) } @listeners; PublicInbox::DS::event_loop($sig, $oldset); } sub run { my ($default_listen) = @_; daemon_prepare($default_listen, my $xnetd = {}); my $for_destroy = daemonize(); # localize GCF2C for tests: local $PublicInbox::GitAsyncCat::GCF2C; local $PublicInbox::Git::async_warn = 1; local $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb(); daemon_loop($xnetd); PublicInbox::DS->Reset; # ->DESTROY runs when $for_destroy goes out-of-scope } sub write_pid ($) { my ($path) = @_; Net::Server::Daemonize::create_pid_file($path); do_chown($path); } sub DESTROY { unlink_pid_file_safe_ish($_[0]->{pid}, ${$_[0]->{pid_file}}); } 1; public-inbox-1.9.0/lib/PublicInbox/DirIdle.pm000066400000000000000000000051701430031475700207610ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # Used by public-inbox-watch for Maildir (and possibly MH in the future) package PublicInbox::DirIdle; use strict; use parent 'PublicInbox::DS'; use PublicInbox::Syscall qw(EPOLLIN); use PublicInbox::In2Tie; my ($MAIL_IN, $MAIL_GONE, $ino_cls); if ($^O eq 'linux' && eval { require Linux::Inotify2; 1 }) { $MAIL_IN = Linux::Inotify2::IN_MOVED_TO() | Linux::Inotify2::IN_CREATE(); $MAIL_GONE = Linux::Inotify2::IN_DELETE() | Linux::Inotify2::IN_DELETE_SELF() | Linux::Inotify2::IN_MOVE_SELF() | Linux::Inotify2::IN_MOVED_FROM(); $ino_cls = 'Linux::Inotify2'; # Perl 5.22+ is needed for fileno(DIRHANDLE) support: } elsif ($^V ge v5.22 && eval { require PublicInbox::KQNotify }) { $MAIL_IN = PublicInbox::KQNotify::MOVED_TO_OR_CREATE(); $MAIL_GONE = PublicInbox::KQNotify::NOTE_DELETE() | PublicInbox::KQNotify::NOTE_REVOKE() | PublicInbox::KQNotify::NOTE_RENAME(); $ino_cls = 'PublicInbox::KQNotify'; } else { require PublicInbox::FakeInotify; $MAIL_IN = PublicInbox::FakeInotify::MOVED_TO_OR_CREATE(); $MAIL_GONE = PublicInbox::FakeInotify::IN_DELETE() | PublicInbox::FakeInotify::IN_DELETE_SELF() | PublicInbox::FakeInotify::IN_MOVE_SELF(); } sub new { my ($class, $cb) = @_; my $self = bless { cb => $cb }, $class; my $inot; if ($ino_cls) { $inot = $ino_cls->new or die "E: $ino_cls->new: $!"; my $io = PublicInbox::In2Tie::io($inot); $self->SUPER::new($io, EPOLLIN); } else { require PublicInbox::FakeInotify; $inot = PublicInbox::FakeInotify->new; # starts timer } $self->{inot} = $inot; $self; } sub add_watches { my ($self, $dirs, $gone) = @_; my $fl = $MAIL_IN | ($gone ? $MAIL_GONE : 0); my @ret; for my $d (@$dirs) { my $w = $self->{inot}->watch($d, $fl) or next; push @ret, $w; } PublicInbox::FakeInotify::poll_once($self) if !$ino_cls; @ret } sub rm_watches { my ($self, $dir) = @_; my $inot = $self->{inot}; if (my $cb = $inot->can('rm_watches')) { # TODO for fake watchers $cb->($inot, $dir); } } sub event_step { my ($self) = @_; my $cb = $self->{cb}; local $PublicInbox::DS::in_loop = 0; # waitpid() synchronously eval { my @events = $self->{inot}->read; # Linux::Inotify2->read $cb->($_) for @events; }; warn "$self->{inot}->read err: $@\n" if $@; } sub force_close { my ($self) = @_; my $inot = delete $self->{inot} // return; if ($inot->can('fh')) { # Linux::Inotify2 2.3+ close($inot->fh) or warn "CLOSE ERROR: $!"; } elsif ($inot->isa('Linux::Inotify2')) { require PublicInbox::LI2Wrap; PublicInbox::LI2Wrap::wrapclose($inot); } } 1; public-inbox-1.9.0/lib/PublicInbox/DummyInbox.pm000066400000000000000000000012131430031475700215320ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # # An EXAMINE-able, PublicInbox::Inbox-like object for IMAP. Some # IMAP clients don't like having unselectable parent mailboxes, # so we have a dummy package PublicInbox::DummyInbox; use strict; sub uidvalidity { 0 } # Msgmap::created_at sub mm { shift } sub uid_range { [] } # Over::uid_range sub subscribe_unlock { undef }; no warnings 'once'; *max = \&uidvalidity; *query_xover = \&uid_range; *over = \&mm; *isrch = *search = *unsubscribe_unlock = *get_art = *description = *base_url = \&subscribe_unlock; 1; public-inbox-1.9.0/lib/PublicInbox/EOFpipe.pm000066400000000000000000000011601430031475700207270ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ package PublicInbox::EOFpipe; use strict; use parent qw(PublicInbox::DS); use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT); sub new { my (undef, $rd, $cb, $arg) = @_; my $self = bless { cb => $cb, arg => $arg }, __PACKAGE__; # 1031: F_SETPIPE_SZ, 4096: page size fcntl($rd, 1031, 4096) if $^O eq 'linux'; $self->SUPER::new($rd, EPOLLIN|EPOLLONESHOT); } sub event_step { my ($self) = @_; if ($self->do_read(my $buf, 1) == 0) { # auto-closed $self->{cb}->($self->{arg}); } } 1; public-inbox-1.9.0/lib/PublicInbox/Emergency.pm000066400000000000000000000037741430031475700213730ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # # Emergency Maildir delivery for MDA package PublicInbox::Emergency; use strict; use v5.10.1; use Fcntl qw(:DEFAULT SEEK_SET); use Sys::Hostname qw(hostname); use IO::Handle; # ->flush use Errno qw(EEXIST); sub new { my ($class, $dir) = @_; foreach (qw(new tmp cur)) { my $d = "$dir/$_"; next if -d $d; require File::Path; if (!File::Path::mkpath($d) && !-d $d) { die "failed to mkpath($d): $!\n"; } } bless { dir => $dir, t => 0 }, $class; } sub _fn_in { my ($self, $pid, $dir) = @_; my $host = $self->{short_host} //= (split(/\./, hostname))[0]; my $now = time; my $n; if ($self->{t} != $now) { $self->{t} = $now; $n = $self->{cnt} = 0; } else { $n = ++$self->{cnt}; } "$self->{dir}/$dir/$self->{t}.$pid"."_$n.$host"; } sub prepare { my ($self, $strref) = @_; my $pid = $$; my $tmp_key = "tmp.$pid"; die "already in transaction: $self->{$tmp_key}" if $self->{$tmp_key}; my ($tmp, $fh); do { $tmp = _fn_in($self, $pid, 'tmp'); $! = undef; } while (!sysopen($fh, $tmp, O_CREAT|O_EXCL|O_RDWR) and $! == EEXIST); print $fh $$strref or die "write failed: $!"; $fh->flush or die "flush failed: $!"; $self->{fh} = $fh; $self->{$tmp_key} = $tmp; } sub abort { my ($self) = @_; delete $self->{fh}; my $tmp = delete $self->{"tmp.$$"} or return; unlink($tmp) or warn "Failed to unlink $tmp: $!"; undef; } sub fh { my ($self) = @_; my $fh = $self->{fh} or die "{fh} not open!\n"; seek($fh, 0, SEEK_SET) or die "seek(fh) failed: $!"; sysseek($fh, 0, SEEK_SET) or die "sysseek(fh) failed: $!"; $fh; } sub commit { my ($self) = @_; my $pid = $$; my $tmp = delete $self->{"tmp.$pid"} or return; delete $self->{fh}; my ($new, $ok); do { $new = _fn_in($self, $pid, 'new'); } while (!($ok = link($tmp, $new)) && $! == EEXIST); die "link($tmp, $new): $!" unless $ok; unlink($tmp) or warn "Failed to unlink $tmp: $!"; } sub DESTROY { commit($_[0]) } 1; public-inbox-1.9.0/lib/PublicInbox/Eml.pm000066400000000000000000000374751430031475700201770ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # # Lazy MIME parser, it still slurps the full message but keeps short # lifetimes. Unlike Email::MIME, it doesn't pre-split multipart # messages or do any up-front parsing of headers besides splitting # the header string from the body. # # Contains ideas and code from Email::Simple and Email::MIME # (Perl Artistic License, GPL-1+) # # This aims to replace Email::MIME for our purposes, similar API # but internal field names are differ if they're not 100%-compatible. # # Includes some proposed fixes for Email::MIME: # - header-less sub parts - https://github.com/rjbs/Email-MIME/issues/14 # - "0" as boundary - https://github.com/rjbs/Email-MIME/issues/63 # # $self = { # bdy => scalar ref for body (may be undef), # hdr => scalar ref for header, # crlf => "\n" or "\r\n" (scalar, not a ref), # # # filled in during ->each_part # ct => hash ref returned by parse_content_type # } package PublicInbox::Eml; use strict; use v5.10.1; use Carp qw(croak); use Encode qw(find_encoding); # stdlib use Text::Wrap qw(wrap); # stdlib, we need Perl 5.6+ for $huge use MIME::Base64 3.05; # Perl 5.10.0 / 5.9.2 use MIME::QuotedPrint 3.05; # ditto my $MIME_Header = find_encoding('MIME-Header'); use PublicInbox::EmlContentFoo qw(parse_content_type parse_content_disposition); $PublicInbox::EmlContentFoo::STRICT_PARAMS = 0; our $mime_parts_limit = 1000; # same as SpamAssassin (not in postfix AFAIK) # the rest of the limit names are taken from postfix: our $mime_nesting_limit = 20; # seems enough, Perl sucks, here our $mime_boundary_length_limit = 2048; # same as postfix our $header_size_limit = 102400; # same as postfix my %MIME_ENC = (qp => \&enc_qp, base64 => \&encode_base64); my %MIME_DEC = (qp => \&dec_qp, base64 => \&decode_base64); $MIME_ENC{quotedprint} = $MIME_ENC{'quoted-printable'} = $MIME_ENC{qp}; $MIME_DEC{quotedprint} = $MIME_DEC{'quoted-printable'} = $MIME_DEC{qp}; $MIME_ENC{$_} = \&identity_codec for qw(7bit 8bit binary); my %DECODE_ADDRESS = map { ($_ => 1, "Resent-$_" => 1) } qw(From To Cc Sender Reply-To Bcc); my %DECODE_FULL = ( Subject => 1, 'Content-Description' => 1, 'Content-Type' => 1, # not correct, but needed, oh well ); our %STR_TYPE = (text => 1); our %STR_SUBTYPE = (plain => 1, html => 1); # message/* subtypes we descend into our %MESSAGE_DESCEND = ( news => 1, # RFC 1849 (obsolete, but archives are forever) rfc822 => 1, # RFC 2046 rfc2822 => 1, # gmime handles this (but not rfc5322) global => 1, # RFC 6532 ); my %re_memo; sub re_memo ($) { my ($k) = @_; # Do not normalize $k with lc/uc; instead strive to keep # capitalization in our codebase consistent. $re_memo{$k} ||= qr/^\Q$k\E:[ \t]*([^\n]*\r?\n # 1st line # continuation lines: (?:[^:\n]*?[ \t]+[^\n]*\r?\n)*) /ismx } sub hdr_truncate ($) { my $len = length($_[0]); substr($_[0], $header_size_limit, $len) = ''; my $end = rindex($_[0], "\n"); if ($end >= 0) { ++$end; substr($_[0], $end, $len) = ''; warn "header of $len bytes truncated to $end bytes\n"; } else { $_[0] = ''; warn <= 0) { # likely on *nix my $hdr = substr($$ref, 0, $pos + 2, ''); # sv_chop on $$ref chop($hdr); # lower SvCUR hdr_truncate($hdr) if length($hdr) > $header_size_limit; bless { hdr => \$hdr, crlf => "\n", bdy => $ref }, __PACKAGE__; } elsif ($$ref =~ /\r?\n(\r?\n)/s) { my $hdr = substr($$ref, 0, $+[0], ''); # sv_chop on $$ref substr($hdr, -(length($1))) = ''; # lower SvCUR hdr_truncate($hdr) if length($hdr) > $header_size_limit; bless { hdr => \$hdr, crlf => $1, bdy => $ref }, __PACKAGE__; } elsif ($$ref =~ /^[a-z0-9-]+[ \t]*:/ims && $$ref =~ /(\r?\n)\z/s) { # body is optional :P my $hdr = substr($$ref, 0, $header_size_limit + 1); hdr_truncate($hdr) if length($hdr) > $header_size_limit; bless { hdr => \$hdr, crlf => $1 }, __PACKAGE__; } else { # just a body w/o header? my $hdr = ''; my $eol = ($$ref =~ /(\r?\n)/) ? $1 : "\n"; bless { hdr => \$hdr, crlf => $eol, bdy => $ref }, __PACKAGE__; } } sub new_sub { my (undef, $ref) = @_; # special case for messages like <85k5su9k59.fsf_-_@lola.goethe.zz> $$ref =~ /\A(\r?\n)/s or return new(undef, $ref); my $hdr = substr($$ref, 0, $+[0], ''); # sv_chop on $$ref bless { hdr => \$hdr, crlf => $1, bdy => $ref }, __PACKAGE__; } # same output as Email::Simple::Header::header_raw, but we extract # headers on-demand instead of parsing them into a list which # requires O(n) lookups anyways sub header_raw { my $re = re_memo($_[1]); my @v = (${ $_[0]->{hdr} } =~ /$re/g); for (@v) { # for compatibility w/ Email::Simple::Header, s/\s+\z//s; s/\A\s+//s; s/\r?\n[ \t]*/ /gs; } wantarray ? @v : $v[0]; } # pick the first Content-Type header to match Email::MIME behavior. # It's usually the right one based on historical archives. sub ct ($) { # PublicInbox::EmlContentFoo::content_type: $_[0]->{ct} //= parse_content_type(header($_[0], 'Content-Type')); } # returns a queue of sub-parts iff it's worth descending into sub mp_descend ($$) { my ($self, $nr) = @_; # or $once for top-level my $ct = ct($self); my $type = lc($ct->{type}); if ($type eq 'message' && $MESSAGE_DESCEND{lc($ct->{subtype})}) { my $nxt = new(undef, body_raw($self)); $self->{-call_cb} = $nxt->{is_submsg} = 1; return [ $nxt ]; } return if $type ne 'multipart'; my $bnd = $ct->{attributes}->{boundary} // return; # single-part return if $bnd eq '' || length($bnd) >= $mime_boundary_length_limit; $bnd = quotemeta($bnd); # this is a multipart message that didn't get descended into in # public-inbox <= 1.5.0, so ensure we call the user callback for # this part to not break PSGI downloads. $self->{-call_cb} = $self->{is_submsg}; # "multipart" messages can exist w/o a body my $bdy = ($nr ? delete($self->{bdy}) : \(body_raw($self))) or return; # Cut at the the first epilogue, not subsequent ones. # *sigh* just the regexp match alone seems to bump RSS by # length($$bdy) on a ~30M string: my $epilogue_missing; if ($$bdy =~ /(?:\r?\n)?^--$bnd--[ \t]*\r?$/sm) { substr($$bdy, $-[0]) = ''; } else { $epilogue_missing = 1; } # *Sigh* split() doesn't work in-place and return CoW strings # because Perl wants to "\0"-terminate strings. So split() # again bumps RSS by length($$bdy) # Quiet warning for "Complex regular subexpression recursion limit" # in case we get many empty parts, it's harmless in this case no warnings 'regexp'; my ($pre, @parts) = split(/(?:\r?\n)?(?:^--$bnd[ \t]*\r?\n)+/ms, $$bdy, # + 3 since we don't want the last part # processed to include any other excluded # parts ($nr starts at 1, and I suck at math) $mime_parts_limit + 3 - $nr); if (@parts) { # the usual path if we got this far: undef $bdy; # release memory ASAP if $nr > 0 # compatibility with Email::MIME $parts[-1] =~ s/\n\r?\n\z/\n/s if $epilogue_missing; # ignore empty parts @parts = map { new_sub(undef, \$_) } grep /[^ \t\r\n]/s, @parts; # Keep "From: someone..." from preamble in old, # buggy versions of git-send-email, otherwise drop it # There's also a case where quoted text showed up in the # preamble # <20060515162817.65F0F1BBAE@citi.umich.edu> unshift(@parts, new_sub(undef, \$pre)) if index($pre, ':') >= 0; return \@parts; } # "multipart", but no boundary found, treat as single part $self->{bdy} //= $bdy; undef; } # $p = [ \@parts, $depth, $idx ] # $idx[0] grows as $depth grows, $idx[1] == $p->[-1] == current part # (callers need to be updated) # \@parts is a queue which empties when we're done with a parent part # same usage as PublicInbox::MsgIter::msg_iter # $cb - user-supplied callback sub # $arg - user-supplied arg (think pthread_create) # $once - unref body scalar during iteration # $all - used by IMAP server, only sub each_part { my ($self, $cb, $arg, $once, $all) = @_; my $p = mp_descend($self, $once // 0) or return $cb->([$self, 0, 1], $arg); $cb->([$self, 0, 0], $arg) if ($all || $self->{-call_cb}); # rare $p = [ $p, 0 ]; my @s; # our virtual stack my $nr = 0; while ((scalar(@{$p->[0]}) || ($p = pop @s)) && ++$nr <= $mime_parts_limit) { ++$p->[-1]; # bump index my (undef, @idx) = @$p; @idx = (join('.', @idx)); my $depth = ($idx[0] =~ tr/././) + 1; my $sub = shift @{$p->[0]}; if ($depth < $mime_nesting_limit && (my $nxt = mp_descend($sub, $nr))) { push(@s, $p) if scalar @{$p->[0]}; $p = [ $nxt, @idx, 0 ]; ($all || $sub->{-call_cb}) and $cb->([$sub, $depth, @idx], $arg); } else { # a leaf node $cb->([$sub, $depth, @idx], $arg); } } } sub enc_qp { # prevent MIME::QuotedPrint from encoding CR as =0D since it's # against RFCs and breaks MUAs $_[0] =~ s/\r\n/\n/sg; encode_qp($_[0], "\r\n"); } sub dec_qp { # RFC 2822 requires all lines to end in CRLF, though... :< $_[0] = decode_qp($_[0]); $_[0] =~ s/\n/\r\n/sg; $_[0] } sub identity_codec { $_[0] } ########### compatibility section for existing Email::MIME uses ######### sub header_obj { bless { hdr => $_[0]->{hdr}, crlf => $_[0]->{crlf} }, __PACKAGE__; } sub subparts { my ($self) = @_; my $parts = mp_descend($self, 0) or return (); my $bnd = ct($self)->{attributes}->{boundary} // die 'BUG: no boundary'; my $bdy = $self->{bdy}; if ($$bdy =~ /\A(.*?)(?:\r?\n)?^--\Q$bnd\E[ \t]*\r?$/sm) { $self->{preamble} = $1; } if ($$bdy =~ /^--\Q$bnd\E--[ \t]*\r?\n(.+)\z/sm) { $self->{epilogue} = $1; } @$parts; } sub parts_set { my ($self, $parts) = @_; # we can't fully support what Email::MIME does, # just what our filter code needs: my $bnd = ct($self)->{attributes}->{boundary} // die <parts_set not supported for single-part messages EOF my $crlf = $self->{crlf}; my $fin_bnd = "$crlf--$bnd--$crlf"; $bnd = "$crlf--$bnd$crlf"; ${$self->{bdy}} = join($bnd, delete($self->{preamble}) // '', map { $_->as_string } @$parts ) . $fin_bnd . (delete($self->{epilogue}) // ''); undef; } sub body_set { my ($self, $body) = @_; my $bdy = $self->{bdy} = ref($body) ? $body : \$body; if (my $cte = header_raw($self, 'Content-Transfer-Encoding')) { my $enc = $MIME_ENC{lc($cte)} or croak("can't encode `$cte'"); $$bdy = $enc->($$bdy); # in-place } undef; } sub body_str_set { my ($self, $str) = @_; my $cs = ct($self)->{attributes}->{charset} // croak('body_str was given, but no charset is defined'); my $enc = find_encoding($cs) // croak "unknown encoding `$cs'"; my $tmp; { my @w; local $SIG{__WARN__} = sub { push @w, @_ }; $tmp = $enc->encode($str, Encode::FB_WARN); croak(@w) if @w; }; body_set($self, \$tmp); } sub content_type { scalar header($_[0], 'Content-Type') } # we only support raw header_set sub header_set { my ($self, $pfx, @vals) = @_; my $re = re_memo($pfx); my $hdr = $self->{hdr}; return $$hdr =~ s!$re!!g if !@vals; $pfx .= ': '; my $len = 78 - length($pfx); @vals = map {; # folding differs from Email::Simple::Header, # we favor tabs for visibility (and space savings :P) if (length($_) >= $len && (/\n[^ \t]/s || !/\n/s)) { local $Text::Wrap::columns = $len; local $Text::Wrap::huge = 'overflow'; $pfx . wrap('', "\t", $_) . $self->{crlf}; } else { $pfx . $_ . $self->{crlf}; } } @vals; $$hdr =~ s!$re!shift(@vals) // ''!ge; # replace current headers, first $$hdr .= join('', @vals); # append any leftovers not replaced # wantarray ? @_[2..$#_] : $_[2]; # Email::Simple::Header compat undef; # we don't care for the return value } # note: we only call this method on Subject sub header_str_set { my ($self, $name, @vals) = @_; for (@vals) { next unless /[^\x20-\x7e]/; # 39: int((75 - length("Subject: =?UTF-8?B?".'?=') ) / 4) * 3; s/(.{1,39})/ my $x = $1; utf8::encode($x); # to octets '=?UTF-8?B?'.encode_base64($x, '').'?=' /xges; } header_set($self, $name, @vals); } sub mhdr_decode ($) { eval { $MIME_Header->decode($_[0], Encode::FB_DEFAULT) } // $_[0]; } sub filename { my $dis = header_raw($_[0], 'Content-Disposition'); my $attrs = parse_content_disposition($dis)->{attributes}; my $fn = $attrs->{filename}; $fn = ct($_[0])->{attributes}->{name} if !defined($fn) || $fn eq ''; (defined($fn) && $fn =~ /=\?/) ? mhdr_decode($fn) : $fn; } sub xs_addr_str { # helper for ->header / ->header_str for (@_) { # array from header_raw() next unless /=\?/; my @g = parse_email_groups($_); # [ foo => [ E::A::X, ... ] for (my $i = 0; $i < @g; $i += 2) { if (defined($g[$i]) && $g[$i] =~ /=\?/) { $g[$i] = mhdr_decode($g[$i]); } my $addrs = $g[$i + 1]; for my $eax (@$addrs) { for my $m (qw(phrase comment)) { my $v = $eax->$m; $eax->$m(mhdr_decode($v)) if $v && $v =~ /=\?/; } } } $_ = format_email_groups(@g); } } eval { require Email::Address::XS; Email::Address::XS->import(qw(parse_email_groups format_email_groups)); 1; } or do { # fallback to just decoding everything, because parsing # email addresses correctly w/o C/XS is slow %DECODE_FULL = (%DECODE_FULL, %DECODE_ADDRESS); %DECODE_ADDRESS = (); }; *header = \&header_str; sub header_str { my ($self, $name) = @_; my @v = header_raw($self, $name); if ($DECODE_ADDRESS{$name}) { xs_addr_str(@v); } elsif ($DECODE_FULL{$name}) { for (@v) { $_ = mhdr_decode($_) if /=\?/; } } wantarray ? @v : $v[0]; } sub body_raw { ${$_[0]->{bdy} // \''}; } sub body { my $raw = body_raw($_[0]); my $cte = header_raw($_[0], 'Content-Transfer-Encoding') or return $raw; ($cte) = ($cte =~ /([a-zA-Z0-9\-]+)/) or return $raw; # For S/MIME, etc my $dec = $MIME_DEC{lc($cte)} or return $raw; $dec->($raw); } sub body_str { my ($self) = @_; my $ct = ct($self); my $cs = $ct->{attributes}->{charset} // do { ($STR_TYPE{$ct->{type}} && $STR_SUBTYPE{$ct->{subtype}}) and return body($self); croak("can't get body as a string for ", join("\n\t", header_raw($self, 'Content-Type'))); }; my $enc = find_encoding($cs) or croak "unknown encoding `$cs'"; my $tmp = body($self); # workaround https://rt.cpan.org/Public/Bug/Display.html?id=139622 my @w; local $SIG{__WARN__} = sub { push @w, @_ }; my $ret = $enc->decode($tmp, Encode::FB_WARN); croak(@w) if @w; $ret; } sub as_string { my ($self) = @_; my $ret = ${ $self->{hdr} }; return $ret unless defined($self->{bdy}); $ret .= $self->{crlf}; $ret .= ${$self->{bdy}}; } # Unlike Email::MIME::charset_set, this only changes the parsed # representation of charset used for search indexing and HTML display. # This does NOT affect what ->as_string returns. sub charset_set { ct($_[0])->{attributes}->{charset} = $_[1]; } sub crlf { $_[0]->{crlf} // "\n" } sub raw_size { my ($self) = @_; my $len = length(${$self->{hdr}}); defined($self->{bdy}) and $len += length(${$self->{bdy}}) + length($self->{crlf}); $len; } # warnings to ignore when handling spam mailboxes and maybe other places sub warn_ignore { my $s = "@_"; # Email::Address::XS warnings $s =~ /^Argument contains empty / || $s =~ /^Element at index [0-9]+.*? contains / # PublicInbox::MsgTime || $s =~ /^bogus TZ offset: .+?, ignoring and assuming \+0000/ || $s =~ /^bad Date: .+? in / # Encode::Unicode::UTF7 || $s =~ /^Bad UTF7 data escape at / } # this expects to be RHS in this assignment: "local $SIG{__WARN__} = ..." sub warn_ignore_cb { my $cb = $SIG{__WARN__} // \&CORE::warn; sub { $cb->(@_) unless warn_ignore(@_) } } sub willneed { re_memo($_) for @_ } willneed(qw(From To Cc Date Subject Content-Type In-Reply-To References Message-ID X-Alt-Message-ID)); 1; public-inbox-1.9.0/lib/PublicInbox/EmlContentFoo.pm000066400000000000000000000177111430031475700221650ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # Copyright (C) 2004- Simon Cozens, Casey West, Ricardo SIGNES # This library is free software; you can redistribute it and/or modify # it under the same terms as Perl itself. # # License: GPL-1.0+ or Artistic-1.0-Perl # # # # This license differs from the rest of public-inbox # # ABSTRACT: Parse a MIME Content-Type or Content-Disposition Header # # This is a fork of the Email::MIME::ContentType 1.022 with # minor improvements and incompatibilities; namely changes to # quiet warnings with legacy data. package PublicInbox::EmlContentFoo; use strict; use parent qw(Exporter); use v5.10.1; # find_mime_encoding() only appeared in Encode 2.87+ (Perl 5.26+), # while we support 2.35 shipped with Perl 5.10.1 use Encode 2.35 qw(find_encoding); my %mime_name_map; # $enc->mime_name => $enc object BEGIN { eval { Encode->import('find_mime_encoding') }; if ($@) { *find_mime_encoding = sub { $mime_name_map{lc($_[0])} }; %mime_name_map = map {; my $enc = find_encoding($_); my $m = lc($enc->mime_name // ''); $m => $enc; } Encode->encodings(':all'); # delete fallback for encodings w/o ->mime_name: delete $mime_name_map{''}; # an extra alias see Encode::MIME::NAME $mime_name_map{'utf8'} = find_encoding('UTF-8'); } } our @EXPORT_OK = qw(parse_content_type parse_content_disposition); our $STRICT_PARAMS = 1; my $ct_default = 'text/plain; charset=us-ascii'; my $re_token = # US-ASCII except SPACE, CTLs and tspecials ()<>@,;:\\"/[]?= qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/; my $re_token_non_strict = # allow CTLs and above ASCII qr/([\x00-\x08\x0B\x0C\x0E-\x1F\x7E-\xFF]+|$re_token)/; my $re_qtext = # US-ASCII except CR, LF, white space, backslash and quote qr/[\x01-\x08\x0B\x0C\x0E-\x1F\x21\x23-\x5B\x5D-\x7E\x7F]/; my $re_quoted_pair = qr/\\[\x00-\x7F]/; my $re_quoted_string = qr/"((?:[ \t]*(?:$re_qtext|$re_quoted_pair))*[ \t]*)"/; my $re_qtext_non_strict = qr/[\x80-\xFF]|$re_qtext/; my $re_quoted_pair_non_strict = qr/\\[\x00-\xFF]/; my $re_quoted_string_non_strict = qr/"((?:[ \t]*(?:$re_qtext_non_strict|$re_quoted_pair_non_strict))*[ \t]*)"/; my $re_charset = qr/[!"#\$%&'+\-0-9A-Z\\\^_`a-z\{\|\}~]+/; my $re_language = qr/[A-Za-z]{1,8}(?:-[0-9A-Za-z]{1,8})*/; my $re_exvalue = qr/($re_charset)?'(?:$re_language)?'(.*)/; sub parse_content_type { my ($ct) = @_; # If the header isn't there or is empty, give default answer. $ct = $ct_default unless defined($ct) && length($ct); _unfold_lines($ct); _clean_comments($ct); # It is also recommend (sic.) that this default be assumed when a # syntactically invalid Content-Type header field is encountered. unless ($ct =~ s/^($re_token)\/($re_token)//) { unless ($STRICT_PARAMS && $ct =~ s/^($re_token_non_strict)\/ ($re_token_non_strict)//x) { #carp "Invalid Content-Type '$ct'"; return parse_content_type($ct_default); } } my ($type, $subtype) = (lc $1, lc $2); _clean_comments($ct); $ct =~ s/\s+$//; my $attributes = {}; if ($STRICT_PARAMS && length($ct) && $ct !~ /^;/) { # carp "Missing ';' before first Content-Type parameter '$ct'"; } else { $attributes = _process_rfc2231(_parse_attributes($ct)); } { type => $type, subtype => $subtype, attributes => $attributes, }; } my $cd_default = 'attachment'; sub parse_content_disposition { my ($cd) = @_; $cd = $cd_default unless defined($cd) && length($cd); _unfold_lines($cd); _clean_comments($cd); unless ($cd =~ s/^($re_token)//) { unless ($STRICT_PARAMS and $cd =~ s/^($re_token_non_strict)//) { #carp "Invalid Content-Disposition '$cd'"; return parse_content_disposition($cd_default); } } my $type = lc $1; _clean_comments($cd); $cd =~ s/\s+$//; my $attributes = {}; if ($STRICT_PARAMS && length($cd) && $cd !~ /^;/) { # carp "Missing ';' before first Content-Disposition parameter '$cd'"; } else { $attributes = _process_rfc2231(_parse_attributes($cd)); } { type => $type, attributes => $attributes, }; } sub _unfold_lines { $_[0] =~ s/(?:\r\n|[\r\n])(?=[ \t])//g; } sub _clean_comments { my $ret = ($_[0] =~ s/^\s+//); while (length $_[0]) { last unless $_[0] =~ s/^\(//; my $level = 1; while (length $_[0]) { my $ch = substr $_[0], 0, 1, ''; if ($ch eq '(') { $level++; } elsif ($ch eq ')') { $level--; last if $level == 0; } elsif ($ch eq '\\') { substr $_[0], 0, 1, ''; } } # carp "Unbalanced comment" if $level != 0 and $STRICT_PARAMS; $ret |= ($_[0] =~ s/^\s+//); } $ret; } sub _process_rfc2231 { my ($attribs) = @_; my %cont; my %encoded; foreach (keys %{$attribs}) { next unless $_ =~ m/^(.*)\*([0-9])\*?$/; my ($attr, $sec) = ($1, $2); $cont{$attr}->[$sec] = $attribs->{$_}; $encoded{$attr}->[$sec] = 1 if $_ =~ m/\*$/; delete $attribs->{$_}; } foreach (keys %cont) { my $key = $_; $key .= '*' if $encoded{$_}; $attribs->{$key} = join '', @{$cont{$_}}; } foreach (keys %{$attribs}) { next unless $_ =~ m/^(.*)\*$/; my $key = $1; next unless ($attribs->{$_} // '') =~ m/^$re_exvalue$/; my ($charset, $value) = ($1, $2); $value =~ s/%([0-9A-Fa-f]{2})/pack('C', hex($1))/eg; if (length $charset) { my $enc = find_mime_encoding($charset); if (defined $enc) { $value = $enc->decode($value); # } else { #carp "Unknown charset '$charset' in #attribute '$key' value"; } } $attribs->{$key} = $value; delete $attribs->{$_}; } $attribs; } sub _parse_attributes { local $_ = shift; substr($_, 0, 0, '; ') if length $_ and $_ !~ /^;/; my $attribs = {}; while (length $_) { s/^;// or $STRICT_PARAMS and do { #carp "Missing semicolon before parameter '$_'"; return $attribs; }; _clean_comments($_); unless (length $_) { # Some mail software generates a Content-Type like this: # "Content-Type: text/plain;" # RFC 1521 section 3 says a parameter must exist if # there is a semicolon. #carp "Extra semicolon after last parameter" if #$STRICT_PARAMS; return $attribs; } my $attribute; if (s/^($re_token)=//) { $attribute = lc $1; } else { if ($STRICT_PARAMS) { # carp "Illegal parameter '$_'"; return $attribs; } if (s/^($re_token_non_strict)=//) { $attribute = lc $1; } else { unless (s/^([^;=\s]+)\s*=//) { #carp "Cannot parse parameter '$_'"; return $attribs; } $attribute = lc $1; } } _clean_comments($_); my $value = _extract_attribute_value(); $attribs->{$attribute} = $value; _clean_comments($_); } $attribs; } sub _extract_attribute_value { # EXPECTS AND MODIFIES $_ my $value; while (length $_) { if (s/^($re_token)//) { $value .= $1; } elsif (s/^$re_quoted_string//) { my $sub = $1; $sub =~ s/\\(.)/$1/g; $value .= $sub; } elsif ($STRICT_PARAMS) { #my $char = substr $_, 0, 1; #carp "Unquoted '$char' not allowed"; return; } elsif (s/^($re_token_non_strict)//) { $value .= $1; } elsif (s/^$re_quoted_string_non_strict//) { my $sub = $1; $sub =~ s/\\(.)/$1/g; $value .= $sub; } my $erased = _clean_comments($_); last if !length $_ or /^;/; if ($STRICT_PARAMS) { #my $char = substr $_, 0, 1; #carp "Extra '$char' found after parameter"; return; } if ($erased) { # Sometimes semicolon is missing, so check for = char last if m/^$re_token_non_strict=/; $value .= ' '; } $value .= substr $_, 0, 1, ''; } $value; } 1; __END__ =func parse_content_type This routine is exported by default. This routine parses email content type headers according to section 5.1 of RFC 2045 and also RFC 2231 (Character Set and Parameter Continuations). It returns a hash as above, with entries for the C, the C, and a hash of C. =func parse_content_disposition This routine is exported by default. This routine parses email Content-Disposition headers according to RFC 2183 and RFC 2231. It returns a hash as above, with entries for the C, and a hash of C. =cut public-inbox-1.9.0/lib/PublicInbox/ExtMsg.pm000066400000000000000000000172211430031475700206540ustar00rootroot00000000000000# Copyright (C) 2015-2021 all contributors # License: AGPL-3.0+ # # Used by the web interface to link to messages outside of the our # public-inboxes. Mail threads may cross projects/threads; so # we should ensure users can find more easily find them on other # sites. package PublicInbox::ExtMsg; use strict; use warnings; use PublicInbox::Hval qw(ascii_html prurl mid_href); use PublicInbox::WwwStream qw(html_oneshot); use PublicInbox::Smsg; our $MIN_PARTIAL_LEN = 16; # TODO: user-configurable our @EXT_URL = map { ascii_html($_) } ( # leading "//" denotes protocol-relative (http:// or https://) '//marc.info/?i=%s', '//www.mail-archive.com/search?l=mid&q=%s', 'nntp://news.gmane.io/%s', 'https://lists.debian.org/msgid-search/%s', '//docs.FreeBSD.org/cgi/mid.cgi?db=mid&id=%s', 'https://www.w3.org/mid/%s', 'http://www.postgresql.org/message-id/%s', 'https://lists.debconf.org/cgi-lurker/keyword.cgi?'. 'doc-url=/lurker&format=en.html&query=id:%s' ); sub PARTIAL_MAX () { 100 } sub search_partial ($$) { my ($ibx, $mid) = @_; return if length($mid) < $MIN_PARTIAL_LEN; my $srch = $ibx->isrch or return; my $opt = { limit => PARTIAL_MAX, relevance => -1 }; my @try = ("m:$mid*"); my $chop = $mid; if ($chop =~ s/(\W+)(\w*)\z//) { my ($delim, $word) = ($1, $2); if (length($word)) { push @try, "m:$chop$delim"; push @try, "m:$chop$delim*"; } push @try, "m:$chop"; push @try, "m:$chop*"; } # break out long words individually to search for, because # too many messages begin with "Pine.LNX." (or "alpine" or "nycvar") if ($mid =~ /\w{9,}/) { my @long = ($mid =~ m!(\w{3,})!g); push(@try, join(' ', map { "m:$_" } @long)); # is the last element long enough to not trigger excessive # wildcard matches? if (length($long[-1]) > 8) { $long[-1] .= '*'; push(@try, join(' ', map { "m:$_" } @long)); } } foreach my $m (@try) { # If Xapian can't handle the wildcard since it # has too many results. $@ can be # Search::Xapian::QueryParserError or even: # "something terrible happened at ../Search/Xapian/Enquire.pm" my $mset = eval { $srch->mset($m, $opt) } or next; my @mids = map { $_->{mid} } @{$srch->mset_to_smsg($ibx, $mset)}; return \@mids if scalar(@mids); } } sub ext_msg_i { my ($other, $ctx) = @_; return if $other->{name} eq $ctx->{ibx}->{name} || !$other->base_url; my $mm = $other->mm or return; # try to find the URL with Msgmap to avoid forking my $num = $mm->num_for($ctx->{mid}); if (defined $num) { push @{$ctx->{found}}, $other; } else { # no point in trying the fork fallback if we # know Xapian is up-to-date but missing the # message in the current repo push @{$ctx->{again}}, $other; } } sub ext_msg_step { my ($pi_cfg, $section, $ctx) = @_; if (defined($section)) { return if $section !~ m!\Apublicinbox\.([^/]+)\z!; my $ibx = $pi_cfg->lookup_name($1) or return; ext_msg_i($ibx, $ctx); } else { # undef == "EOF" finalize_exact($ctx); } } sub ext_msg_ALL ($) { my ($ctx) = @_; my $ALL = $ctx->{www}->{pi_cfg}->ALL or return; my $by_eidx_key = $ctx->{www}->{pi_cfg}->{-by_eidx_key}; my $cur_key = eval { $ctx->{ibx}->eidx_key } // return partial_response($ctx); # $cur->{ibx} == $ALL my %seen = ($cur_key => 1); my ($id, $prev); while (my $x = $ALL->over->next_by_mid($ctx->{mid}, \$id, \$prev)) { my $xr3 = $ALL->over->get_xref3($x->{num}); for my $k (@$xr3) { $k =~ s/:[0-9]+:$x->{blob}\z// or next; next if $k eq $cur_key; my $ibx = $by_eidx_key->{$k} // next; $ibx->base_url or next; push(@{$ctx->{found}}, $ibx) unless $seen{$k}++; } } return exact($ctx) if $ctx->{found}; # fall back to partial MID matching for my $ibxish ($ctx->{ibx}, $ALL) { my $mids = search_partial($ibxish, $ctx->{mid}) or next; push @{$ctx->{partial}}, [ $ibxish, $mids ]; last if ($ctx->{n_partial} += scalar(@$mids)) >= PARTIAL_MAX; } partial_response($ctx); } sub ext_msg { my ($ctx) = @_; ext_msg_ALL($ctx) // sub { $ctx->{-wcb} = $_[0]; # HTTP server write callback if ($ctx->{env}->{'pi-httpd.async'}) { require PublicInbox::ConfigIter; my $iter = PublicInbox::ConfigIter->new( $ctx->{www}->{pi_cfg}, \&ext_msg_step, $ctx); $iter->event_step; } else { $ctx->{www}->{pi_cfg}->each_inbox(\&ext_msg_i, $ctx); finalize_exact($ctx); } }; } # called via PublicInbox::DS::event_loop sub event_step { my ($ctx, $sync) = @_; # can't find a partial match in current inbox, try the others: my $ibx = shift @{$ctx->{again}} or return finalize_partial($ctx); my $mids = search_partial($ibx, $ctx->{mid}) or return ($sync ? undef : PublicInbox::DS::requeue($ctx)); $ctx->{n_partial} += scalar(@$mids); push @{$ctx->{partial}}, [ $ibx, $mids ]; $ctx->{n_partial} >= PARTIAL_MAX ? finalize_partial($ctx) : ($sync ? undef : PublicInbox::DS::requeue($ctx)); } sub finalize_exact { my ($ctx) = @_; return $ctx->{-wcb}->(exact($ctx)) if $ctx->{found}; # fall back to partial MID matching my $mid = $ctx->{mid}; my $cur = $ctx->{ibx}; my $mids = search_partial($cur, $mid); if ($mids) { $ctx->{n_partial} = scalar(@$mids); push @{$ctx->{partial}}, [ $cur, $mids ]; } elsif ($ctx->{again} && length($mid) >= $MIN_PARTIAL_LEN) { bless $ctx, __PACKAGE__; if ($ctx->{env}->{'pi-httpd.async'}) { $ctx->event_step; return; } # synchronous fall-through $ctx->event_step while @{$ctx->{again}}; } finalize_partial($ctx); } sub _url_pfx ($$) { my ($ctx, $u) = @_; (index($u, '://') < 0 && index($u, '/') != 0) ? "$ctx->{-upfx}../$u" : $u; } sub partial_response ($) { my ($ctx) = @_; my $mid = $ctx->{mid}; my $code = 404; my $href = mid_href($mid); my $html = ascii_html($mid); my $title = "<$html> not found"; my $s = "
Message-ID <$html>\nnot found\n";
	$ctx->{-upfx} //= '../';
	if (my $n_partial = $ctx->{n_partial}) {
		$code = 300;
		my $es = $n_partial == 1 ? '' : 'es';
		$n_partial .= '+' if ($n_partial == PARTIAL_MAX);
		$s .= "\n$n_partial partial match$es found:\n\n";
		my $cur_name = $ctx->{ibx}->{name};
		foreach my $pair (@{$ctx->{partial}}) {
			my ($ibx, $res) = @$pair;
			my $e = $ibx->{name} eq $cur_name ? $ctx->{env} : undef;
			my $u = _url_pfx($ctx, $ibx->base_url($e) // next);
			foreach my $m (@$res) {
				my $href = mid_href($m);
				my $html = ascii_html($m);
				$s .= qq{$u$html/\n};
			}
		}
	}
	my $ext = ext_urls($ctx, $mid, $href, $html);
	if ($ext ne '') {
		$s .= $ext;
		$code = 300;
	}
	$ctx->{-html_tip} = $s .= '
'; $ctx->{-title_html} = $title; html_oneshot($ctx, $code); } sub finalize_partial ($) { $_[0]->{-wcb}->(partial_response($_[0])) } sub ext_urls { my ($ctx, $mid, $href, $html) = @_; # Fall back to external repos if configured if (@EXT_URL && index($mid, '@') >= 0) { my $env = $ctx->{env}; my $e = "\nPerhaps try an external site:\n\n"; foreach my $url (@EXT_URL) { my $u = prurl($env, $url); my $r = sprintf($u, $href); my $t = sprintf($u, $html); $e .= qq{$t\n}; } return $e; } '' } sub exact { my ($ctx) = @_; my $mid = $ctx->{mid}; my $found = $ctx->{found}; my $href = mid_href($mid); my $html = ascii_html($mid); my $title = "<$html> found in "; my $end = @$found == 1 ? 'another inbox' : 'other inboxes'; $ctx->{-title_html} = $title . $end; $ctx->{-upfx} //= '../'; my $ext_urls = ext_urls($ctx, $mid, $href, $html); my $code = (@$found == 1 && $ext_urls eq '') ? 200 : 300; $ctx->{-html_tip} = join('', "
Message-ID: <$html>\nfound in $end:\n\n",
				(map {
					my $u = _url_pfx($ctx, $_->base_url);
					qq($u$html/\n)
				} @$found),
			$ext_urls, '
'); html_oneshot($ctx, $code); } 1; public-inbox-1.9.0/lib/PublicInbox/ExtSearch.pm000066400000000000000000000066511430031475700213400ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # Read-only external (detached) index for cross inbox search. # This is a read-only counterpart to PublicInbox::ExtSearchIdx # and behaves like PublicInbox::Inbox AND PublicInbox::Search package PublicInbox::ExtSearch; use strict; use v5.10.1; use PublicInbox::Over; use PublicInbox::Inbox; use PublicInbox::MiscSearch; use DBI qw(:sql_types); # SQL_BLOB # for ->reopen, ->mset, ->mset_to_artnums use parent qw(PublicInbox::Search); sub new { my ($class, $topdir) = @_; bless { topdir => $topdir, -primary_address => 'unknown@example.com', # xpfx => 'ei15' xpfx => "$topdir/ei".PublicInbox::Search::SCHEMA_VERSION }, $class; } sub misc { my ($self) = @_; $self->{misc} //= PublicInbox::MiscSearch->new("$self->{xpfx}/misc"); } # same as per-inbox ->over, for now... sub over { my ($self) = @_; $self->{over} //= do { PublicInbox::Inbox::_cleanup_later($self); PublicInbox::Over->new("$self->{xpfx}/over.sqlite3"); }; } sub git { my ($self) = @_; $self->{git} //= do { PublicInbox::Inbox::_cleanup_later($self); PublicInbox::Git->new("$self->{topdir}/ALL.git"); }; } # returns a hashref of { $NEWSGROUP_NAME => $ART_NO } using the `xref3' table sub nntp_xref_for { # NNTP only my ($self, $xibx, $xsmsg) = @_; my $dbh = over($self)->dbh; my $sth = $dbh->prepare_cached(<<'', undef, 1); SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1 $sth->execute($xibx->{newsgroup}); my $xibx_id = $sth->fetchrow_array // do { warn "W: `$xibx->{newsgroup}' not found in $self->{topdir}\n"; return; }; $sth = $dbh->prepare_cached(<<'', undef, 1); SELECT docid FROM xref3 WHERE oidbin = ? AND xnum = ? AND ibx_id = ? LIMIT 1 $sth->bind_param(1, $xsmsg->oidbin, SQL_BLOB); # NNTP::cmd_over can set {num} to zero according to RFC 3977 8.3.2 $sth->bind_param(2, $xsmsg->{num} || $xsmsg->{-orig_num}); $sth->bind_param(3, $xibx_id); $sth->execute; my $docid = $sth->fetchrow_array // do { warn <{newsgroup}:$xsmsg->{num}' not found in $self->{topdir}" EOF return; }; # LIMIT is number of newsgroups on server: $sth = $dbh->prepare_cached(<<'', undef, 1); SELECT ibx_id,xnum FROM xref3 WHERE docid = ? AND ibx_id != ? $sth->execute($docid, $xibx_id); my $rows = $sth->fetchall_arrayref; my $eidx_key_sth = $dbh->prepare_cached(<<'', undef, 1); SELECT eidx_key FROM inboxes WHERE ibx_id = ? LIMIT 1 my %xref = map { my ($ibx_id, $xnum) = @$_; $eidx_key_sth->execute($ibx_id); my $eidx_key = $eidx_key_sth->fetchrow_array; # only include if there's a newsgroup name $eidx_key && index($eidx_key, '/') >= 0 ? () : ($eidx_key => $xnum) } @$rows; $xref{$xibx->{newsgroup}} = $xsmsg->{num}; \%xref; } sub mm { undef } sub altid_map { {} } sub description { my ($self) = @_; ($self->{description} //= PublicInbox::Inbox::cat_desc("$self->{topdir}/description")) // '$EXTINDEX_DIR/description missing'; } sub search { PublicInbox::Inbox::_cleanup_later($_[0]); $_[0]; } sub thing_type { 'external index' } no warnings 'once'; *base_url = \&PublicInbox::Inbox::base_url; *smsg_eml = \&PublicInbox::Inbox::smsg_eml; *smsg_by_mid = \&PublicInbox::Inbox::smsg_by_mid; *msg_by_mid = \&PublicInbox::Inbox::msg_by_mid; *modified = \&PublicInbox::Inbox::modified; *recent = \&PublicInbox::Inbox::recent; *max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef *isrch = \&search; 1; public-inbox-1.9.0/lib/PublicInbox/ExtSearchIdx.pm000066400000000000000000001264161430031475700220070ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # Detached/external index cross inbox search indexing support # read-write counterpart to PublicInbox::ExtSearch # # It's based on the same ideas as public-inbox-v2-format(5) using # over.sqlite3 for dedupe and sharded Xapian. msgmap.sqlite3 is # missing, so there is no Message-ID conflict resolution, meaning # no NNTP support for now. # # v2 has a 1:1 mapping of index:inbox or msgmap for NNTP support. # This is intended to be an M:N index:inbox mapping, but it'll likely # be 1:N in common practice (M==1) package PublicInbox::ExtSearchIdx; use strict; use v5.10.1; use parent qw(PublicInbox::ExtSearch PublicInbox::Lock); use Carp qw(croak carp); use Scalar::Util qw(blessed); use Sys::Hostname qw(hostname); use POSIX qw(strftime); use File::Glob qw(bsd_glob GLOB_NOSORT); use PublicInbox::MultiGit; use PublicInbox::Search; use PublicInbox::SearchIdx qw(prepare_stack is_ancestor is_bad_blob); use PublicInbox::OverIdx; use PublicInbox::MiscIdx; use PublicInbox::MID qw(mids); use PublicInbox::V2Writable; use PublicInbox::InboxWritable; use PublicInbox::ContentHash qw(content_hash); use PublicInbox::Eml; use PublicInbox::DS qw(now add_timer); use DBI qw(:sql_types); # SQL_BLOB sub new { my (undef, $dir, $opt) = @_; my $l = $opt->{indexlevel} // 'full'; $l !~ $PublicInbox::SearchIdx::INDEXLEVELS and die "invalid indexlevel=$l\n"; $l eq 'basic' and die "E: indexlevel=basic not yet supported\n"; my $self = bless { xpfx => "$dir/ei".PublicInbox::Search::SCHEMA_VERSION, topdir => $dir, creat => $opt->{creat}, ibx_map => {}, # (newsgroup//inboxdir) => $ibx ibx_active => [], # by config section order ibx_known => [], # by config section order indexlevel => $l, transact_bytes => 0, total_bytes => 0, current_info => '', parallel => 1, lock_path => "$dir/ei.lock", }, __PACKAGE__; $self->{shards} = $self->count_shards || nproc_shards({ nproc => $opt->{jobs} }); my $oidx = PublicInbox::OverIdx->new("$self->{xpfx}/over.sqlite3"); $self->{-no_fsync} = $oidx->{-no_fsync} = 1 if !$opt->{fsync}; $self->{-dangerous} = 1 if $opt->{dangerous}; $self->{oidx} = $oidx; $self } sub attach_inbox { my ($self, $ibx, $types) = @_; $self->{ibx_map}->{$ibx->eidx_key} //= do { delete $self->{-ibx_ary_known}; # invalidate cache delete $self->{-ibx_ary_active}; # invalidate cache $types //= [ qw(active known) ]; for my $t (@$types) { push @{$self->{"ibx_$t"}}, $ibx; } $ibx; } } sub _ibx_attach { # each_inbox callback my ($ibx, $self, $types) = @_; attach_inbox($self, $ibx, $types); } sub attach_config { my ($self, $cfg, $ibxs) = @_; $self->{cfg} = $cfg; my $types; if ($ibxs) { for my $ibx (@$ibxs) { $self->{ibx_map}->{$ibx->eidx_key} //= do { push @{$self->{ibx_active}}, $ibx; push @{$self->{ibx_known}}, $ibx; $ibx; } } # invalidate cache delete $self->{-ibx_ary_known}; delete $self->{-ibx_ary_active}; $types = [ 'known' ]; } $types //= [ qw(known active) ]; $cfg->each_inbox(\&_ibx_attach, $self, $types); } sub check_batch_limit ($) { my ($req) = @_; my $self = $req->{self}; my $new_smsg = $req->{new_smsg}; my $n = $self->{transact_bytes} += $new_smsg->{bytes}; # set flag for PublicInbox::V2Writable::index_todo: ${$req->{need_checkpoint}} = 1 if $n >= $self->{batch_bytes}; } sub apply_boost ($$) { my ($req, $smsg) = @_; my $id2pos = $req->{id2pos}; # index in ibx_sorted my $xr3 = $req->{self}->{oidx}->get_xref3($smsg->{num}, 1); @$xr3 = sort { $id2pos->{$a->[0]} <=> $id2pos->{$b->[0]} || $a->[1] <=> $b->[1] # break ties with {xnum} } @$xr3; my $new_smsg = $req->{new_smsg}; return if $xr3->[0]->[2] ne $new_smsg->oidbin; # loser # replace the old smsg with the more boosted one $new_smsg->{num} = $smsg->{num}; $new_smsg->populate($req->{eml}, $req); $req->{self}->{oidx}->add_overview($req->{eml}, $new_smsg); } sub remove_doc ($$) { my ($self, $docid) = @_; $self->{oidx}->delete_by_num($docid); $self->{oidx}->eidxq_del($docid); $self->idx_shard($docid)->ipc_do('xdb_remove', $docid); } sub _unref_doc ($$$$$;$) { my ($sync, $docid, $ibx, $xnum, $oidbin, $eml) = @_; my $smsg; if (ref($docid)) { $smsg = $docid; $docid = $smsg->{num}; } if (defined($oidbin) && defined($xnum) && blessed($ibx) && $ibx->over) { my $smsg = $ibx->over->get_art($xnum); if ($smsg && $smsg->oidbin eq $oidbin) { carp("BUG: (non-fatal) ".$ibx->eidx_key. " #$xnum $smsg->{blob} still valid"); return; } } my $s = 'DELETE FROM xref3 WHERE oidbin = ?'; $s .= ' AND ibx_id = ?' if defined($ibx); $s .= ' AND xnum = ?' if defined($xnum); my $del = $sync->{self}->{oidx}->dbh->prepare_cached($s); my $col = 0; $del->bind_param(++$col, $oidbin, SQL_BLOB); $del->bind_param(++$col, $ibx->{-ibx_id}) if $ibx; $del->bind_param(++$col, $xnum) if defined($xnum); $del->execute; my $xr3 = $sync->{self}->{oidx}->get_xref3($docid); if (scalar(@$xr3) == 0) { # all gone remove_doc($sync->{self}, $docid); } else { # enqueue for reindex of remaining messages if ($ibx) { my $ekey = $ibx->{-gc_eidx_key} // $ibx->eidx_key; my $idx = $sync->{self}->idx_shard($docid); $idx->ipc_do('remove_eidx_info', $docid, $ekey, $eml); } # else: we can't remove_eidx_info in reindex-only path # replace invalidated blob ASAP with something which should be # readable since we may commit the transaction on checkpoint. # eidxq processing will re-apply boost $smsg //= $sync->{self}->{oidx}->get_art($docid); my $hex = unpack('H*', $oidbin); if ($smsg && $smsg->{blob} eq $hex) { $xr3->[0] =~ /:([a-f0-9]{40,}+)\z/ or die "BUG: xref $xr3->[0] has no OID"; $sync->{self}->{oidx}->update_blob($smsg, $1); } # yes, add, we'll need to re-apply boost $sync->{self}->{oidx}->eidxq_add($docid); } @$xr3 } sub do_xpost ($$) { my ($req, $smsg) = @_; my $self = $req->{self}; my $docid = $smsg->{num}; my $oid = $req->{oid}; my $xibx = $req->{ibx}; my $eml = $req->{eml}; if (my $new_smsg = $req->{new_smsg}) { # 'm' on cross-posted message my $eidx_key = $xibx->eidx_key; my $xnum = $req->{xnum}; $self->{oidx}->add_xref3($docid, $xnum, $oid, $eidx_key); my $idx = $self->idx_shard($docid); $idx->ipc_do('add_eidx_info', $docid, $eidx_key, $eml); apply_boost($req, $smsg) if $req->{boost_in_use}; } else { # 'd' no {xnum} $self->git->async_wait_all; $oid = pack('H*', $oid); _unref_doc($req, $docid, $xibx, undef, $oid, $eml); } } # called by V2Writable::sync_prepare sub artnum_max { $_[0]->{oidx}->eidx_max } sub index_unseen ($) { my ($req) = @_; my $new_smsg = $req->{new_smsg} or die 'BUG: {new_smsg} unset'; my $eml = delete $req->{eml}; $new_smsg->populate($eml, $req); my $self = $req->{self}; my $docid = $self->{oidx}->adj_counter('eidx_docid', '+'); $new_smsg->{num} = $docid; my $idx = $self->idx_shard($docid); $self->{oidx}->add_overview($eml, $new_smsg); my $oid = $new_smsg->{blob}; my $ibx = delete $req->{ibx} or die 'BUG: {ibx} unset'; $self->{oidx}->add_xref3($docid, $req->{xnum}, $oid, $ibx->eidx_key); $idx->index_eml($eml, $new_smsg, $ibx->eidx_key); check_batch_limit($req); } sub do_finalize ($) { my ($req) = @_; if (my $indexed = $req->{indexed}) { # duplicated messages do_xpost($req, $_) for @$indexed; } elsif (exists $req->{new_smsg}) { # totally unseen messsage index_unseen($req); } else { # `d' message was already unindexed in the v1/v2 inboxes, # so it's too noisy to warn, here. } # cur_cmt may be undef for unindex_oid, set by V2Writable::index_todo if (defined(my $cur_cmt = $req->{cur_cmt})) { ${$req->{latest_cmt}} = $cur_cmt; } } sub do_step ($) { # main iterator for adding messages to the index my ($req) = @_; my $self = $req->{self} // die 'BUG: {self} missing'; while (1) { if (my $next_arg = $req->{next_arg}) { if (my $smsg = $self->{oidx}->next_by_mid(@$next_arg)) { $req->{cur_smsg} = $smsg; $self->git->cat_async($smsg->{blob}, \&ck_existing, $req); return; # ck_existing calls do_step } delete $req->{next_arg}; } die "BUG: {cur_smsg} still set" if $req->{cur_smsg}; my $mid = shift(@{$req->{mids}}) // last; my ($id, $prev); $req->{next_arg} = [ $mid, \$id, \$prev ]; # loop again } do_finalize($req); } sub _blob_missing ($$) { # called when a known $smsg->{blob} is gone my ($req, $smsg) = @_; # xnum and ibx are unknown, we only call this when an entry from # /ei*/over.sqlite3 is bad, not on entries from xap*/over.sqlite3 $req->{self}->git->async_wait_all; _unref_doc($req, $smsg, undef, undef, $smsg->oidbin); } sub ck_existing { # git->cat_async callback my ($bref, $oid, $type, $size, $req) = @_; my $smsg = delete $req->{cur_smsg} or die 'BUG: {cur_smsg} missing'; if ($type eq 'missing') { _blob_missing($req, $smsg); } elsif (!is_bad_blob($oid, $type, $size, $smsg->{blob})) { my $self = $req->{self} // die 'BUG: {self} missing'; local $self->{current_info} = "$self->{current_info} $oid"; my $cur = PublicInbox::Eml->new($bref); if (content_hash($cur) eq $req->{chash}) { push @{$req->{indexed}}, $smsg; # for do_xpost } # else { index_unseen later } } do_step($req); } # is the messages visible in the inbox currently being indexed? # return the number if so sub cur_ibx_xnum ($$;$) { my ($req, $bref, $mismatch) = @_; my $ibx = $req->{ibx} or die 'BUG: current {ibx} missing'; $req->{eml} = PublicInbox::Eml->new($bref); $req->{chash} = content_hash($req->{eml}); $req->{mids} = mids($req->{eml}); for my $mid (@{$req->{mids}}) { my ($id, $prev); while (my $x = $ibx->over->next_by_mid($mid, \$id, \$prev)) { return $x->{num} if $x->{blob} eq $req->{oid}; push @$mismatch, $x if $mismatch; } } undef; } sub index_oid { # git->cat_async callback for 'm' my ($bref, $oid, $type, $size, $req) = @_; my $self = $req->{self}; local $self->{current_info} = "$self->{current_info} $oid"; return if is_bad_blob($oid, $type, $size, $req->{oid}); my $new_smsg = $req->{new_smsg} = bless { blob => $oid, }, 'PublicInbox::Smsg'; $new_smsg->set_bytes($$bref, $size); ++${$req->{nr}}; my $mismatch = []; $req->{xnum} = cur_ibx_xnum($req, $bref, $mismatch) // do { warn "# deleted\n"; warn "# mismatch $_->{blob}\n" for @$mismatch; ${$req->{latest_cmt}} = $req->{cur_cmt} // die "BUG: {cur_cmt} unset ($oid)\n"; return; }; do_step($req); } sub unindex_oid { # git->cat_async callback for 'd' my ($bref, $oid, $type, $size, $req) = @_; my $self = $req->{self}; local $self->{current_info} = "$self->{current_info} $oid"; return if is_bad_blob($oid, $type, $size, $req->{oid}); return if defined(cur_ibx_xnum($req, $bref)); # was re-added do_step($req); } # overrides V2Writable::last_commits, called by sync_ranges via sync_prepare sub last_commits { my ($self, $sync) = @_; my $heads = []; my $ekey = $sync->{ibx}->eidx_key; my $uv = $sync->{ibx}->uidvalidity; for my $i (0..$sync->{epoch_max}) { $heads->[$i] = $self->{oidx}->eidx_meta("lc-v2:$ekey//$uv;$i"); } $heads; } sub _ibx_index_reject ($) { my ($ibx) = @_; $ibx->mm // return 'unindexed, no msgmap.sqlite3'; $ibx->uidvalidity // return 'no UIDVALIDITY'; $ibx->over // return 'unindexed, no over.sqlite3'; undef; } sub _sync_inbox ($$$) { my ($self, $sync, $ibx) = @_; my $ekey = $ibx->eidx_key; if (defined(my $err = _ibx_index_reject($ibx))) { return "W: skipping $ekey ($err)"; } $sync->{ibx} = $ibx; $sync->{nr} = \(my $nr = 0); my $v = $ibx->version; if ($v == 2) { $sync->{epoch_max} = $ibx->max_git_epoch // return; sync_prepare($self, $sync); # or return # TODO: once MiscIdx is stable } elsif ($v == 1) { my $uv = $ibx->uidvalidity; my $lc = $self->{oidx}->eidx_meta("lc-v1:$ekey//$uv"); my $head = $ibx->mm->last_commit // return "E: $ibx->{inboxdir} is not indexed"; my $stk = prepare_stack($sync, $lc ? "$lc..$head" : $head); my $unit = { stack => $stk, git => $ibx->git }; push @{$sync->{todo}}, $unit; } else { return "E: $ekey unsupported inbox version (v$v)"; } for my $unit (@{delete($sync->{todo}) // []}) { last if $sync->{quit}; index_todo($self, $sync, $unit); } $self->{midx}->index_ibx($ibx) unless $sync->{quit}; $ibx->git->cleanup; # done with this inbox, now undef; } sub eidx_gc_scan_inboxes ($$) { my ($self, $sync) = @_; my ($x3_doc, $ibx_ck); restart: $x3_doc = $self->{oidx}->dbh->prepare(<{oidx}->dbh->prepare(<execute; while (my ($ibx_id, $eidx_key) = $ibx_ck->fetchrow_array) { next if $self->{ibx_map}->{$eidx_key}; $self->{midx}->remove_eidx_key($eidx_key); warn "I: deleting messages for $eidx_key...\n"; $x3_doc->execute($ibx_id); my $ibx = { -ibx_id => $ibx_id, -gc_eidx_key => $eidx_key }; while (my ($docid, $xnum, $oid) = $x3_doc->fetchrow_array) { my $r = _unref_doc($sync, $docid, $ibx, $xnum, $oid); $oid = unpack('H*', $oid); $r = $r ? 'unref' : 'remove'; warn "I: $r #$docid $eidx_key $oid\n"; if (checkpoint_due($sync)) { $x3_doc = $ibx_ck = undef; reindex_checkpoint($self, $sync); goto restart; } } $self->{oidx}->dbh->do(<<'', undef, $ibx_id); DELETE FROM inboxes WHERE ibx_id = ? # drop last_commit info my $pat = $eidx_key; $pat =~ s/([_%\\])/\\$1/g; $self->{oidx}->dbh->do('PRAGMA case_sensitive_like = ON'); my $lc_i = $self->{oidx}->dbh->prepare(<<''); SELECT key FROM eidx_meta WHERE key LIKE ? ESCAPE ? $lc_i->execute("lc-%:$pat//%", '\\'); while (my ($key) = $lc_i->fetchrow_array) { next if $key !~ m!\Alc-v[1-9]+:\Q$eidx_key\E//!; warn "I: removing $key\n"; $self->{oidx}->dbh->do(<<'', undef, $key); DELETE FROM eidx_meta WHERE key = ? } warn "I: $eidx_key removed\n"; } } sub eidx_gc_scan_shards ($$) { # TODO: use for lei/store my ($self, $sync) = @_; my $nr = $self->{oidx}->dbh->do(<<''); DELETE FROM xref3 WHERE docid NOT IN (SELECT num FROM over) warn "I: eliminated $nr stale xref3 entries\n" if $nr != 0; reindex_checkpoint($self, $sync) if checkpoint_due($sync); # fixup from old bugs: $nr = $self->{oidx}->dbh->do(<<''); DELETE FROM over WHERE num > 0 AND num NOT IN (SELECT docid FROM xref3) warn "I: eliminated $nr stale over entries\n" if $nr != 0; reindex_checkpoint($self, $sync) if checkpoint_due($sync); $nr = $self->{oidx}->dbh->do(<<''); DELETE FROM eidxq WHERE docid NOT IN (SELECT num FROM over) warn "I: eliminated $nr stale reindex queue entries\n" if $nr != 0; reindex_checkpoint($self, $sync) if checkpoint_due($sync); my ($cur) = $self->{oidx}->dbh->selectrow_array(< 0 EOM $cur // return; # empty my ($r, $n, %active_shards); $nr = 0; while (1) { $r = $self->{oidx}->dbh->selectcol_arrayref(<<"", undef, $cur); SELECT num FROM over WHERE num >= ? ORDER BY num ASC LIMIT 10000 last unless scalar(@$r); while (defined($n = shift @$r)) { for my $i ($cur..($n - 1)) { my $idx = idx_shard($self, $i); $idx->ipc_do('xdb_remove_quiet', $i); $active_shards{$idx} = $idx; } $cur = $n + 1; } if (checkpoint_due($sync)) { for my $idx (values %active_shards) { $nr += $idx->ipc_do('nr_quiet_rm') } %active_shards = (); reindex_checkpoint($self, $sync); } } warn "I: eliminated $nr stale Xapian documents\n" if $nr != 0; } sub eidx_gc { my ($self, $opt) = @_; $self->{cfg} or die "E: GC requires ->attach_config\n"; $opt->{-idx_gc} = 1; my $sync = { need_checkpoint => \(my $need_checkpoint = 0), check_intvl => 10, next_check => now() + 10, checkpoint_unlocks => 1, -opt => $opt, self => $self, }; $self->idx_init($opt); # acquire lock via V2Writable::_idx_init eidx_gc_scan_inboxes($self, $sync); eidx_gc_scan_shards($self, $sync); done($self); } sub _ibx_for ($$$) { my ($self, $sync, $smsg) = @_; my $ibx_id = delete($smsg->{ibx_id}) // die '{ibx_id} unset'; my $pos = $sync->{id2pos}->{$ibx_id} // die "$ibx_id no pos"; $self->{-ibx_ary_known}->[$pos] // die "BUG: ibx for $smsg->{blob} not mapped" } sub _fd_constrained ($) { my ($self) = @_; $self->{-fd_constrained} //= do { my $soft; if (eval { require BSD::Resource; 1 }) { my $NOFILE = BSD::Resource::RLIMIT_NOFILE(); ($soft, undef) = BSD::Resource::getrlimit($NOFILE); } else { chomp($soft = `sh -c 'ulimit -n'`); } if (defined($soft)) { # $want is an estimate my $want = scalar(@{$self->{ibx_active}}) + 64; my $ret = $want > $soft; if ($ret) { warn <{sync}; my $self = $sync->{self}; my $by_chash = delete $req->{by_chash} or die 'BUG: no {by_chash}'; my $nr = scalar(keys(%$by_chash)) or die 'BUG: no content hashes'; my $orig_smsg = $req->{orig_smsg} // die 'BUG: no {orig_smsg}'; my $docid = $smsg->{num} = $orig_smsg->{num}; $self->{oidx}->add_overview($eml, $smsg); # may rethread check_batch_limit({ %$sync, new_smsg => $smsg }); my $chash0 = $smsg->{chash} // die "BUG: $smsg->{blob} no {chash}"; my $stable = delete($by_chash->{$chash0}) // die "BUG: $smsg->{blob} chash missing"; my $idx = $self->idx_shard($docid); my $top_smsg = pop @$stable; $top_smsg == $smsg or die 'BUG: top_smsg != smsg'; my $ibx = _ibx_for($self, $sync, $smsg); $idx->index_eml($eml, $smsg, $ibx->eidx_key); for my $x (reverse @$stable) { $ibx = _ibx_for($self, $sync, $x); my $hdr = delete $x->{hdr} // die 'BUG: no {hdr}'; $idx->ipc_do('add_eidx_info', $docid, $ibx->eidx_key, $hdr); } return if $nr == 1; # likely, all good $self->git->async_wait_all; warn "W: #$docid split into $nr due to deduplication change\n"; my @todo; for my $ary (values %$by_chash) { for my $x (reverse @$ary) { warn "removing #$docid xref3 $x->{blob}\n"; my $bin = $x->oidbin; my $n = _unref_doc($sync, $docid, undef, undef, $bin); die "BUG: $x->{blob} invalidated #$docid" if $n == 0; } my $x = pop(@$ary) // die "BUG: #$docid {by_chash} empty"; $x->{num} = delete($x->{xnum}) // die '{xnum} unset'; $ibx = _ibx_for($self, $sync, $x); if (my $over = $ibx->over) { my $e = $over->get_art($x->{num}); $e->{blob} eq $x->{blob} or die <{blob} != $e->{blob} (${\$ibx->eidx_key}:$e->{num}); EOF push @todo, $ibx, $e; $over->dbh_close if _fd_constrained($self); } else { die "$ibx->{inboxdir}: over.sqlite3 unusable: $!\n"; } } undef $by_chash; while (my ($ibx, $e) = splice(@todo, 0, 2)) { reindex_unseen($self, $sync, $ibx, $e); } } sub _reindex_oid { # git->cat_async callback my ($bref, $oid, $type, $size, $req) = @_; my $sync = $req->{sync}; my $self = $sync->{self}; my $orig_smsg = $req->{orig_smsg} // die 'BUG: no {orig_smsg}'; my $expect_oid = $req->{xr3r}->[$req->{ix}]->[2]; my $docid = $orig_smsg->{num}; if (is_bad_blob($oid, $type, $size, $expect_oid)) { my $oidbin = pack('H*', $expect_oid); my $remain = _unref_doc($sync, $docid, undef, undef, $oidbin); if ($remain == 0) { warn "W: #$docid ($oid) gone or corrupt\n"; } elsif (my $next_oid = $req->{xr3r}->[++$req->{ix}]->[2]) { $self->git->cat_async($next_oid, \&_reindex_oid, $req); } else { warn "BUG: #$docid ($oid) gone (UNEXPECTED)\n"; } return; } my $ci = $self->{current_info}; local $self->{current_info} = "$ci #$docid $oid"; my $re_smsg = bless { blob => $oid }, 'PublicInbox::Smsg'; $re_smsg->set_bytes($$bref, $size); my $eml = PublicInbox::Eml->new($bref); $re_smsg->populate($eml, { autime => $orig_smsg->{ds}, cotime => $orig_smsg->{ts} }); my $chash = content_hash($eml); $re_smsg->{chash} = $chash; $re_smsg->{xnum} = $req->{xr3r}->[$req->{ix}]->[1]; $re_smsg->{ibx_id} = $req->{xr3r}->[$req->{ix}]->[0]; $re_smsg->{hdr} = $eml->header_obj; push @{$req->{by_chash}->{$chash}}, $re_smsg; if (my $next_oid = $req->{xr3r}->[++$req->{ix}]->[2]) { $self->git->cat_async($next_oid, \&_reindex_oid, $req); } else { # last $re_smsg is the highest priority xref3 local $self->{current_info} = "$ci #$docid"; _reindex_finalize($req, $re_smsg, $eml); } } sub _reindex_smsg ($$$) { my ($self, $sync, $smsg) = @_; my $docid = $smsg->{num}; my $xr3 = $self->{oidx}->get_xref3($docid, 1); if (scalar(@$xr3) == 0) { # _reindex_check_stale should've covered this warn <<""; BUG? #$docid $smsg->{blob} is not referenced by inboxes during reindex remove_doc($self, $docid); return; } # we sort {xr3r} in the reverse order of ibx_sorted so we can # hit the common case in _reindex_finalize without rereading # from git (or holding multiple messages in memory). my $id2pos = $sync->{id2pos}; # index in ibx_sorted @$xr3 = sort { $id2pos->{$b->[0]} <=> $id2pos->{$a->[0]} || $b->[1] <=> $a->[1] # break ties with {xnum} } @$xr3; @$xr3 = map { [ $_->[0], $_->[1], unpack('H*', $_->[2]) ] } @$xr3; my $req = { orig_smsg => $smsg, sync => $sync, xr3r => $xr3, ix => 0 }; $self->git->cat_async($xr3->[$req->{ix}]->[2], \&_reindex_oid, $req); } sub checkpoint_due ($) { my ($sync) = @_; ${$sync->{need_checkpoint}} || (now() > $sync->{next_check}); } sub host_ident () { # I've copied FS images and only changed the hostname before, # so prepend hostname. Use `state' since these a BOFH can change # these while this process is running and we always want to be # able to release locks taken by this process. state $retval = hostname . '-' . do { my $m; # machine-id(5) is systemd if (open(my $fh, '<', '/etc/machine-id')) { $m = <$fh> } # (g)hostid(1) is in GNU coreutils, kern.hostid is most BSDs chomp($m ||= `{ sysctl -n kern.hostid || hostid || ghostid; } 2>/dev/null` || "no-machine-id-or-hostid-on-$^O"); $m; }; } sub eidxq_release { my ($self) = @_; my $expect = delete($self->{-eidxq_locked}) or return; my ($owner_pid, undef) = split(/-/, $expect); return if $owner_pid != $$; # shards may fork my $oidx = $self->{oidx}; $oidx->begin_lazy; my $cur = $oidx->eidx_meta('eidxq_lock') // ''; if ($cur eq $expect) { $oidx->eidx_meta('eidxq_lock', ''); return 1; } elsif ($cur ne '') { warn "E: eidxq_lock($expect) stolen by $cur\n"; } else { warn "E: eidxq_lock($expect) released by another process\n"; } undef; } sub DESTROY { my ($self) = @_; eidxq_release($self) and $self->{oidx}->commit_lazy; } sub _eidxq_take ($) { my ($self) = @_; my $val = "$$-${\time}-$>-".host_ident; $self->{oidx}->eidx_meta('eidxq_lock', $val); $self->{-eidxq_locked} = $val; } sub eidxq_lock_acquire ($) { my ($self) = @_; my $oidx = $self->{oidx}; $oidx->begin_lazy; my $cur = $oidx->eidx_meta('eidxq_lock') || return _eidxq_take($self); if (my $locked = $self->{-eidxq_locked}) { # be lazy return $locked if $locked eq $cur; } my ($pid, $time, $euid, $ident) = split(/-/, $cur, 4); my $t = strftime('%Y-%m-%d %k:%M %z', localtime($time)); local $self->{current_info} = 'eidxq'; if ($euid == $> && $ident eq host_ident) { if (kill(0, $pid)) { warn <dbh->sqlite_db_filename; warn <{"-ibx_ary_$type"} //= do { # highest boost first, stable for config-ordering tiebreaker use sort 'stable'; [ sort { ($b->{boost} // 0) <=> ($a->{boost} // 0) } @{$self->{'ibx_'.$type} // die "BUG: $type unknown"} ]; } } sub prep_id2pos ($) { my ($self) = @_; my %id2pos; my $pos = 0; $id2pos{$_->{-ibx_id}} = $pos++ for (@{ibx_sorted($self, 'known')}); \%id2pos; } sub eidxq_process ($$) { # for reindexing my ($self, $sync) = @_; local $self->{current_info} = 'eidxq process'; return unless ($self->{cfg} && eidxq_lock_acquire($self)); my $dbh = $self->{oidx}->dbh; my $tot = $dbh->selectrow_array('SELECT COUNT(*) FROM eidxq') or return; ${$sync->{nr}} = 0; local $sync->{-regen_fmt} = "%u/$tot\n"; my $pr = $sync->{-opt}->{-progress}; if ($pr) { my $min = $dbh->selectrow_array('SELECT MIN(docid) FROM eidxq'); my $max = $dbh->selectrow_array('SELECT MAX(docid) FROM eidxq'); $pr->("Xapian indexing $min..$max (total=$tot)\n"); } $sync->{id2pos} //= prep_id2pos($self); my ($del, $iter); restart: $del = $dbh->prepare('DELETE FROM eidxq WHERE docid = ?'); $iter = $dbh->prepare('SELECT docid FROM eidxq ORDER BY docid ASC'); $iter->execute; while (defined(my $docid = $iter->fetchrow_array)) { last if $sync->{quit}; if (my $smsg = $self->{oidx}->get_art($docid)) { _reindex_smsg($self, $sync, $smsg); } else { warn "E: #$docid does not exist in over\n"; } $del->execute($docid); ++${$sync->{nr}}; if (checkpoint_due($sync)) { $dbh = $del = $iter = undef; reindex_checkpoint($self, $sync); # release lock $dbh = $self->{oidx}->dbh; goto restart; } } $self->git->async_wait_all; $pr->("reindexed ${$sync->{nr}}/$tot\n") if $pr; } sub _reindex_unseen { # git->cat_async callback my ($bref, $oid, $type, $size, $req) = @_; return if is_bad_blob($oid, $type, $size, $req->{oid}); my $self = $req->{self} // die 'BUG: {self} unset'; local $self->{current_info} = "$self->{current_info} $oid"; my $new_smsg = bless { blob => $oid, }, 'PublicInbox::Smsg'; $new_smsg->set_bytes($$bref, $size); my $eml = $req->{eml} = PublicInbox::Eml->new($bref); $req->{new_smsg} = $new_smsg; $req->{chash} = content_hash($eml); $req->{mids} = mids($eml); # do_step iterates through this do_step($req); # enter the normal indexing flow } # --reindex may catch totally unseen messages, this handles them sub reindex_unseen ($$$$) { my ($self, $sync, $ibx, $xsmsg) = @_; my $req = { %$sync, # has {self} autime => $xsmsg->{ds}, cotime => $xsmsg->{ts}, oid => $xsmsg->{blob}, ibx => $ibx, xnum => $xsmsg->{num}, # {mids} and {chash} will be filled in at _reindex_unseen }; warn "I: reindex_unseen ${\$ibx->eidx_key}:$req->{xnum}:$req->{oid}\n"; $self->git->cat_async($xsmsg->{blob}, \&_reindex_unseen, $req); } sub _unref_stale_range ($$$) { my ($sync, $ibx, $lt_or_gt) = @_; my $r; my $lim = 10000; do { $r = $sync->{self}->{oidx}->dbh->selectall_arrayref( <{-ibx_id}); SELECT docid,xnum,oidbin FROM xref3 WHERE ibx_id = ? AND $lt_or_gt LIMIT $lim EOS return if $sync->{quit}; for (@$r) { # hopefully rare, not worth optimizing: my ($docid, $xnum, $oidbin) = @$_; my $hex = unpack('H*', $oidbin); warn("# $xnum:$hex (#$docid): stale\n"); _unref_doc($sync, $docid, $ibx, $xnum, $oidbin); } } while (scalar(@$r) == $lim); 1; } sub _reindex_check_ibx ($$$) { my ($self, $sync, $ibx) = @_; my $ibx_id = $ibx->{-ibx_id}; my $slice = 10000; my $opt = { limit => $slice }; my ($beg, $end) = (1, $slice); my $ekey = $ibx->eidx_key; my ($max, $max0); do { $max0 = $ibx->mm->num_highwater; sync_inbox($self, $sync, $ibx) and return; # warned $max = $ibx->mm->num_highwater; return if $sync->{quit}; } while ($max > $max0 && warn("# $ekey moved $max0..$max, resyncing..\n")); $end = $max if $end > $max; # first, check if we missed any messages in target $ibx my $msgs; my $pr = $sync->{-opt}->{-progress}; local $sync->{-regen_fmt} = "$ekey checking %u/$max\n"; ${$sync->{nr}} = 0; my $fast = $sync->{-opt}->{fast}; my $usr; # _unref_stale_range (< $lo) called my ($lo, $hi); while (scalar(@{$msgs = $ibx->over->query_xover($beg, $end, $opt)})) { ${$sync->{nr}} = $beg; $beg = $msgs->[-1]->{num} + 1; $end = $beg + $slice; $end = $max if $end > $max; if (checkpoint_due($sync)) { reindex_checkpoint($self, $sync); # release lock } ($lo, $hi) = ($msgs->[0]->{num}, $msgs->[-1]->{num}); $usr //= _unref_stale_range($sync, $ibx, "xnum < $lo"); my $x3a = $self->{oidx}->dbh->selectall_arrayref( <<"", undef, $ibx_id, $lo, $hi); SELECT xnum,oidbin,docid FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ? my %x3m; for (@$x3a) { my $k = pack('J', $_->[0]) . $_->[1]; push @{$x3m{$k}}, $_->[2]; } undef $x3a; for my $xsmsg (@$msgs) { my $k = pack('JH*', $xsmsg->{num}, $xsmsg->{blob}); my $docids = delete($x3m{$k}); if (!defined($docids)) { reindex_unseen($self, $sync, $ibx, $xsmsg); } elsif (!$fast) { for my $num (@$docids) { $self->{oidx}->eidxq_add($num); } } return if $sync->{quit}; } next unless scalar keys %x3m; $self->git->async_wait_all; # wait for reindex_unseen # eliminate stale/mismatched entries my %mismatch = map { $_->{num} => $_->{blob} } @$msgs; while (my ($k, $docids) = each %x3m) { my ($xnum, $hex) = unpack('JH*', $k); my $bin = pack('H*', $hex); my $exp = $mismatch{$xnum}; if (defined $exp) { my $smsg = $ibx->over->get_art($xnum) // next; # $xnum may be expired by another process if ($smsg->{blob} eq $hex) { warn <<""; BUG: (non-fatal) $ekey #$xnum $smsg->{blob} still matches (old exp: $exp) next; } # else: continue to unref } my $m = defined($exp) ? "mismatch (!= $exp)" : 'stale'; warn("# $xnum:$hex (#@$docids): $m\n"); for my $i (@$docids) { _unref_doc($sync, $i, $ibx, $xnum, $bin); } return if $sync->{quit}; } } defined($hi) and ($hi < $max) and _unref_stale_range($sync, $ibx, "xnum > $hi AND xnum <= $max"); } sub _reindex_inbox ($$$) { my ($self, $sync, $ibx) = @_; my $ekey = $ibx->eidx_key; local $self->{current_info} = $ekey; if (defined(my $err = _ibx_index_reject($ibx))) { warn "W: cannot reindex $ekey ($err)\n"; } else { _reindex_check_ibx($self, $sync, $ibx); } delete @$ibx{qw(over mm search git)}; # won't need these for a bit } sub eidx_reindex { my ($self, $sync) = @_; return unless $self->{cfg}; # acquire eidxq_lock early because full reindex takes forever # and incremental -extindex processes can run during our checkpoints if (!eidxq_lock_acquire($self)) { warn "E: aborting --reindex\n"; return; } for my $ibx (@{ibx_sorted($self, 'active')}) { _reindex_inbox($self, $sync, $ibx); last if $sync->{quit}; } $self->git->async_wait_all; # ensure eidxq gets filled completely eidxq_process($self, $sync) unless $sync->{quit}; } sub sync_inbox { my ($self, $sync, $ibx) = @_; my $err = _sync_inbox($self, $sync, $ibx); delete @$ibx{qw(mm over)}; warn $err, "\n" if defined($err); $err; } sub dd_smsg { # git->cat_async callback my ($bref, $oid, $type, $size, $dd) = @_; my $smsg = $dd->{smsg} // die 'BUG: dd->{smsg} missing'; my $self = $dd->{self} // die 'BUG: {self} missing'; my $per_mid = $dd->{per_mid} // die 'BUG: {per_mid} missing'; if ($type eq 'missing') { _blob_missing($dd, $smsg); } elsif (!is_bad_blob($oid, $type, $size, $smsg->{blob})) { local $self->{current_info} = "$self->{current_info} $oid"; my $chash = content_hash(PublicInbox::Eml->new($bref)); push(@{$per_mid->{dd_chash}->{$chash}}, $smsg); } return if $per_mid->{last_smsg} != $smsg; while (my ($chash, $ary) = each %{$per_mid->{dd_chash}}) { my $keep = shift @$ary; next if !scalar(@$ary); $per_mid->{sync}->{dedupe_cull} += scalar(@$ary); print STDERR "# <$keep->{mid}> keeping #$keep->{num}, dropping ", join(', ', map { "#$_->{num}" } @$ary),"\n"; next if $per_mid->{sync}->{-opt}->{'dry-run'}; my $oidx = $self->{oidx}; for my $smsg (@$ary) { my $gone = $smsg->{num}; $oidx->merge_xref3($keep->{num}, $gone, $smsg->oidbin); remove_doc($self, $gone); } } } sub eidx_dedupe ($$$) { my ($self, $sync, $msgids) = @_; $sync->{dedupe_cull} = 0; my $candidates = 0; my $nr_mid = 0; return unless eidxq_lock_acquire($self); my ($iter, $cur_mid); my $min_id = 0; my $idx = 0; my ($max_id) = $self->{oidx}->dbh->selectrow_array(<{-regen_fmt} = "dedupe %u/$max_id\n"; # note: we could write this query more intelligently, # but that causes lock contention with read-only processes dedupe_restart: $cur_mid = $msgids->[$idx]; if ($cur_mid eq '') { # all Message-IDs $iter = $self->{oidx}->dbh->prepare(< ? ORDER BY id ASC EOS $iter->execute($min_id); } else { $iter = $self->{oidx}->dbh->prepare(< ? ORDER BY id ASC EOS $iter->execute($cur_mid, $min_id); } while (my ($mid, $id) = $iter->fetchrow_array) { last if $sync->{quit}; $self->{current_info} = "dedupe $mid"; ${$sync->{nr}} = $min_id = $id; my ($prv, @smsg); while (my $x = $self->{oidx}->next_by_mid($mid, \$id, \$prv)) { push @smsg, $x; } next if scalar(@smsg) < 2; my $per_mid = { dd_chash => {}, # chash => [ary of smsgs] last_smsg => $smsg[-1], sync => $sync }; $nr_mid++; $candidates += scalar(@smsg) - 1; for my $smsg (@smsg) { my $dd = { per_mid => $per_mid, smsg => $smsg, self => $self, }; $self->git->cat_async($smsg->{blob}, \&dd_smsg, $dd); } # need to wait on every single one @smsg contents can get # invalidated inside dd_smsg for messages with multiple # Message-IDs. $self->git->async_wait_all; if (checkpoint_due($sync)) { undef $iter; reindex_checkpoint($self, $sync); goto dedupe_restart; } } goto dedupe_restart if defined($msgids->[++$idx]); my $n = delete $sync->{dedupe_cull}; if (my $pr = $sync->{-opt}->{-progress}) { $pr->("culled $n/$candidates candidates ($nr_mid msgids)\n"); } ${$sync->{nr}} = 0; } sub eidx_sync { # main entry point my ($self, $opt) = @_; my $warn_cb = $SIG{__WARN__} || \&CORE::warn; local $self->{current_info} = ''; local $SIG{__WARN__} = sub { return if PublicInbox::Eml::warn_ignore(@_); $warn_cb->($self->{current_info}, ': ', @_); }; $self->idx_init($opt); # acquire lock via V2Writable::_idx_init $self->{oidx}->rethread_prepare($opt); my $sync = { need_checkpoint => \(my $need_checkpoint = 0), check_intvl => 10, next_check => now() + 10, -opt => $opt, # DO NOT SET {reindex} here, it's incompatible with reused # V2Writable code, reindex is totally different here # compared to v1/v2 inboxes because we have multiple histories self => $self, -regen_fmt => "%u/?\n", }; local $SIG{USR1} = sub { $need_checkpoint = 1 }; my $quit = PublicInbox::SearchIdx::quit_cb($sync); local $SIG{QUIT} = $quit; local $SIG{INT} = $quit; local $SIG{TERM} = $quit; for my $ibx (@{ibx_sorted($self, 'known')}) { $ibx->{-ibx_id} //= $self->{oidx}->ibx_id($ibx->eidx_key); } if (scalar(grep { defined($_->{boost}) } @{$self->{ibx_known}})) { $sync->{id2pos} //= prep_id2pos($self); $sync->{boost_in_use} = 1; } if (my $msgids = delete($opt->{dedupe})) { local $sync->{checkpoint_unlocks} = 1; eidx_dedupe($self, $sync, $msgids); } if (delete($opt->{reindex})) { local $sync->{checkpoint_unlocks} = 1; eidx_reindex($self, $sync); } # don't use $_ here, it'll get clobbered by reindex_checkpoint if ($opt->{scan} // 1) { for my $ibx (@{ibx_sorted($self, 'active')}) { last if $sync->{quit}; sync_inbox($self, $sync, $ibx); } } $self->{oidx}->rethread_done($opt) unless $sync->{quit}; eidxq_process($self, $sync) unless $sync->{quit}; eidxq_release($self); done($self); $sync; # for eidx_watch } sub update_last_commit { # overrides V2Writable my ($self, $sync, $stk) = @_; my $unit = $sync->{unit} // return; my $latest_cmt = $stk ? $stk->{latest_cmt} : ${$sync->{latest_cmt}}; defined($latest_cmt) or return; my $ibx = $sync->{ibx} or die 'BUG: {ibx} missing'; my $ekey = $ibx->eidx_key; my $uv = $ibx->uidvalidity; my $epoch = $unit->{epoch}; my $meta_key; my $v = $ibx->version; if ($v == 2) { die 'No {epoch} for v2 unit' unless defined $epoch; $meta_key = "lc-v2:$ekey//$uv;$epoch"; } elsif ($v == 1) { die 'Unexpected {epoch} for v1 unit' if defined $epoch; $meta_key = "lc-v1:$ekey//$uv"; } else { die "Unsupported inbox version: $v"; } my $last = $self->{oidx}->eidx_meta($meta_key); if (defined $last && is_ancestor($self->git, $last, $latest_cmt)) { my @cmd = (qw(rev-list --count), "$last..$latest_cmt"); chomp(my $n = $unit->{git}->qx(@cmd)); return if $n ne '' && $n == 0; } $self->{oidx}->eidx_meta($meta_key, $latest_cmt); } sub _idx_init { # with_umask callback my ($self, $opt) = @_; PublicInbox::V2Writable::_idx_init($self, $opt); # acquires ei.lock $self->{midx} = PublicInbox::MiscIdx->new($self); } sub symlink_packs ($$) { my ($ibx, $pd) = @_; my $ret = 0; my $glob = "$ibx->{inboxdir}/git/*.git/objects/pack/*.idx"; for my $idx (bsd_glob($glob, GLOB_NOSORT)) { my $src = substr($idx, 0, -length('.idx')); my $dst = $pd . substr($src, rindex($src, '/')); if (-f "$src.pack" and symlink("$src.pack", "$dst.pack") and symlink($idx, "$dst.idx") and -f $idx) { ++$ret; # .promisor, .bitmap, .rev and .keep are optional # XXX should we symlink .keep here? for my $s (qw(promisor bitmap rev)) { symlink("$src.$s", "$dst.$s") if -f "$src.$s"; } } elsif (!$!{EEXIST}) { warn "W: ln -s $src.{pack,idx} => $dst.*: $!\n"; unlink "$dst.pack", "$dst.idx"; } } $ret; } sub idx_init { # similar to V2Writable my ($self, $opt) = @_; return if $self->{idx_shards}; $self->git->cleanup; my $mode = 0644; my $ALL = $self->git->{git_dir}; # topdir/ALL.git my ($has_new, $alt, $seen, $prune, $prune_nr); if ($opt->{-private}) { # LeiStore my $local = "$self->{topdir}/local"; # lei/store $self->{mg} //= PublicInbox::MultiGit->new($self->{topdir}, 'ALL.git', 'local'); $mode = 0600; unless (-d $ALL) { umask 077; # don't bother restoring for lei PublicInbox::Import::init_bare($ALL); $self->git->qx(qw(config core.sharedRepository 0600)); } ($alt, $seen) = $self->{mg}->read_alternates(\$mode); $has_new = $self->{mg}->merge_epochs($alt, $seen); } else { # extindex has no epochs $self->{mg} //= PublicInbox::MultiGit->new($self->{topdir}, 'ALL.git'); $prune = $opt->{-idx_gc} ? \$prune_nr : undef; ($alt, $seen) = $self->{mg}->read_alternates(\$mode, $prune); PublicInbox::Import::init_bare($ALL); } # git-multi-pack-index(1) can speed up "git cat-file" startup slightly my $git_midx = 0; my $pd = "$ALL/objects/pack"; if (opendir(my $dh, $pd)) { # drop stale symlinks while (defined(my $dn = readdir($dh))) { if ($dn =~ /\.(?:idx|pack|promisor|bitmap|rev)\z/) { my $f = "$pd/$dn"; unlink($f) if -l $f && !-e $f; } } } elsif ($!{ENOENT}) { mkdir($pd) or die "mkdir($pd): $!"; } else { die "opendir($pd): $!"; } my $new = ''; for my $ibx (@{ibx_sorted($self, 'active')}) { # create symlinks for multi-pack-index $git_midx += symlink_packs($ibx, $pd); # add new lines to our alternates file my $d = $ibx->git->{git_dir} . '/objects'; next if exists $alt->{$d}; if (my @st = stat($d)) { next if $seen->{"$st[0]\0$st[1]"}++; } else { warn "W: stat($d) failed (from $ibx->{inboxdir}): $!\n"; next if $opt->{-idx_gc}; } $new .= "$d\n"; } ($has_new || $prune_nr || $new ne '') and $self->{mg}->write_alternates($mode, $alt, $new); $git_midx and $self->with_umask(sub { my @cmd = ('multi-pack-index'); push @cmd, '--no-progress' if ($opt->{quiet}//0) > 1; my $lk = $self->lock_for_scope; system('git', "--git-dir=$ALL", @cmd, 'write'); # ignore errors, fairly new command, may not exist }); $self->parallel_init($self->{indexlevel}); $self->with_umask(\&_idx_init, $self, $opt); $self->{oidx}->begin_lazy; $self->{oidx}->eidx_prep; $self->{midx}->create_xdb if $new ne ''; } sub _watch_commit { # PublicInbox::DS::add_timer callback my ($self) = @_; delete $self->{-commit_timer}; eidxq_process($self, $self->{-watch_sync}); eidxq_release($self); my $fmt = delete $self->{-watch_sync}->{-regen_fmt}; reindex_checkpoint($self, $self->{-watch_sync}); $self->{-watch_sync}->{-regen_fmt} = $fmt; # call event_step => done unless commit_timer is armed PublicInbox::DS::requeue($self); } sub on_inbox_unlock { # called by PublicInbox::InboxIdle my ($self, $ibx) = @_; my $opt = $self->{-watch_sync}->{-opt}; my $pr = $opt->{-progress}; my $ekey = $ibx->eidx_key; local $0 = "sync $ekey"; $pr->("indexing $ekey\n") if $pr; $self->idx_init($opt); sync_inbox($self, $self->{-watch_sync}, $ibx); $self->{-commit_timer} //= add_timer($opt->{'commit-interval'} // 10, \&_watch_commit, $self); } sub eidx_reload { # -extindex --watch SIGHUP handler my ($self, $idler) = @_; if ($self->{cfg}) { my $pr = $self->{-watch_sync}->{-opt}->{-progress}; $pr->('reloading ...') if $pr; delete $self->{-resync_queue}; delete $self->{-ibx_ary_known}; delete $self->{-ibx_ary_active}; $self->{ibx_known} = []; $self->{ibx_active} = []; %{$self->{ibx_map}} = (); delete $self->{-watch_sync}->{id2pos}; my $cfg = PublicInbox::Config->new; attach_config($self, $cfg); $idler->refresh($cfg); $pr->(" done\n") if $pr; } else { warn "reload not supported without --all\n"; } } sub eidx_resync_start ($) { # -extindex --watch SIGUSR1 handler my ($self) = @_; $self->{-resync_queue} //= [ @{ibx_sorted($self, 'active')} ]; PublicInbox::DS::requeue($self); # trigger our ->event_step } sub event_step { # PublicInbox::DS::requeue callback my ($self) = @_; if (my $resync_queue = $self->{-resync_queue}) { if (my $ibx = shift(@$resync_queue)) { on_inbox_unlock($self, $ibx); PublicInbox::DS::requeue($self); } else { delete $self->{-resync_queue}; _watch_commit($self); } } else { done($self) unless $self->{-commit_timer}; } } sub eidx_watch { # public-inbox-extindex --watch main loop my ($self, $opt) = @_; local @SIG{keys %SIG} = values %SIG; for my $sig (qw(HUP USR1 TSTP QUIT INT TERM)) { $SIG{$sig} = sub { warn "SIG$sig ignored while scanning\n" }; } require PublicInbox::InboxIdle; require PublicInbox::DS; require PublicInbox::Syscall; require PublicInbox::Sigfd; my $idler = PublicInbox::InboxIdle->new($self->{cfg}); if (!$self->{cfg}) { $idler->watch_inbox($_) for (@{ibx_sorted($self, 'active')}); } for my $ibx (@{ibx_sorted($self, 'active')}) { $ibx->subscribe_unlock(__PACKAGE__, $self) } my $pr = $opt->{-progress}; $pr->("performing initial scan ...\n") if $pr; my $sync = eidx_sync($self, $opt); # initial sync return if $sync->{quit}; my $oldset = PublicInbox::DS::block_signals(); local $self->{current_info} = ''; my $cb = $SIG{__WARN__} || \&CORE::warn; local $SIG{__WARN__} = sub { return if PublicInbox::Eml::warn_ignore(@_); $cb->($self->{current_info}, ': ', @_); }; my $sig = { HUP => sub { eidx_reload($self, $idler) }, USR1 => sub { eidx_resync_start($self) }, TSTP => sub { kill('STOP', $$) }, }; my $quit = PublicInbox::SearchIdx::quit_cb($sync); $sig->{QUIT} = $sig->{INT} = $sig->{TERM} = $quit; local $self->{-watch_sync} = $sync; # for ->on_inbox_unlock PublicInbox::DS->SetPostLoopCallback(sub { !$sync->{quit} }); $pr->("initial scan complete, entering event loop\n") if $pr; # calls InboxIdle->event_step: PublicInbox::DS::event_loop($sig, $oldset); done($self); } no warnings 'once'; *done = \&PublicInbox::V2Writable::done; *with_umask = \&PublicInbox::InboxWritable::with_umask; *parallel_init = \&PublicInbox::V2Writable::parallel_init; *nproc_shards = \&PublicInbox::V2Writable::nproc_shards; *sync_prepare = \&PublicInbox::V2Writable::sync_prepare; *index_todo = \&PublicInbox::V2Writable::index_todo; *count_shards = \&PublicInbox::V2Writable::count_shards; *atfork_child = \&PublicInbox::V2Writable::atfork_child; *idx_shard = \&PublicInbox::V2Writable::idx_shard; *reindex_checkpoint = \&PublicInbox::V2Writable::reindex_checkpoint; *checkpoint = \&PublicInbox::V2Writable::checkpoint; 1; public-inbox-1.9.0/lib/PublicInbox/FakeImport.pm000066400000000000000000000013151430031475700215030ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # pretend to do PublicInbox::Import::add for "lei index" package PublicInbox::FakeImport; use strict; use v5.10.1; use PublicInbox::ContentHash qw(git_sha); use PublicInbox::Import; sub new { bless { bytes_added => 0 }, __PACKAGE__ } sub add { my ($self, $eml, $check_cb, $smsg) = @_; PublicInbox::Import::drop_unwanted_headers($eml); $smsg->populate($eml); my $raw = $eml->as_string; $smsg->{blob} = git_sha(1, \$raw)->hexdigest; $smsg->set_bytes($raw, length($raw)); if (my $oidx = delete $smsg->{-oidx}) { # used by LeiStore $oidx->vivify_xvmd($smsg) or return; } 1; } 1; public-inbox-1.9.0/lib/PublicInbox/FakeInotify.pm000066400000000000000000000076531430031475700216650ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # for systems lacking Linux::Inotify2 or IO::KQueue, just emulates # enough of Linux::Inotify2 package PublicInbox::FakeInotify; use strict; use v5.10.1; use parent qw(Exporter); use Time::HiRes qw(stat); use PublicInbox::DS qw(add_timer); sub IN_MODIFY () { 0x02 } # match Linux inotify # my $IN_MOVED_FROM 0x00000040 /* File was moved from X. */ # my $IN_MOVED_TO = 0x80; # my $IN_CREATE = 0x100; sub MOVED_TO_OR_CREATE () { 0x80 | 0x100 } sub IN_DELETE () { 0x200 } sub IN_DELETE_SELF () { 0x400 } sub IN_MOVE_SELF () { 0x800 } our @EXPORT_OK = qw(fill_dirlist on_dir_change); my $poll_intvl = 2; # same as Filesys::Notify::Simple sub new { bless { watch => {}, dirlist => {} }, __PACKAGE__ } sub fill_dirlist ($$$) { my ($self, $path, $dh) = @_; my $dirlist = $self->{dirlist}->{$path} = {}; while (defined(my $n = readdir($dh))) { $dirlist->{$n} = undef if $n !~ /\A\.\.?\z/; } } # behaves like Linux::Inotify2->watch sub watch { my ($self, $path, $mask) = @_; my @st = stat($path) or return; my $k = "$path\0$mask"; $self->{watch}->{$k} = $st[10]; # 10 - ctime if ($mask & IN_DELETE) { opendir(my $dh, $path) or return; fill_dirlist($self, $path, $dh); } bless [ $self->{watch}, $k ], 'PublicInbox::FakeInotify::Watch'; } # also used by KQNotify since it kevent requires readdir on st_nlink # count changes. sub on_dir_change ($$$$$) { my ($events, $dh, $path, $old_ctime, $dirlist) = @_; my $oldlist = $dirlist->{$path}; my $newlist = $oldlist ? {} : undef; while (defined(my $base = readdir($dh))) { next if $base =~ /\A\.\.?\z/; my $full = "$path/$base"; my @st = stat($full); if (@st && $st[10] > $old_ctime) { push @$events, bless(\$full, 'PublicInbox::FakeInotify::Event') } if (!@st) { # ignore ENOENT due to race warn "unhandled stat($full) error: $!\n" if !$!{ENOENT}; } elsif ($newlist) { $newlist->{$base} = undef; } } return if !$newlist; delete @$oldlist{keys %$newlist}; $dirlist->{$path} = $newlist; push(@$events, map { bless \"$path/$_", 'PublicInbox::FakeInotify::GoneEvent' } keys %$oldlist); } # behaves like non-blocking Linux::Inotify2->read sub read { my ($self) = @_; my $watch = $self->{watch} or return (); my $events = []; my @watch_gone; for my $x (keys %$watch) { my ($path, $mask) = split(/\0/, $x, 2); my @now = stat($path); if (!@now && $!{ENOENT} && ($mask & IN_DELETE_SELF)) { push @$events, bless(\$path, 'PublicInbox::FakeInotify::SelfGoneEvent'); push @watch_gone, $x; delete $self->{dirlist}->{$path}; } next if !@now; my $old_ctime = $watch->{$x}; $watch->{$x} = $now[10]; next if $old_ctime == $now[10]; if ($mask & IN_MODIFY) { push @$events, bless(\$path, 'PublicInbox::FakeInotify::Event') } elsif ($mask & (MOVED_TO_OR_CREATE | IN_DELETE)) { if (opendir(my $dh, $path)) { on_dir_change($events, $dh, $path, $old_ctime, $self->{dirlist}); } elsif ($!{ENOENT}) { push @watch_gone, $x; delete $self->{dirlist}->{$path}; } else { warn "W: opendir $path: $!\n"; } } } delete @$watch{@watch_gone}; @$events; } sub poll_once { my ($obj) = @_; $obj->event_step; # PublicInbox::InboxIdle::event_step add_timer($poll_intvl, \&poll_once, $obj); } package PublicInbox::FakeInotify::Watch; use strict; sub cancel { my ($self) = @_; delete $self->[0]->{$self->[1]}; } sub name { my ($self) = @_; (split(/\0/, $self->[1], 2))[0]; } package PublicInbox::FakeInotify::Event; use strict; sub fullname { ${$_[0]} } sub IN_DELETE { 0 } sub IN_MOVED_FROM { 0 } sub IN_DELETE_SELF { 0 } package PublicInbox::FakeInotify::GoneEvent; use strict; our @ISA = qw(PublicInbox::FakeInotify::Event); sub IN_DELETE { 1 } sub IN_MOVED_FROM { 0 } package PublicInbox::FakeInotify::SelfGoneEvent; use strict; our @ISA = qw(PublicInbox::FakeInotify::GoneEvent); sub IN_DELETE_SELF { 1 } 1; public-inbox-1.9.0/lib/PublicInbox/Feed.pm000066400000000000000000000070461430031475700203140ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # # Used for generating Atom feeds for web-accessible mailing list archives. package PublicInbox::Feed; use strict; use v5.10.1; use PublicInbox::View; use PublicInbox::WwwAtomStream; use PublicInbox::Smsg; # this loads w/o Search::Xapian sub generate_i { my ($ctx) = @_; shift @{$ctx->{msgs}}; } # main function sub generate { my ($ctx) = @_; my $msgs = $ctx->{msgs} = recent_msgs($ctx); return _no_thread() unless @$msgs; PublicInbox::WwwAtomStream->response($ctx, 200, \&generate_i); } sub generate_thread_atom { my ($ctx) = @_; my $msgs = $ctx->{msgs} = $ctx->{ibx}->over->get_thread($ctx->{mid}); return _no_thread() unless @$msgs; PublicInbox::WwwAtomStream->response($ctx, 200, \&generate_i); } sub generate_html_index { my ($ctx) = @_; # if the 'r' query parameter is given, it is a legacy permalink # which we must continue supporting: my $qp = $ctx->{qp}; my $ibx = $ctx->{ibx}; if ($qp && !$qp->{r} && $ibx->over) { return PublicInbox::View::index_topics($ctx); } my $env = $ctx->{env}; my $url = $ibx->base_url($env) . 'new.html'; my $qs = $env->{QUERY_STRING}; $url .= "?$qs" if $qs ne ''; [302, [ 'Location', $url, 'Content-Type', 'text/plain'], [ "Redirecting to $url\n" ] ]; } sub new_html_i { my ($ctx, $eml) = @_; $ctx->zmore($ctx->html_top) if exists $ctx->{-html_tip}; $eml and return PublicInbox::View::eml_entry($ctx, $eml); my $smsg = shift @{$ctx->{msgs}} or $ctx->zmore(PublicInbox::View::pagination_footer( $ctx, './new.html')); $smsg; } sub new_html { my ($ctx) = @_; my $msgs = $ctx->{msgs} = recent_msgs($ctx); if (!@$msgs) { return [404, ['Content-Type', 'text/plain'], ["No messages, yet\n"] ]; } $ctx->{-html_tip} = '
';
	$ctx->{-upfx} = '';
	$ctx->{-hr} = 1;
	PublicInbox::WwwStream::aresponse($ctx, 200, \&new_html_i);
}

# private subs

sub _no_thread () {
	[404, ['Content-Type', 'text/plain'], ["No feed found for thread\n"]];
}

sub recent_msgs {
	my ($ctx) = @_;
	my $ibx = $ctx->{ibx};
	my $max = $ibx->{feedmax} // 25;
	return PublicInbox::View::paginate_recent($ctx, $max) if $ibx->over;

	# only for rare v1 inboxes which aren't indexed at all
	my $qp = $ctx->{qp};
	my $hex = '[a-f0-9]';
	my $addmsg = qr!^:000000 100644 \S+ (\S+) A\t${hex}{2}/${hex}{38}$!;
	my $delmsg = qr!^:100644 000000 (\S+) \S+ D\t(${hex}{2}/${hex}{38})$!;
	my $refhex = qr/(?:HEAD|${hex}{4,})(?:~[0-9]+)?/;

	# revision ranges may be specified
	my $range = 'HEAD';
	my $r = $qp->{r} if $qp;
	if ($r && ($r =~ /\A(?:$refhex\.\.)?$refhex\z/o)) {
		$range = $r;
	}

	# get recent messages
	# we could use git log -z, but, we already know ssoma will not
	# leave us with filenames with spaces in them..
	my $log = $ibx->git->popen(qw/log
				--no-notes --no-color --raw -r
				--no-abbrev --abbrev-commit/,
				"--format=%H", $range);
	my %deleted; # only an optimization at this point
	my $last;
	my $last_commit;
	local $/ = "\n";
	my @ret;
	while (defined(my $line = <$log>)) {
		if ($line =~ /$addmsg/o) {
			my $add = $1;
			next if $deleted{$add}; # optimization-only
			push(@ret, bless { blob => $add }, 'PublicInbox::Smsg');
			if (scalar(@ret) >= $max) {
				$last = 1;
				last;
			}
		} elsif ($line =~ /$delmsg/o) {
			$deleted{$1} = 1;
		}
	}

	if ($last) {
		local $/ = "\n";
		while (my $line = <$log>) {
			if ($line =~ /^(${hex}{7,})/) {
				$last_commit = $1;
				last;
			}
		}
	}

	$last_commit and
		$ctx->{next_page} = qq[] .
					'next (older)';
	\@ret;
}

1;
public-inbox-1.9.0/lib/PublicInbox/Fetch.pm000066400000000000000000000157141430031475700205030ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 
# Wrapper to "git fetch" remote public-inboxes
package PublicInbox::Fetch;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC);
use URI ();
use PublicInbox::Spawn qw(popen_rd run_die spawn);
use PublicInbox::Admin;
use PublicInbox::LEI;
use PublicInbox::LeiCurl;
use PublicInbox::LeiMirror;
use File::Temp ();
use PublicInbox::Config;
use IO::Compress::Gzip qw(gzip $GzipError);

sub new { bless {}, __PACKAGE__ }

sub fetch_args ($$) {
	my ($lei, $opt) = @_;
	my @cmd; # (git --git-dir=...) to be added by caller
	$opt->{$_} = $lei->{$_} for (0..2);
	# we support "-c $key=$val" for arbitrary git config options
	# e.g.: git -c http.proxy=socks5h://127.0.0.1:9050
	push(@cmd, '-c', $_) for @{$lei->{opt}->{c} // []};
	push @cmd, 'fetch';
	push @cmd, '-q' if $lei->{opt}->{quiet};
	push @cmd, '-v' if $lei->{opt}->{verbose};
	@cmd;
}

sub remote_url ($$) {
	my ($lei, $dir) = @_;
	my $rn = $lei->{opt}->{'try-remote'} // [ 'origin', '_grokmirror' ];
	for my $r (@$rn) {
		my $cmd = [ qw(git config), "remote.$r.url" ];
		my $fh = popen_rd($cmd, undef, { -C => $dir, 2 => $lei->{2} });
		my $url = <$fh>;
		close $fh or next;
		$url =~ s!/*\n!!s;
		return $url;
	}
	undef
}

sub do_manifest ($$$) {
	my ($lei, $dir, $ibx_uri) = @_;
	my $muri = URI->new("$ibx_uri/manifest.js.gz");
	my $ft = File::Temp->new(TEMPLATE => 'm-XXXX',
				UNLINK => 1, DIR => $dir, SUFFIX => '.tmp');
	my $fn = $ft->filename;
	my $mf = "$dir/manifest.js.gz";
	my $m0; # current manifest.js.gz contents
	if (open my $fh, '<', $mf) {
		$m0 = eval {
			PublicInbox::LeiMirror::decode_manifest($fh, $mf, $mf)
		};
		warn($@) if $@;
	}
	my ($bn) = ($fn =~ m!/([^/]+)\z!);
	my $curl_cmd = $lei->{curl}->for_uri($lei, $muri, qw(-R -o), $bn);
	my $opt = { -C => $dir };
	$opt->{$_} = $lei->{$_} for (0..2);
	my $cerr = PublicInbox::LeiMirror::run_reap($lei, $curl_cmd, $opt);
	if ($cerr) {
		return [ 404, $muri ] if ($cerr >> 8) == 22; # 404 Missing
		$lei->child_error($cerr, "@$curl_cmd failed");
		return;
	}
	my $m1 = eval {
		PublicInbox::LeiMirror::decode_manifest($ft, $fn, $muri);
	} or return [ 404, $muri ];
	my $mdiff = { %$m1 };

	# filter out unchanged entries.  We check modified, too, since
	# fingerprints are SHA-1, so there's a teeny chance they'll collide
	while (my ($k, $v0) = each %{$m0 // {}}) {
		my $cur = $m1->{$k} // next;
		my $f0 = $v0->{fingerprint} // next;
		my $f1 = $cur->{fingerprint} // next;
		my $t0 = $v0->{modified} // next;
		my $t1 = $cur->{modified} // next;
		delete($mdiff->{$k}) if $f0 eq $f1 && $t0 == $t1;
	}
	unless (keys %$mdiff) {
		$lei->child_error(127 << 8) if $lei->{opt}->{'exit-code'};
		return;
	}
	my (undef, $v1_path, @v2_epochs) =
		PublicInbox::LeiMirror::deduce_epochs($mdiff, $ibx_uri->path);
	[ 200, $muri, $v1_path, \@v2_epochs, $ft, $mf, $m1 ];
}

sub get_fingerprint2 {
	my ($git_dir) = @_;
	require Digest::SHA;
	my $rd = popen_rd([qw(git show-ref)], undef, { -C => $git_dir });
	Digest::SHA::sha256(do { local $/; <$rd> });
}

sub writable_dir ($) {
	my ($dir) = @_;
	return unless -d $dir && -w _;
	my @st = stat($dir);
	$st[2] & 0222; # any writable bits set? (in case of root)
}

sub do_fetch { # main entry point
	my ($cls, $lei, $cd) = @_;
	my $ibx_ver;
	$lei->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
	my $dir = PublicInbox::Admin::resolve_inboxdir($cd, \$ibx_ver);
	my ($ibx_uri, @git_dir, @epochs, $mg, @new_epoch, $skip);
	if ($ibx_ver == 1) {
		my $url = remote_url($lei, $dir) //
			die "E: $dir missing remote.*.url\n";
		$ibx_uri = URI->new($url);
	} else { # v2:
		require PublicInbox::MultiGit;
		$mg = PublicInbox::MultiGit->new($dir, 'all.git', 'git');
		@epochs = $mg->git_epochs;
		my ($git_url, $epoch);
		for my $nr (@epochs) { # try newest epoch, first
			my $edir = "$dir/git/$nr.git";
			if (!writable_dir($edir)) {
				$skip->{$nr} = 1;
				next;
			}
			next if defined $git_url;
			if (defined(my $url = remote_url($lei, $edir))) {
				$git_url = $url;
				$epoch = $nr;
			} else {
				warn "W: $edir missing remote.*.url\n";
				my $pid = spawn([qw(git config -l)], undef,
					{ 1 => $lei->{2}, 2 => $lei->{2} });
				waitpid($pid, 0);
				$lei->child_error($?) if $?;
			}
		}
		@epochs = grep { !$skip->{$_} } @epochs if $skip;
		$skip //= {}; # makes code below easier
		$git_url or die "Unable to determine git URL\n";
		my $inbox_url = $git_url;
		$inbox_url =~ s!/git/$epoch(?:\.git)?/?\z!! or
			$inbox_url =~ s!/$epoch(?:\.git)?/?\z!! or die <
EOM
		$ibx_uri = URI->new($inbox_url);
	}
	PublicInbox::LeiMirror::write_makefile($dir, $ibx_ver);
	$lei->qerr("# inbox URL: $ibx_uri/");
	my $res = do_manifest($lei, $dir, $ibx_uri) or return;
	my ($code, $muri, $v1_path, $v2_epochs, $ft, $mf, $m1) = @$res;
	if ($code == 404) {
		# any pre-manifest.js.gz instances running? Just fetch all
		# existing ones and unconditionally try cloning the next
		$v2_epochs = [ map { "$dir/git/$_.git" } @epochs ];
		if (@epochs) {
			my $n = $epochs[-1] + 1;
			push @$v2_epochs, "$dir/git/$n.git" if !$skip->{$n};
		}
	} else {
		$code == 200 or die "BUG unexpected code $code\n";
	}
	my $mculled;
	if ($ibx_ver == 2) {
		defined($v1_path) and warn <, WTF?
EOM
		@git_dir = map { "$dir/git/$_.git" } sort { $a <=> $b } map {
				my ($nr) = (m!/([0-9]+)\.git\z!g);
				$skip->{$nr} ? () : $nr;
			} @$v2_epochs;
		if ($m1 && scalar keys %$skip) {
			my $re = join('|', keys %$skip);
			my @del = grep(m!/git/$re\.git\z!, keys %$m1);
			delete @$m1{@del};
			$mculled = 1;
		}
	} else {
		$git_dir[0] = $dir;
	}
	# n.b. this expects all epochs are from the same host
	my $torsocks = $lei->{curl}->torsocks($lei, $muri);
	my $fp2 = $lei->{opt}->{'exit-code'} ? [] : undef;
	my $xit = 127;
	for my $d (@git_dir) {
		my $cmd;
		my $opt = {}; # for spawn
		if (-d $d) {
			$fp2->[0] = get_fingerprint2($d) if $fp2;
			$cmd = [ @$torsocks, 'git', "--git-dir=$d",
				fetch_args($lei, $opt) ];
		} else {
			my $e_uri = $ibx_uri->clone;
			my ($epath) = ($d =~ m!(/git/[0-9]+\.git)\z!);
			defined($epath) or
				die "BUG: $d is not an epoch to clone\n";
			$e_uri->path($ibx_uri->path.$epath);
			$cmd = [ @$torsocks,
				PublicInbox::LeiMirror::clone_cmd($lei, $opt),
				$$e_uri, $d];
			push @new_epoch, substr($epath, 5, -4) + 0;
			$xit = 0;
		}
		my $cerr = PublicInbox::LeiMirror::run_reap($lei, $cmd, $opt);
		# do not bail on clone failure if we didn't have a manifest
		if ($cerr && ($code == 200 || -d $d)) {
			$lei->child_error($cerr, "@$cmd failed");
			return;
		}
		if ($fp2 && $xit) {
			$fp2->[1] = get_fingerprint2($d);
			$xit = 0 if $fp2->[0] ne $fp2->[1];
		}
	}
	for my $i (@new_epoch) { $mg->epoch_cfg_set($i) }
	if ($ft) {
		if ($mculled) {
			my $json = PublicInbox::Config->json->encode($m1);
			my $fn = $ft->filename;
			gzip(\$json => $fn) or die "gzip: $GzipError";
		}
		PublicInbox::LeiMirror::ft_rename($ft, $mf, 0666);
	}
	$lei->child_error($xit << 8) if $fp2 && $xit;
}

1;
public-inbox-1.9.0/lib/PublicInbox/Filter/000077500000000000000000000000001430031475700203315ustar00rootroot00000000000000public-inbox-1.9.0/lib/PublicInbox/Filter/Base.pm000066400000000000000000000052711430031475700215460ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors 
# License: AGPL-3.0+ 
#
# base class for creating per-list or per-project filters
package PublicInbox::Filter::Base;
use strict;
use warnings;
use PublicInbox::MsgIter;
use parent qw(Exporter);
our @EXPORT_OK = qw(REJECT); # we may export IGNORE if/when needed

sub No ($) { "*** We only accept plain-text mail, No $_[0] ***" }

our %DEFAULTS = (
	reject_suffix => [ qw(exe bat cmd com pif scr vbs cpl zip swf swfl) ],
	reject_type => [ 'text/html:'.No('HTML'), 'text/xhtml:'.No('HTML'),
		'application/vnd.*:'.No('vendor-specific formats'),
		'image/*:'.No('images'), 'video/*:'.No('video'),
		'audio/*:'.No('audio') ],
);
our $INVALID_FN = qr/\0/;

sub REJECT () { 100 }
sub ACCEPT { scalar @_ > 1 ? $_[1] : 1 }
sub IGNORE () { 0 }

my %patmap = ('*' => '.*', '?' => '.', '[' => '[', ']' => ']');
sub glob2pat {
	my ($glob) = @_;
        $glob =~ s!(.)!$patmap{$1} || "\Q$1"!ge;
        $glob;
}

sub new {
	my ($class, %opts) = @_;
	my $self = bless { err => '', %opts }, $class;
	foreach my $f (qw(reject_suffix reject_type)) {
		# allow undef:
		$self->{$f} = $DEFAULTS{$f} unless exists $self->{$f};
	}
	if (defined $self->{reject_suffix}) {
		my $tmp = $self->{reject_suffix};
		$tmp = join('|', map { glob2pat($_) } @$tmp);
		$self->{reject_suffix} = qr/\.($tmp)\s*\z/i;
	}
	my $rt = [];
	if (defined $self->{reject_type}) {
		my $tmp = $self->{reject_type};
		@$rt = map {
			my ($type, $msg) = split(':', $_, 2);
			$type = lc $type;
			$msg ||= "Unacceptable Content-Type: $type";
			my $re = glob2pat($type);
			[ qr/\b$re\b/i, $msg ];
		} @$tmp;
	}
	$self->{reject_type} = $rt;
	$self;
}

sub reject ($$) {
	my ($self, $reason) = @_;
	$self->{err} = $reason;
	REJECT;
}

sub err ($) { $_[0]->{err} }

# by default, scrub is a no-op, see PublicInbox::Filter::Vger::scrub
# for an example of the override.  The $for_remove arg is set to
# disable altid setting for spam removal.
sub scrub {
	my ($self, $mime, $for_remove) = @_;
	$self->ACCEPT($mime);
}

# for MDA
sub delivery {
	my ($self, $mime) = @_;

	my $rt = $self->{reject_type};
	my $reject_suffix = $self->{reject_suffix} || $INVALID_FN;
	my (%sfx, %type);

	msg_iter($mime, sub {
		my ($part, $depth, @idx) = @{$_[0]};

		my $ct = $part->content_type || 'text/plain';
		foreach my $p (@$rt) {
			if ($ct =~ $p->[0]) {
				$type{$p->[1]} = 1;
			}
		}

		my $fn = $part->filename;
		if (defined($fn) && $fn =~ $reject_suffix) {
			$sfx{$1} = 1;
		}
	});

	my @r;
	if (keys %type) {
		push @r, sort keys %type;
	}
	if (keys %sfx) {
		push @r, 'Rejected suffixes(s): '.join(', ', sort keys %sfx);
	}

	@r ? $self->reject(join("\n", @r)) : $self->scrub($mime);
}

1;
public-inbox-1.9.0/lib/PublicInbox/Filter/Gmane.pm000066400000000000000000000016421430031475700217210ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors 
# License: AGPL-3.0+ 

# Filter for importing some archives from gmane
package PublicInbox::Filter::Gmane;
use base qw(PublicInbox::Filter::Base);
use strict;
use warnings;

sub scrub {
	my ($self, $mime) = @_;
	my $hdr = $mime->header_obj;

	# gmane rewrites Received headers, which increases spamminess
	# Some older archives set Original-To
	foreach my $x (qw(Received To)) {
		my @h = $hdr->header_raw("Original-$x");
		if (@h) {
			$hdr->header_set($x, @h);
			$hdr->header_set("Original-$x");
		}
	}

	# Approved triggers for the SA HEADER_SPAM rule,
	# X-From is gmane specific
	foreach my $drop (qw(Approved X-From)) {
		$hdr->header_set($drop);
	}

	# appears to be an old gmane bug:
	$hdr->header_set('connect()');

	$self->ACCEPT($mime);
}

sub delivery {
	my ($self, $mime) = @_;
	$self->scrub($mime);
}

1;
public-inbox-1.9.0/lib/PublicInbox/Filter/Mirror.pm000066400000000000000000000005001430031475700221340ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors 
# License: AGPL-3.0+ 

# Dumb filter for blindly accepting everything
package PublicInbox::Filter::Mirror;
use base qw(PublicInbox::Filter::Base);
use strict;
use warnings;

sub delivery { $_[0]->ACCEPT };

1;
public-inbox-1.9.0/lib/PublicInbox/Filter/RubyLang.pm000066400000000000000000000037031430031475700224150ustar00rootroot00000000000000# Copyright (C) 2017-2021 all contributors 
# License: AGPL-3.0+ 

# Filter for lists.ruby-lang.org trailers
package PublicInbox::Filter::RubyLang;
use base qw(PublicInbox::Filter::Base);
use strict;
use warnings;
use PublicInbox::MID qw(mids);

my $l1 = qr/Unsubscribe:\s
	/x;
my $l2 = qr{};

sub new {
	my ($class, %opts) = @_;
	my $altid = delete $opts{-altid};
	my $self = $class->SUPER::new(%opts);
	my $ibx = $self->{ibx};
	# altid = serial:ruby-core:file=msgmap.sqlite3
	if (!$altid && $ibx && $ibx->{altid}) {
		$altid ||= $ibx->{altid}->[0];
	}
	if ($altid) {
		require PublicInbox::AltId;
		$self->{-altid} = PublicInbox::AltId->new($ibx, $altid, 1);
	}
	$self;
}

sub scrub_part ($) {
	my ($part) = @_;
	my $ct = $part->content_type;
	if (!$ct || $ct =~ m{\btext/plain\b}i) {
		my $s = eval { $part->body_str };
		if (defined $s && $s =~ s/\n?$l1\n$l2\n\z//os) {
			$part->body_str_set($s);
			return 1;
		}
	}
	0;
}

sub scrub {
	my ($self, $mime, $for_remove) = @_;
	# no msg_iter here, msg_iter is only for read-only access
	if (my @sub = $mime->subparts) {
		my $changed = 0;
		$changed |= scrub_part($_) for @sub;
		$mime->parts_set(\@sub) if $changed;
	} else {
		scrub_part($mime);
	}
	my $altid = $self->{-altid};
	if ($altid && !$for_remove) {
		my $hdr = $mime->header_obj;
		my $mids = mids($hdr);
		return $self->REJECT('Message-ID missing') unless (@$mids);
		my @v = $hdr->header_raw('X-Mail-Count');
		my $n;
		foreach (@v) {
			/\A\s*([0-9]+)\s*\z/ or next;
			$n = $1;
			last;
		}
		unless (defined $n) {
			return $self->REJECT('X-Mail-Count not numeric');
		}
		foreach my $mid (@$mids) {
			my $r = $altid->mm_alt->mid_set($n, $mid);
			next if $r == 0;
			last;
		}
	}
	$self->ACCEPT($mime);
}

sub delivery {
	my ($self, $mime) = @_;
	$self->scrub($mime);
}

1;
public-inbox-1.9.0/lib/PublicInbox/Filter/SubjectTag.pm000066400000000000000000000015061430031475700227240ustar00rootroot00000000000000# Copyright (C) 2017-2021 all contributors 
# License: AGPL-3.0+ 

# Filter for various [tags] in subjects
package PublicInbox::Filter::SubjectTag;
use strict;
use warnings;
use base qw(PublicInbox::Filter::Base);

sub new {
	my ($class, %opts) = @_;
	my $tag = delete $opts{-tag};
	die "tag not defined!\n" unless defined $tag && $tag ne '';
	my $self = $class->SUPER::new(%opts);
	$self->{tag_re} = qr/\A\s*(re:\s+|)\Q$tag\E\s*/i;
	$self;
}

sub scrub {
	my ($self, $mime) = @_;
	my $subj = $mime->header('Subject');
	if (defined $subj) {
		$subj =~ s/$self->{tag_re}/$1/; # $1 is "Re: "
		$mime->header_str_set('Subject', $subj);
	}
	$self->ACCEPT($mime);
}

# no suffix/article rejection for mirrors
sub delivery {
	my ($self, $mime) = @_;
	$self->scrub($mime);
}

1;
public-inbox-1.9.0/lib/PublicInbox/Filter/Vger.pm000066400000000000000000000021711430031475700215730ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors 
# License: AGPL-3.0+ 

# Filter for vger.kernel.org list trailer
package PublicInbox::Filter::Vger;
use base qw(PublicInbox::Filter::Base);
use strict;
use PublicInbox::Eml;

my $l0 = qr/-+/; # older messages only had one '-'
my $l1 =
 qr/To unsubscribe from this list: send the line "unsubscribe [\w-]+" in/;
my $l2 = qr/the body of a message to majordomo\@vger\.kernel\.org/;
my $l3 =
  qr!More majordomo info at +http://vger\.kernel\.org/majordomo-info\.html!;

# only LKML had this, and LKML nowadays has no list trailer since Jan 2016
my $l4 = qr!Please read the FAQ at +http://www\.tux\.org/lkml/!;

sub scrub {
	my ($self, $mime) = @_;
	my $s = $mime->as_string;

	# the vger appender seems to only work on the raw string,
	# so in multipart (e.g. GPG-signed) messages, the list trailer
	# becomes invisible to MIME-aware email clients.
	if ($s =~ s/$l0\n$l1\n$l2\n$l3\n(?:$l4\n)?\n*\z//os) {
		$mime = PublicInbox::Eml->new(\$s);
	}
	$self->ACCEPT($mime);
}

sub delivery {
	my ($self, $mime) = @_;
	$self->scrub($mime);
}

1;
public-inbox-1.9.0/lib/PublicInbox/Gcf2.pm000066400000000000000000000112631430031475700202260ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors 
# License: AGPL-3.0+ 

# backend for a git-cat-file-workalike based on libgit2,
# other libgit2 stuff may go here, too.
package PublicInbox::Gcf2;
use strict;
use v5.10.1;
use PublicInbox::Spawn qw(which popen_rd); # may set PERL_INLINE_DIRECTORY
use Fcntl qw(LOCK_EX SEEK_SET);
use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
use IO::Handle; # autoflush
BEGIN {
	my (%CFG, $c_src);
	# PublicInbox::Spawn will set PERL_INLINE_DIRECTORY
	# to ~/.cache/public-inbox/inline-c if it exists
	my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //
		die 'PERL_INLINE_DIRECTORY not defined';
	my $f = "$inline_dir/.public-inbox.lock";
	open my $fh, '+>', $f or die "open($f): $!";

	# CentOS 7.x ships Inline 0.53, 0.64+ has built-in locking
	flock($fh, LOCK_EX) or die "LOCK_EX($f): $!\n";

	my $pc = which($ENV{PKG_CONFIG} // 'pkg-config') //
		die "pkg-config missing for libgit2";
	my ($dir) = (__FILE__ =~ m!\A(.+?)/[^/]+\z!);
	my $ef = "$inline_dir/.public-inbox.pkg-config.err";
	open my $err, '+>', $ef or die "open($ef): $!";
	for my $x (qw(libgit2)) {
		my $rdr = { 2 => $err };
		my ($l, $pid) = popen_rd([$pc, '--libs', $x], undef, $rdr);
		$l = do { local $/; <$l> };
		waitpid($pid, 0);
		next if $?;
		(my $c, $pid) = popen_rd([$pc, '--cflags', $x], undef, $rdr);
		$c = do { local $/; <$c> };
		waitpid($pid, 0);
		next if $?;

		# note: we name C source files .h to prevent
		# ExtUtils::MakeMaker from automatically trying to
		# build them.
		my $f = "$dir/gcf2_$x.h";
		open(my $src, '<', $f) or die "E: open($f): $!";
		chomp($l, $c);
		local $/;
		defined($c_src = <$src>) or die "read $f: $!";
		$CFG{LIBS} = $l;
		$CFG{CCFLAGSEX} = $c;
		last;
	}
	unless ($c_src) {
		seek($err, 0, SEEK_SET);
		$err = do { local $/; <$err> };
		die "E: libgit2 not installed: $err\n";
	}
	open my $oldout, '>&', \*STDOUT or die "dup(1): $!";
	open my $olderr, '>&', \*STDERR or die "dup(2): $!";
	open STDOUT, '>&', $fh or die "1>$f: $!";
	open STDERR, '>&', $fh or die "2>$f: $!";
	STDERR->autoflush(1);
	STDOUT->autoflush(1);

	# we use Capitalized and ALLCAPS for compatibility with old Inline::C
	eval <<'EOM';
use Inline C => Config => %CFG, BOOT => q[git_libgit2_init();];
use Inline C => $c_src, BUILD_NOISY => 1;
EOM
	$err = $@;
	open(STDERR, '>&', $olderr) or warn "restore stderr: $!";
	open(STDOUT, '>&', $oldout) or warn "restore stdout: $!";
	if ($err) {
		seek($fh, 0, SEEK_SET);
		my @msg = <$fh>;
		die "Inline::C Gcf2 build failed:\n", $err, "\n", @msg;
	}
}

sub add_alt ($$) {
	my ($gcf2, $objdir) = @_;

	# libgit2 (tested 0.27.7+dfsg.1-0.2 and 0.28.3+dfsg.1-1~bpo10+1
	# in Debian) doesn't handle relative epochs properly when nested
	# multiple levels.  Add all the absolute paths to workaround it,
	# since $EXTINDEX_DIR/ALL.git/objects/info/alternates uses absolute
	# paths to reference $V2INBOX_DIR/all.git/objects and
	# $V2INBOX_DIR/all.git/objects/info/alternates uses relative paths
	# to refer to $V2INBOX_DIR/git/$EPOCH.git/objects
	#
	# See https://bugs.debian.org/975607
	if (open(my $fh, '<', "$objdir/info/alternates")) {
		chomp(my @abs_alt = grep(m!^/!, <$fh>));
		$gcf2->add_alternate($_) for @abs_alt;
	}
	$gcf2->add_alternate($objdir);
	1;
}

sub have_unlinked_files () {
	# FIXME: port gcf2-like over to git.git so we won't need to
	# deal with libgit2
	return 1 if $^O ne 'linux';
	open my $fh, '<', "/proc/$$/maps" or return;
	while (<$fh>) { return 1 if /\.(?:idx|pack) \(deleted\)$/ }
	undef;
}

# Usage: $^X -MPublicInbox::Gcf2 -e PublicInbox::Gcf2::loop [EXPIRE-TIMEOUT]
# (see lib/PublicInbox/Gcf2Client.pm)
sub loop (;$) {
	my $exp = $_[0] || $ARGV[0] || 60; # seconds
	my $gcf2 = new();
	my (%seen, $check_at);
	STDERR->autoflush(1);
	STDOUT->autoflush(1);

	while () {
		chomp;
		my ($oid, $git_dir) = split(/ /, $_, 2);
		$seen{$git_dir} //= add_alt($gcf2, "$git_dir/objects");
		if (!$gcf2->cat_oid(1, $oid)) {
			# retry once if missing.  We only get unabbreviated OIDs
			# from SQLite or Xapian DBs, here, so malicious clients
			# can't trigger excessive retries:
			warn "I: $$ $oid missing, retrying in $git_dir\n";

			$gcf2 = new();
			%seen = ($git_dir => add_alt($gcf2,"$git_dir/objects"));
			$check_at = clock_gettime(CLOCK_MONOTONIC) + $exp;

			if ($gcf2->cat_oid(1, $oid)) {
				warn "I: $$ $oid found after retry\n";
			} else {
				warn "W: $$ $oid missing after retry\n";
				print "$oid missing\n"; # mimic git-cat-file
			}
		} else { # check expiry to deal with deleted pack files
			my $now = clock_gettime(CLOCK_MONOTONIC);
			$check_at //= $now + $exp;
			if ($now > $check_at) {
				undef $check_at;
				if (have_unlinked_files()) {
					$gcf2 = new();
					%seen = ();
				}
			}
		}
	}
}

1;
public-inbox-1.9.0/lib/PublicInbox/Gcf2Client.pm000066400000000000000000000047301430031475700213660ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors 
# License: AGPL-3.0+ 

# connects public-inbox processes to PublicInbox::Gcf2::loop()
package PublicInbox::Gcf2Client;
use strict;
use parent qw(PublicInbox::DS);
use PublicInbox::Git;
use PublicInbox::Gcf2; # fails if Inline::C or libgit2-dev isn't available
use PublicInbox::Spawn qw(spawn);
use Socket qw(AF_UNIX SOCK_STREAM);
use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
# fields:
#	sock => socket to Gcf2::loop
# The rest of these fields are compatible with what PublicInbox::Git
# uses code-sharing
#	pid => PID of Gcf2::loop process
#	pid.owner => process which spawned {pid}
#	in => same as {sock}, for compatibility with PublicInbox::Git
#	inflight => array (see PublicInbox::Git)
#	rbuf => scalarref, may be non-existent or empty
sub new  {
	my ($rdr) = @_;
	my $self = bless {}, __PACKAGE__;
	# ensure the child process has the same @INC we do:
	my $env = { PERL5LIB => join(':', @INC) };
	my ($s1, $s2);
	socketpair($s1, $s2, AF_UNIX, SOCK_STREAM, 0) or die "socketpair $!";
	$rdr //= {};
	$rdr->{0} = $rdr->{1} = $s2;
	my $cmd = [$^X, qw[-MPublicInbox::Gcf2 -e PublicInbox::Gcf2::loop]];
	$self->{'pid.owner'} = $$;
	$self->{pid} = spawn($cmd, $env, $rdr);
	$s1->blocking(0);
	$self->{inflight} = [];
	$self->{in} = $s1;
	$self->SUPER::new($s1, EPOLLIN|EPOLLET);
}

sub fail {
	my $self = shift;
	$self->close; # PublicInbox::DS::close
	PublicInbox::Git::fail($self, @_);
}

sub gcf2_async ($$$;$) {
	my ($self, $req, $cb, $arg) = @_;
	my $inflight = $self->{inflight} or return $self->close;

	# {wbuf} is rare, I hope:
	cat_async_step($self, $inflight) if $self->{wbuf};

	$self->fail("gcf2c write: $!") if !$self->write($req) && !$self->{sock};
	push @$inflight, $req, $cb, $arg;
}

# ensure PublicInbox::Git::cat_async_step never calls cat_async_retry
sub alternates_changed {}

# DS::event_loop will call this
sub event_step {
	my ($self) = @_;
	$self->flush_write;
	$self->close if !$self->{in} || !$self->{sock}; # process died
	my $inflight = $self->{inflight};
	if ($inflight && @$inflight) {
		cat_async_step($self, $inflight);
		return $self->close unless $self->{in}; # process died

		# ok, more to do, requeue for fairness
		$self->requeue if @$inflight || exists($self->{rbuf});
	}
}

sub DESTROY {
	my ($self) = @_;
	delete $self->{sock}; # if outside event_loop
	PublicInbox::Git::DESTROY($self);
}

no warnings 'once';

*cat_async_step = \&PublicInbox::Git::cat_async_step;

1;
public-inbox-1.9.0/lib/PublicInbox/GetlineBody.pm000066400000000000000000000024101430031475700216440ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors 
# License: AGPL-3.0+ 

# Wrap a pipe or file for PSGI streaming response bodies and calls the
# end callback when the object goes out-of-scope.
# This depends on rpipe being _blocking_ on getline.
#
# This is only used by generic PSGI servers and not public-inbox-httpd
package PublicInbox::GetlineBody;
use strict;
use warnings;

sub new {
	my ($class, $rpipe, $end, $end_arg, $buf, $filter) = @_;
	bless {
		rpipe => $rpipe,
		end => $end,
		end_arg => $end_arg,
		initial_buf => $buf,
		filter => $filter,
	}, $class;
}

# close should always be called after getline returns undef,
# but a client aborting a connection can ruin our day; so lets
# hope our underlying PSGI server does not leak references, here.
sub DESTROY { $_[0]->close }

sub getline {
	my ($self) = @_;
	my $rpipe = $self->{rpipe} or return; # EOF was set on previous call
	my $buf = delete($self->{initial_buf}) // $rpipe->getline;
	delete($self->{rpipe}) unless defined $buf; # set EOF for next call
	if (my $filter = $self->{filter}) {
		$buf = $filter->translate($buf);
	}
	$buf;
}

sub close {
	my ($self) = @_;
	my ($end, $end_arg) = delete @$self{qw(end end_arg)};
	$end->($end_arg) if $end;
}

1;
public-inbox-1.9.0/lib/PublicInbox/Git.pm000066400000000000000000000416041430031475700201720ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: GPLv2 or later 
#
# Used to read files from a git repository without excessive forking.
# Used in our web interfaces as well as our -nntpd server.
# This is based on code in Git.pm which is GPLv2+, but modified to avoid
# dependence on environment variables for compatibility with mod_perl.
# There are also API changes to simplify our usage and data set.
package PublicInbox::Git;
use strict;
use v5.10.1;
use parent qw(Exporter);
use POSIX ();
use IO::Handle; # ->autoflush
use Errno qw(EINTR EAGAIN ENOENT);
use File::Glob qw(bsd_glob GLOB_NOSORT);
use File::Spec ();
use Time::HiRes qw(stat);
use PublicInbox::Spawn qw(popen_rd spawn);
use PublicInbox::Tmpfile;
use IO::Poll qw(POLLIN);
use Carp qw(croak carp);
use Digest::SHA ();
use PublicInbox::DS qw(dwaitpid);
our @EXPORT_OK = qw(git_unquote git_quote);
our $PIPE_BUFSIZ = 65536; # Linux default
our $in_cleanup;
our $RDTIMEO = 60_000; # milliseconds
our $async_warn; # true in read-only daemons

use constant MAX_INFLIGHT => (POSIX::PIPE_BUF * 3) /
	65; # SHA-256 hex size + "\n" in preparation for git using non-SHA1

my %GIT_ESC = (
	a => "\a",
	b => "\b",
	f => "\f",
	n => "\n",
	r => "\r",
	t => "\t",
	v => "\013",
	'"' => '"',
	'\\' => '\\',
);
my %ESC_GIT = map { $GIT_ESC{$_} => $_ } keys %GIT_ESC;


# unquote pathnames used by git, see quote.c::unquote_c_style.c in git.git
sub git_unquote ($) {
	return $_[0] unless ($_[0] =~ /\A"(.*)"\z/);
	$_[0] = $1;
	$_[0] =~ s!\\([\\"abfnrtv]|[0-3][0-7]{2})!$GIT_ESC{$1}//chr(oct($1))!ge;
	$_[0];
}

sub git_quote ($) {
	if ($_[0] =~ s/([\\"\a\b\f\n\r\t\013]|[^[:print:]])/
		      '\\'.($ESC_GIT{$1}||sprintf("%03o",ord($1)))/egs) {
		return qq{"$_[0]"};
	}
	$_[0];
}

sub new {
	my ($class, $git_dir) = @_;
	$git_dir =~ tr!/!/!s;
	$git_dir =~ s!/*\z!!s;
	# may contain {-tmp} field for File::Temp::Dir
	bless { git_dir => $git_dir, alt_st => '', -git_path => {} }, $class
}

sub git_path ($$) {
	my ($self, $path) = @_;
	$self->{-git_path}->{$path} //= do {
		local $/ = "\n";
		chomp(my $str = $self->qx(qw(rev-parse --git-path), $path));

		# git prior to 2.5.0 did not understand --git-path
		if ($str eq "--git-path\n$path") {
			$str = "$self->{git_dir}/$path";
		}
		$str;
	};
}

sub alternates_changed {
	my ($self) = @_;
	my $alt = git_path($self, 'objects/info/alternates');
	my @st = stat($alt) or return 0;

	# can't rely on 'q' on some 32-bit builds, but `d' works
	my $st = pack('dd', $st[10], $st[7]); # 10: ctime, 7: size
	return 0 if $self->{alt_st} eq $st;
	$self->{alt_st} = $st; # always a true value
}

sub object_format {
	$_[0]->{object_format} //= do {
		my $fmt = $_[0]->qx(qw(config extensions.objectformat));
		$fmt eq "sha256\n" ? \'sha256' : \undef;
	}
}

sub last_check_err {
	my ($self) = @_;
	my $fh = $self->{err_c} or return;
	sysseek($fh, 0, 0) or $self->fail("sysseek failed: $!");
	defined(sysread($fh, my $buf, -s $fh)) or
			$self->fail("sysread failed: $!");
	$buf;
}

sub _bidi_pipe {
	my ($self, $batch, $in, $out, $pid, $err) = @_;
	if ($self->{$pid}) {
		if (defined $err) { # "err_c"
			my $fh = $self->{$err};
			sysseek($fh, 0, 0) or $self->fail("sysseek failed: $!");
			truncate($fh, 0) or $self->fail("truncate failed: $!");
		}
		return;
	}
	pipe(my ($out_r, $out_w)) or $self->fail("pipe failed: $!");
	my $rdr = { 0 => $out_r, pgid => 0 };
	my $gd = $self->{git_dir};
	if ($gd =~ s!/([^/]+/[^/]+)\z!/!) {
		$rdr->{-C} = $gd;
		$gd = $1;
	}
	my @cmd = (qw(git), "--git-dir=$gd",
			qw(-c core.abbrev=40 cat-file), $batch);
	if ($err) {
		my $id = "git.$self->{git_dir}$batch.err";
		my $fh = tmpfile($id) or $self->fail("tmpfile($id): $!");
		$self->{$err} = $fh;
		$rdr->{2} = $fh;
	}
	my ($in_r, $p) = popen_rd(\@cmd, undef, $rdr);
	$self->{$pid} = $p;
	$self->{"$pid.owner"} = $$;
	$out_w->autoflush(1);
	if ($^O eq 'linux') { # 1031: F_SETPIPE_SZ
		fcntl($out_w, 1031, 4096);
		fcntl($in_r, 1031, 4096) if $batch eq '--batch-check';
	}
	$self->{$out} = $out_w;
	$self->{$in} = $in_r;
}

sub poll_in ($) { IO::Poll::_poll($RDTIMEO, fileno($_[0]), my $ev = POLLIN) }

sub my_read ($$$) {
	my ($fh, $rbuf, $len) = @_;
	my $left = $len - length($$rbuf);
	my $r;
	while ($left > 0) {
		$r = sysread($fh, $$rbuf, $PIPE_BUFSIZ, length($$rbuf));
		if ($r) {
			$left -= $r;
		} elsif (defined($r)) { # EOF
			return 0;
		} else {
			next if ($! == EAGAIN and poll_in($fh));
			next if $! == EINTR; # may be set by sysread or poll_in
			return; # unrecoverable error
		}
	}
	my $no_pad = substr($$rbuf, 0, $len, '');
	\$no_pad;
}

sub my_readline ($$) {
	my ($fh, $rbuf) = @_;
	while (1) {
		if ((my $n = index($$rbuf, "\n")) >= 0) {
			return substr($$rbuf, 0, $n + 1, '');
		}
		my $r = sysread($fh, $$rbuf, $PIPE_BUFSIZ, length($$rbuf))
								and next;

		# return whatever's left on EOF
		return substr($$rbuf, 0, length($$rbuf)+1, '') if defined($r);

		next if ($! == EAGAIN and poll_in($fh));
		next if $! == EINTR; # may be set by sysread or poll_in
		return; # unrecoverable error
	}
}

sub cat_async_retry ($$) {
	my ($self, $inflight) = @_;

	# {inflight} may be non-existent, but if it isn't we delete it
	# here to prevent cleanup() from waiting:
	delete $self->{inflight};
	cleanup($self);

	$self->{inflight} = $inflight;
	batch_prepare($self);
	my $buf = '';
	for (my $i = 0; $i < @$inflight; $i += 3) {
		$buf .= "$inflight->[$i]\n";
	}
	print { $self->{out} } $buf or $self->fail("write error: $!");
	my $req = shift @$inflight;
	unshift(@$inflight, \$req); # \$ref to indicate retried

	cat_async_step($self, $inflight); # take one step
}

sub cat_async_step ($$) {
	my ($self, $inflight) = @_;
	die 'BUG: inflight empty or odd' if scalar(@$inflight) < 3;
	my ($req, $cb, $arg) = @$inflight[0, 1, 2];
	my $rbuf = delete($self->{rbuf}) // \(my $new = '');
	my ($bref, $oid, $type, $size);
	my $head = my_readline($self->{in}, $rbuf);
	# ->fail may be called via Gcf2Client.pm
	if ($head =~ /^([0-9a-f]{40,}) (\S+) ([0-9]+)$/) {
		($oid, $type, $size) = ($1, $2, $3 + 0);
		$bref = my_read($self->{in}, $rbuf, $size + 1) or
			$self->fail(defined($bref) ? 'read EOF' : "read: $!");
		chop($$bref) eq "\n" or $self->fail('LF missing after blob');
	} elsif ($head =~ s/ missing\n//s) {
		$oid = $head;
		# ref($req) indicates it's already been retried
		# -gcf2 retries internally, so it never hits this path:
		if (!ref($req) && !$in_cleanup && $self->alternates_changed) {
			return cat_async_retry($self, $inflight);
		}
		$type = 'missing';
		$oid = ref($req) ? $$req : $req if $oid eq '';
	} else {
		my $err = $! ? " ($!)" : '';
		$self->fail("bad result from async cat-file: $head$err");
	}
	$self->{rbuf} = $rbuf if $$rbuf ne '';
	splice(@$inflight, 0, 3); # don't retry $cb on ->fail
	eval { $cb->($bref, $oid, $type, $size, $arg) };
	async_err($self, $req, $oid, $@, 'cat') if $@;
}

sub cat_async_wait ($) {
	my ($self) = @_;
	my $inflight = $self->{inflight} or return;
	while (scalar(@$inflight)) {
		cat_async_step($self, $inflight);
	}
}

sub batch_prepare ($) {
	_bidi_pipe($_[0], qw(--batch in out pid));
}

sub _cat_file_cb {
	my ($bref, $oid, $type, $size, $result) = @_;
	@$result = ($bref, $oid, $type, $size);
}

sub cat_file {
	my ($self, $oid) = @_;
	my $result = [];
	cat_async($self, $oid, \&_cat_file_cb, $result);
	cat_async_wait($self);
	wantarray ? @$result : $result->[0];
}

sub check_async_step ($$) {
	my ($self, $inflight_c) = @_;
	die 'BUG: inflight empty or odd' if scalar(@$inflight_c) < 3;
	my ($req, $cb, $arg) = @$inflight_c[0, 1, 2];
	my $rbuf = delete($self->{rbuf_c}) // \(my $new = '');
	chomp(my $line = my_readline($self->{in_c}, $rbuf));
	my ($hex, $type, $size) = split(/ /, $line);

	# Future versions of git.git may have type=ambiguous, but for now,
	# we must handle 'dangling' below (and maybe some other oddball
	# stuff):
	# https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/
	if ($hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop') {
		my $ret = my_read($self->{in_c}, $rbuf, $type + 1);
		$self->fail(defined($ret) ? 'read EOF' : "read: $!") if !$ret;
	}
	$self->{rbuf_c} = $rbuf if $$rbuf ne '';
	splice(@$inflight_c, 0, 3); # don't retry $cb on ->fail
	eval { $cb->($hex, $type, $size, $arg, $self) };
	async_err($self, $req, $hex, $@, 'check') if $@;
}

sub check_async_wait ($) {
	my ($self) = @_;
	my $inflight_c = $self->{inflight_c} or return;
	while (scalar(@$inflight_c)) {
		check_async_step($self, $inflight_c);
	}
}

sub check_async_begin ($) {
	my ($self) = @_;
	cleanup($self) if alternates_changed($self);
	_bidi_pipe($self, qw(--batch-check in_c out_c pid_c err_c));
	die 'BUG: already in async check' if $self->{inflight_c};
	$self->{inflight_c} = [];
}

sub check_async ($$$$) {
	my ($self, $oid, $cb, $arg) = @_;
	my $inflight_c = $self->{inflight_c} // check_async_begin($self);
	while (scalar(@$inflight_c) >= MAX_INFLIGHT) {
		check_async_step($self, $inflight_c);
	}
	print { $self->{out_c} } $oid, "\n" or $self->fail("write error: $!");
	push(@$inflight_c, $oid, $cb, $arg);
}

sub _check_cb { # check_async callback
	my ($hex, $type, $size, $result) = @_;
	@$result = ($hex, $type, $size);
}

sub check {
	my ($self, $oid) = @_;
	my $result = [];
	check_async($self, $oid, \&_check_cb, $result);
	check_async_wait($self);
	my ($hex, $type, $size) = @$result;

	# Future versions of git.git may show 'ambiguous', but for now,
	# we must handle 'dangling' below (and maybe some other oddball
	# stuff):
	# https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/
	return if $type eq 'missing' || $type eq 'ambiguous';
	return if $hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop';
	($hex, $type, $size);
}

sub _destroy {
	my ($self, $rbuf, $in, $out, $pid, $err) = @_;
	delete @$self{($rbuf, $in, $out)};
	delete $self->{$err} if $err; # `err_c'

	# GitAsyncCat::event_step may delete {pid}
	my $p = delete $self->{$pid} or return;
	dwaitpid($p) if $$ == $self->{"$pid.owner"};
}

sub async_abort ($) {
	my ($self) = @_;
	while (scalar(@{$self->{inflight_c} // []}) ||
			scalar(@{$self->{inflight} // []})) {
		for my $c ('', '_c') {
			my $q = $self->{"inflight$c"} or next;
			while (@$q) {
				my ($req, $cb, $arg) = splice(@$q, 0, 3);
				$req = $$req if ref($req);
				$req =~ s/ .*//; # drop git_dir for Gcf2Client
				eval { $cb->(undef, $req, undef, undef, $arg) };
				warn "E: (in abort) $req: $@" if $@;
			}
			delete $self->{"inflight$c"};
			delete $self->{"rbuf$c"};
		}
	}
	cleanup($self);
}

sub fail { # may be augmented in subclasses
	my ($self, $msg) = @_;
	async_abort($self);
	croak(ref($self) . ' ' . ($self->{git_dir} // '') . ": $msg");
}

sub async_err ($$$$$) {
	my ($self, $req, $oid, $err, $action) = @_;
	$req = $$req if ref($req); # retried
	my $msg = "E: $action $req ($oid): $err";
	$async_warn ? carp($msg) : $self->fail($msg);
}

# $git->popen(qw(show f00)); # or
# $git->popen(qw(show f00), { GIT_CONFIG => ... }, { 2 => ... });
sub popen {
	my ($self, $cmd) = splice(@_, 0, 2);
	$cmd = [ 'git', "--git-dir=$self->{git_dir}",
		ref($cmd) ? @$cmd : ($cmd, grep { defined && !ref } @_) ];
	popen_rd($cmd, grep { !defined || ref } @_); # env and opt
}

# same args as popen above
sub qx {
	my $fh = popen(@_);
	if (wantarray) {
		my @ret = <$fh>;
		close $fh; # caller should check $?
		@ret;
	} else {
		local $/;
		my $ret = <$fh>;
		close $fh; # caller should check $?
		$ret;
	}
}

sub date_parse {
	my $self = shift;
	map {
		substr($_, length('--max-age='), -1)
	} $self->qx('rev-parse', map { "--since=$_" } @_);
}

# check_async and cat_async may trigger the other, so ensure they're
# both completely done by using this:
sub async_wait_all ($) {
	my ($self) = @_;
	while (scalar(@{$self->{inflight_c} // []}) ||
			scalar(@{$self->{inflight} // []})) {
		check_async_wait($self);
		cat_async_wait($self);
	}
}

# returns true if there are pending "git cat-file" processes
sub cleanup {
	my ($self, $lazy) = @_;
	return 1 if $lazy && (scalar(@{$self->{inflight_c} // []}) ||
				scalar(@{$self->{inflight} // []}));
	local $in_cleanup = 1;
	delete $self->{async_cat};
	async_wait_all($self);
	delete $self->{inflight};
	delete $self->{inflight_c};
	_destroy($self, qw(rbuf in out pid));
	_destroy($self, qw(rbuf_c in_c out_c pid_c err_c));
	undef;
}

# assuming a well-maintained repo, this should be a somewhat
# accurate estimation of its size
# TODO: show this in the WWW UI as a hint to potential cloners
sub packed_bytes {
	my ($self) = @_;
	my $n = 0;
	my $pack_dir = git_path($self, 'objects/pack');
	foreach my $p (bsd_glob("$pack_dir/*.pack", GLOB_NOSORT)) {
		$n += -s $p;
	}
	$n
}

sub DESTROY { cleanup(@_) }

sub local_nick ($) {
	# don't show full FS path, basename should be OK:
	$_[0]->{git_dir} =~ m!/([^/]+?)(?:/*\.git/*)?\z! ? "$1.git" : '???';
}

sub host_prefix_url ($$) {
	my ($env, $url) = @_;
	return $url if index($url, '//') >= 0;
	my $scheme = $env->{'psgi.url_scheme'};
	my $host_port = $env->{HTTP_HOST} //
		"$env->{SERVER_NAME}:$env->{SERVER_PORT}";
	"$scheme://$host_port". ($env->{SCRIPT_NAME} || '/') . $url;
}

sub pub_urls {
	my ($self, $env) = @_;
	if (my $urls = $self->{cgit_url}) {
		return map { host_prefix_url($env, $_) } @$urls;
	}
	(local_nick($self));
}

sub cat_async_begin {
	my ($self) = @_;
	cleanup($self) if $self->alternates_changed;
	$self->batch_prepare;
	die 'BUG: already in async' if $self->{inflight};
	$self->{inflight} = [];
}

sub cat_async ($$$;$) {
	my ($self, $oid, $cb, $arg) = @_;
	my $inflight = $self->{inflight} // cat_async_begin($self);
	while (scalar(@$inflight) >= MAX_INFLIGHT) {
		cat_async_step($self, $inflight);
	}
	print { $self->{out} } $oid, "\n" or $self->fail("write error: $!");
	push(@$inflight, $oid, $cb, $arg);
}

# returns the modified time of a git repo, same as the "modified" field
# of a grokmirror manifest
sub modified ($) {
	# committerdate:unix is git 2.9.4+ (2017-05-05), so using raw instead
	my $fh = popen($_[0], qw[for-each-ref --sort=-committerdate
				--format=%(committerdate:raw) --count=1]);
	(split(/ /, <$fh> // time))[0] + 0; # integerize for JSON
}

# for grokmirror, which doesn't read gitweb.description
# templates/hooks--update.sample and git-multimail in git.git
# only match "Unnamed repository", not the full contents of
# templates/this--description in git.git
sub manifest_entry {
	my ($self, $epoch, $default_desc) = @_;
	my $fh = $self->popen('show-ref');
	my $dig = Digest::SHA->new(1);
	while (read($fh, my $buf, 65536)) {
		$dig->add($buf);
	}
	close $fh or return; # empty, uninitialized git repo
	undef $fh; # for open, below
	my $git_dir = $self->{git_dir};
	my $ent = {
		fingerprint => $dig->hexdigest,
		reference => undef,
		modified => modified($self),
	};
	chomp(my $owner = $self->qx('config', 'gitweb.owner'));
	utf8::decode($owner);
	$ent->{owner} = $owner eq '' ? undef : $owner;
	my $desc = '';
	if (open($fh, '<', "$git_dir/description")) {
		local $/ = "\n";
		chomp($desc = <$fh>);
		utf8::decode($desc);
	}
	$desc = 'Unnamed repository' if $desc eq '';
	if (defined $epoch && $desc =~ /\AUnnamed repository/) {
		$desc = "$default_desc [epoch $epoch]";
	}
	$ent->{description} = $desc;
	if (open($fh, '<', "$git_dir/objects/info/alternates")) {
		# n.b.: GitPython doesn't seem to handle comments or C-quoted
		# strings like native git does; and we don't for now, either.
		local $/ = "\n";
		chomp(my @alt = <$fh>);

		# grokmirror only supports 1 alternate for "reference",
		if (scalar(@alt) == 1) {
			my $objdir = "$git_dir/objects";
			my $ref = File::Spec->rel2abs($alt[0], $objdir);
			$ref =~ s!/[^/]+/?\z!!; # basename
			$ent->{reference} = $ref;
		}
	}
	$ent;
}

# returns true if there are pending cat-file processes
sub cleanup_if_unlinked {
	my ($self) = @_;
	return cleanup($self, 1) if $^O ne 'linux';
	# Linux-specific /proc/$PID/maps access
	# TODO: support this inside git.git
	my $ret = 0;
	for my $fld (qw(pid pid_c)) {
		my $pid = $self->{$fld} // next;
		open my $fh, '<', "/proc/$pid/maps" or return cleanup($self, 1);
		while (<$fh>) {
			# n.b. we do not restart for unlinked multi-pack-index
			# since it's not too huge, and the startup cost may
			# be higher.
			/\.(?:idx|pack) \(deleted\)$/ and
				return cleanup($self, 1);
		}
		++$ret;
	}
	$ret;
}

1;
__END__
=pod

=head1 NAME

PublicInbox::Git - git wrapper

=head1 VERSION

version 1.0

=head1 SYNOPSIS

	use PublicInbox::Git;
	chomp(my $git_dir = `git rev-parse --git-dir`);
	$git_dir or die "GIT_DIR= must be specified\n";
	my $git = PublicInbox::Git->new($git_dir);

=head1 DESCRIPTION

Unstable API outside of the L method.
It requires L to be installed.

=head1 METHODS

=cut

=head2 new

	my $git = PublicInbox::Git->new($git_dir);

Initialize a new PublicInbox::Git object for use with L
This is the only public API method we support.  Everything else
in this module is subject to change.

=head1 SEE ALSO

L, L

=head1 CONTACT

All feedback welcome via plain-text mail to L

The mail archives are hosted at L

=head1 COPYRIGHT

Copyright (C) 2016 all contributors L

License: AGPL-3.0+ L

=cut
public-inbox-1.9.0/lib/PublicInbox/GitAsyncCat.pm000066400000000000000000000054171430031475700216220ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors 
# License: AGPL-3.0+ 
#
# internal class used by PublicInbox::Git + PublicInbox::DS
# This parses the output pipe of "git cat-file --batch"
package PublicInbox::GitAsyncCat;
use strict;
use parent qw(PublicInbox::DS Exporter);
use POSIX qw(WNOHANG);
use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
our @EXPORT = qw(ibx_async_cat ibx_async_prefetch);
use PublicInbox::Git ();

our $GCF2C; # singleton PublicInbox::Gcf2Client

sub close {
	my ($self) = @_;
	if (my $git = delete $self->{git}) {
		$git->async_abort;
	}
	$self->SUPER::close; # PublicInbox::DS::close
}

sub event_step {
	my ($self) = @_;
	my $git = $self->{git} or return;
	return $self->close if ($git->{in} // 0) != ($self->{sock} // 1);
	my $inflight = $git->{inflight};
	if ($inflight && @$inflight) {
		$git->cat_async_step($inflight);

		# child death?
		if (($git->{in} // 0) != ($self->{sock} // 1)) {
			$self->close;
		} elsif (@$inflight || exists $git->{rbuf}) {
			# ok, more to do, requeue for fairness
			$self->requeue;
		}
	} elsif ((my $pid = waitpid($git->{pid}, WNOHANG)) > 0) {
		# May happen if the child process is killed by a BOFH
		# (or segfaults)
		delete $git->{pid};
		warn "E: git $pid exited with \$?=$?\n";
		$self->close;
	}
}

sub ibx_async_cat ($$$$) {
	my ($ibx, $oid, $cb, $arg) = @_;
	my $git = $ibx->git;
	# {topdir} means ExtSearch (likely [extindex "all"]) with potentially
	# 100K alternates.  git(1) has a proposed patch for 100K alternates:
	# 
	if (!defined($ibx->{topdir}) && ($GCF2C //= eval {
		require PublicInbox::Gcf2Client;
		PublicInbox::Gcf2Client::new();
	} // 0)) { # 0: do not retry if libgit2 or Inline::C are missing
		$GCF2C->gcf2_async(\"$oid $git->{git_dir}\n", $cb, $arg);
		\undef;
	} else { # read-only end of git-cat-file pipe
		$git->cat_async($oid, $cb, $arg);
		$git->{async_cat} //= do {
			my $self = bless { git => $git }, __PACKAGE__;
			$git->{in}->blocking(0);
			$self->SUPER::new($git->{in}, EPOLLIN|EPOLLET);
			\undef; # this is a true ref()
		};
	}
}

# this is safe to call inside $cb, but not guaranteed to enqueue
# returns true if successful, undef if not.  For fairness, we only
# prefetch if there's no in-flight requests.
sub ibx_async_prefetch {
	my ($ibx, $oid, $cb, $arg) = @_;
	my $git = $ibx->git;
	if (!defined($ibx->{topdir}) && $GCF2C) {
		if (!@{$GCF2C->{inflight} // []}) {
			$oid .= " $git->{git_dir}\n";
			return $GCF2C->gcf2_async(\$oid, $cb, $arg); # true
		}
	} elsif ($git->{async_cat} && (my $inflight = $git->{inflight})) {
		if (!@$inflight) {
			print { $git->{out} } $oid, "\n" or
						$git->fail("write error: $!");
			return push(@$inflight, $oid, $cb, $arg);
		}
	}
	undef;
}

1;
public-inbox-1.9.0/lib/PublicInbox/GitCredential.pm000066400000000000000000000036001430031475700221570ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors 
# License: AGPL-3.0+ 
package PublicInbox::GitCredential;
use strict;
use PublicInbox::Spawn qw(popen_rd);

sub run ($$;$) {
	my ($self, $op, $lei) = @_;
	my ($in_r, $in_w, $out_r);
	my $cmd = [ qw(git credential), $op ];
	pipe($in_r, $in_w) or die "pipe: $!";
	if ($lei) { # we'll die if disconnected:
		pipe($out_r, my $out_w) or die "pipe: $!";
		$lei->send_exec_cmd([ $in_r, $out_w ], $cmd, {});
	} else {
		$out_r = popen_rd($cmd, undef, { 0 => $in_r });
	}
	close $in_r or die "close in_r: $!";

	my $out = '';
	for my $k (qw(url protocol host username password)) {
		defined(my $v = $self->{$k}) or next;
		die "`$k' contains `\\n' or `\\0'\n" if $v =~ /[\n\0]/;
		$out .= "$k=$v\n";
	}
	$out .= "\n";
	print $in_w $out or die "print (git credential $op): $!";
	close $in_w or die "close (git credential $op): $!";
	return $out_r if $op eq 'fill';
	<$out_r> and die "unexpected output from `git credential $op'\n";
	close $out_r or die "`git credential $op' failed: \$!=$! \$?=$?\n";
}

sub check_netrc {
	my ($self, $lei) = @_;

	# n.b. lei doesn't load ~/.netrc by default, public-inbox-watch does,
	# which may've been a mistake, but we have to live with it.
	return if ($lei && !$lei->{opt}->{netrc});

	# part of the standard library, but distributions may split it out
	eval { require Net::Netrc };
	if ($@) {
		warn "W: Net::Netrc missing: $@\n";
		return;
	}
	if (my $x = Net::Netrc->lookup($self->{host}, $self->{username})) {
		$self->{username} //= $x->login;
		$self->{password} = $x->password;
	}
}

sub fill {
	my ($self, $lei) = @_;
	my $out_r = run($self, 'fill', $lei);
	while (<$out_r>) {
		chomp;
		return if $_ eq '';
		/\A([^=]+)=(.*)\z/ or die "bad line: $_\n";
		$self->{$1} = $2;
	}
	close $out_r or die "git credential fill failed: \$!=$! \$?=$?\n";
	$self->{filled} = 1;
}

1;
public-inbox-1.9.0/lib/PublicInbox/GitHTTPBackend.pm000066400000000000000000000104371430031475700221420ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors 
# License: AGPL-3.0+ 

# when no endpoints match, fallback to this and serve a static file
# or smart HTTP.  This is our wrapper for git-http-backend(1)
package PublicInbox::GitHTTPBackend;
use strict;
use v5.10.1;
use Fcntl qw(:seek);
use IO::Handle; # ->flush
use HTTP::Date qw(time2str);
use PublicInbox::Qspawn;
use PublicInbox::Tmpfile;
use PublicInbox::WwwStatic qw(r @NO_CACHE);
use Carp ();

# 32 is same as the git-daemon connection limit
my $default_limiter = PublicInbox::Qspawn::Limiter->new(32);

# n.b. serving "description" and "cloneurl" should be innocuous enough to
# not cause problems.  serving "config" might...
my @text = qw[HEAD info/refs info/attributes
	objects/info/(?:http-alternates|alternates|packs)
	cloneurl description];

my @binary = qw!
	objects/[a-f0-9]{2}/[a-f0-9]{38}
	objects/pack/pack-[a-f0-9]{40}\.(?:pack|idx)
	!;

our $ANY = join('|', @binary, @text, 'git-upload-pack');
my $BIN = join('|', @binary);
my $TEXT = join('|', @text);

sub serve {
	my ($env, $git, $path) = @_;

	# Documentation/technical/http-protocol.txt in git.git
	# requires one and exactly one query parameter:
	if ($env->{QUERY_STRING} =~ /\Aservice=git-[A-Za-z0-9_]+-pack\z/ ||
				$path =~ /\Agit-[A-Za-z0-9_]+-pack\z/) {
		my $ok = serve_smart($env, $git, $path);
		return $ok if $ok;
	}

	serve_dumb($env, $git, $path);
}

sub ucarp { Carp::carp(@_); undef }

my $prev = 0;
my $exp;
sub cache_one_year {
	my ($h) = @_;
	my $t = time + 31536000;
	push @$h, 'Expires', $t == $prev ? $exp : ($exp = time2str($prev = $t)),
		'Cache-Control', 'public, max-age=31536000';
}

sub serve_dumb {
	my ($env, $git, $path) = @_;

	my $h = [];
	my $type;
	if ($path =~ m!\Aobjects/[a-f0-9]{2}/[a-f0-9]{38}\z!) {
		$type = 'application/x-git-loose-object';
		cache_one_year($h);
	} elsif ($path =~ m!\Aobjects/pack/pack-[a-f0-9]{40}\.pack\z!) {
		$type = 'application/x-git-packed-objects';
		cache_one_year($h);
	} elsif ($path =~ m!\Aobjects/pack/pack-[a-f0-9]{40}\.idx\z!) {
		$type = 'application/x-git-packed-objects-toc';
		cache_one_year($h);
	} elsif ($path =~ /\A(?:$TEXT)\z/o) {
		$type = 'text/plain';
		push @$h, @NO_CACHE;
	} else {
		return r(404);
	}
	$path = "$git->{git_dir}/$path";
	PublicInbox::WwwStatic::response($env, $h, $path, $type);
}

sub git_parse_hdr { # {parse_hdr} for Qspawn
	my ($r, $bref, $dumb_args) = @_;
	my $res = parse_cgi_headers($r, $bref) or return; # incomplete
	$res->[0] == 403 ? serve_dumb(@$dumb_args) : $res;
}

# returns undef if 403 so it falls back to dumb HTTP
sub serve_smart {
	my ($env, $git, $path) = @_;
	my %env = %ENV;
	# GIT_COMMITTER_NAME, GIT_COMMITTER_EMAIL
	# may be set in the server-process and are passed as-is
	foreach my $name (qw(QUERY_STRING
				REMOTE_USER REMOTE_ADDR
				HTTP_CONTENT_ENCODING
				HTTP_GIT_PROTOCOL
				CONTENT_TYPE
				SERVER_PROTOCOL
				REQUEST_METHOD)) {
		my $val = $env->{$name};
		$env{$name} = $val if defined $val;
	}
	my $limiter = $git->{-httpbackend_limiter} || $default_limiter;
	$env{GIT_HTTP_EXPORT_ALL} = '1';
	$env{PATH_TRANSLATED} = "$git->{git_dir}/$path";
	my $rdr = input_prepare($env) or return r(500);
	my $qsp = PublicInbox::Qspawn->new([qw(git http-backend)], \%env, $rdr);
	$qsp->psgi_return($env, $limiter, \&git_parse_hdr, [$env, $git, $path]);
}

sub input_prepare {
	my ($env) = @_;

	my $input = $env->{'psgi.input'};
	my $fd = eval { fileno($input) };
	return { 0 => $fd } if (defined $fd && $fd >= 0);
	my $id = "git-http.input.$env->{REMOTE_ADDR}:$env->{REMOTE_PORT}";
	my $in = tmpfile($id) // return ucarp("tmpfile: $!");
	my $buf;
	while (1) {
		my $r = $input->read($buf, 8192) // return ucarp("read $!");
		last if $r == 0;
		print $in $buf // return ucarp("print: $!");
	}
	# ensure it's visible to git-http-backend(1):
	$in->flush // return ucarp("flush: $!");
	sysseek($in, 0, SEEK_SET) // return ucarp($env, "seek: $!");
	{ 0 => $in };
}

sub parse_cgi_headers {
	my ($r, $bref) = @_;
	return r(500) unless defined $r && $r >= 0;
	$$bref =~ s/\A(.*?)\r?\n\r?\n//s or return $r == 0 ? r(500) : undef;
	my $h = $1;
	my $code = 200;
	my @h;
	foreach my $l (split(/\r?\n/, $h)) {
		my ($k, $v) = split(/:\s*/, $l, 2);
		if ($k =~ /\AStatus\z/i) {
			($code) = ($v =~ /\b([0-9]+)\b/);
		} else {
			push @h, $k, $v;
		}
	}
	[ $code, \@h ]
}

1;
public-inbox-1.9.0/lib/PublicInbox/GzipFilter.pm000066400000000000000000000146711430031475700215320ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 
#
# In public-inbox <=1.5.0, public-inbox-httpd favored "getline"
# response bodies to take a "pull"-based approach to feeding
# slow clients (as opposed to a more common "push" model).
#
# In newer versions, public-inbox-httpd supports a backpressure-aware
# pull/push model which also accounts for slow git blob storage.
# async_next callbacks only run when the DS {wbuf} is drained
# async_eml callbacks only run when a blob arrives from git.
#
# We continue to support getline+close for generic PSGI servers.
package PublicInbox::GzipFilter;
use strict;
use parent qw(Exporter);
use Compress::Raw::Zlib qw(Z_OK);
use PublicInbox::CompressNoop;
use PublicInbox::Eml;
use PublicInbox::GitAsyncCat;

our @EXPORT_OK = qw(gzf_maybe);
my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1);
my @GZIP_HDRS = qw(Vary Accept-Encoding Content-Encoding gzip);

sub new { bless {}, shift } # qspawn filter

# for Qspawn if using $env->{'pi-httpd.async'}
sub attach {
	my ($self, $http_out) = @_;
	$self->{http_out} = $http_out; # PublicInbox::HTTP::{Chunked,Identity}
	$self
}

sub gz_or_noop {
	my ($res_hdr, $env) = @_;
	if (($env->{HTTP_ACCEPT_ENCODING} // '') =~ /\bgzip\b/) {
		$env->{'plack.skip-deflater'} = 1;
		push @$res_hdr, @GZIP_HDRS;
		gzip_or_die();
	} else {
		PublicInbox::CompressNoop::new();
	}
}

sub gzf_maybe ($$) { bless { gz => gz_or_noop(@_) }, __PACKAGE__ }

sub psgi_response {
	# $code may be an HTTP response code (e.g. 200) or a CODE ref (mbox_hdr)
	my ($self, $code, $res_hdr) = @_;
	if ($self->{env}->{'pi-httpd.async'}) {
		my $http = $self->{env}->{'psgix.io'}; # PublicInbox::HTTP
		$http->{forward} = $self;
		sub {
			my ($wcb) = @_; # -httpd provided write callback
			$self->{wcb_args} = [ $code, $res_hdr, $wcb ];
			$self->can('async_next')->($http); # start stepping
		};
	} else { # generic PSGI code path
		ref($code) eq 'CODE' and
			($code, $res_hdr) = @{$code->($self)};
		$self->{gz} //= gz_or_noop($res_hdr, $self->{env});
		[ $code, $res_hdr, $self ];
	}
}

sub qsp_maybe ($$) {
	my ($res_hdr, $env) = @_;
	return if ($env->{HTTP_ACCEPT_ENCODING} // '') !~ /\bgzip\b/;
	my $hdr = join("\n", @$res_hdr);
	return if $hdr !~ m!^Content-Type\n
				(?:(?:text/(?:html|plain))|
				application/atom\+xml)\b!ixsm;
	return if $hdr =~ m!^Content-Encoding\ngzip\n!smi;
	return if $hdr =~ m!^Content-Length\n[0-9]+\n!smi;
	return if $hdr =~ m!^Transfer-Encoding\n!smi;
	# in case Plack::Middleware::Deflater is loaded:
	return if $env->{'plack.skip-deflater'}++;
	push @$res_hdr, @GZIP_HDRS;
	bless {}, __PACKAGE__;
}

sub gzip_or_die () {
	my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
	$err == Z_OK or die "Deflate->new failed: $err";
	$gz;
}

sub gone { # what: search/over/mm
	my ($ctx, $what) = @_;
	warn "W: `$ctx->{ibx}->{name}' $what went away unexpectedly\n";
	undef;
}

# for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'}
# Also used for ->getline callbacks
sub translate ($$) {
	my $self = $_[0]; # $_[1] => input

	# allocate the zlib context lazily here, instead of in ->new.
	# Deflate contexts are memory-intensive and this object may
	# be sitting in the Qspawn limiter queue for a while.
	my $gz = $self->{gz} //= gzip_or_die();
	my $zbuf = delete($self->{zbuf});
	if (defined $_[1]) { # my $buf = $_[1];
		my $err = $gz->deflate($_[1], $zbuf);
		die "gzip->deflate: $err" if $err != Z_OK;
		return $zbuf if length($zbuf) >= 8192;

		$self->{zbuf} = $zbuf;
		'';
	} else { # undef == EOF
		my $err = $gz->flush($zbuf);
		die "gzip->flush: $err" if $err != Z_OK;
		$zbuf;
	}
}

# returns PublicInbox::HTTP::{Chunked,Identity}
sub http_out ($) {
	my ($self) = @_;
	$self->{http_out} // do {
		my $args = delete $self->{wcb_args} // return undef;
		my $wcb = pop @$args; # from PublicInbox:HTTP async
		# $args->[0] may be \&mbox_hdr or similar
		$args = $args->[0]->($self) if ref($args->[0]) eq 'CODE';
		$self->{gz} //= gz_or_noop($args->[1], $self->{env});
		$self->{http_out} = $wcb->($args); # $wcb->([$code, $hdr_ary])
	};
}

sub write {
	# my $ret = bytes::length($_[1]); # XXX does anybody care?
	http_out($_[0])->write(translate($_[0], $_[1]));
}

# similar to ->translate; use this when we're sure we know we have
# more data to buffer after this
sub zmore {
	my $self = $_[0]; # $_[1] => input
	http_out($self);
	my $err = $self->{gz}->deflate($_[1], $self->{zbuf});
	die "gzip->deflate: $err" if $err != Z_OK;
	undef;
}

# flushes and returns the final bit of gzipped data
sub zflush ($;$) {
	my $self = $_[0]; # $_[1] => final input (optional)
	my $zbuf = delete $self->{zbuf};
	my $gz = delete $self->{gz};
	my $err;
	if (defined $_[1]) { # it's a bug iff $gz is undef w/ $_[1]
		$err = $gz->deflate($_[1], $zbuf);
		die "gzip->deflate: $err" if $err != Z_OK;
	}
	$gz // return ''; # not a bug, recursing on DS->write failure
	$err = $gz->flush($zbuf);
	die "gzip->flush: $err" if $err != Z_OK;
	$zbuf;
}

sub close {
	my ($self) = @_;
	my $http_out = http_out($self) // return;
	$http_out->write(zflush($self));
	(delete($self->{http_out}) // return)->close;
}

sub bail  {
	my $self = shift;
	if (my $env = $self->{env}) {
		warn @_, "\n";
		my $http = $env->{'psgix.io'} or return; # client abort
		eval { $http->close }; # should hit our close
		warn "E: error in http->close: $@" if $@;
		eval { $self->close }; # just in case...
		warn "E: error in self->close: $@" if $@;
	} else {
		warn @_, "\n";
	}
}

# this is public-inbox-httpd-specific
sub async_blob_cb { # git->cat_async callback
	my ($bref, $oid, $type, $size, $self) = @_;
	my $http = $self->{env}->{'psgix.io'}; # PublicInbox::HTTP
	$http->{forward} or return; # client aborted
	my $smsg = $self->{smsg} or bail($self, 'BUG: no smsg');
	if (!defined($oid)) {
		# it's possible to have TOCTOU if an admin runs
		# public-inbox-(edit|purge), just move onto the next message
		warn "E: $smsg->{blob} missing in $self->{ibx}->{inboxdir}\n";
		return $http->next_step($self->can('async_next'));
	}
	$smsg->{blob} eq $oid or bail($self, "BUG: $smsg->{blob} != $oid");
	eval { $self->async_eml(PublicInbox::Eml->new($bref)) };
	bail($self, "E: async_eml: $@") if $@;
	if ($self->{-low_prio}) { # run via PublicInbox::WWW::event_step
		push(@{$self->{www}->{-low_prio_q}}, $self) == 1 and
				PublicInbox::DS::requeue($self->{www});
	} else {
		$http->next_step($self->can('async_next'));
	}
}

sub smsg_blob {
	my ($self, $smsg) = @_;
	ibx_async_cat($self->{ibx}, $smsg->{blob}, \&async_blob_cb, $self);
}

1;
public-inbox-1.9.0/lib/PublicInbox/HTTP.pm000066400000000000000000000316511430031475700202270ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 
#
# Generic PSGI server for convenience.  It aims to provide
# a consistent experience for public-inbox admins so they don't have
# to learn different ways to admin both NNTP and HTTP components.
# There's nothing which depends on public-inbox, here.
# Each instance of this class represents a HTTP client socket
#
# fields:
# httpd: PublicInbox::HTTPD ref
# env: PSGI env hashref
# input_left: bytes left to read in request body (e.g. POST/PUT)
# remote_addr: remote IP address as a string (e.g. "127.0.0.1")
# remote_port: peer port
# forward: response body object, response to ->getline + ->close
# alive: HTTP keepalive state:
#	0: drop connection when done
#	1: keep connection when done
#	2: keep connection, chunk responses
package PublicInbox::HTTP;
use strict;
use parent qw(PublicInbox::DS);
use Fcntl qw(:seek);
use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl
use Plack::Util;
use HTTP::Status qw(status_message);
use HTTP::Date qw(time2str);
use PublicInbox::DS qw(msg_more);
use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
use PublicInbox::Tmpfile;
use constant {
	CHUNK_START => -1,   # [a-f0-9]+\r\n
	CHUNK_END => -2,     # \r\n
	CHUNK_ZEND => -3,    # \r\n
	CHUNK_MAX_HDR => 256,
};
use Errno qw(EAGAIN);

# Use the same configuration parameter as git since this is primarily
# a slow-client sponge for git-http-backend
# TODO: support per-respository http.maxRequestBuffer somehow...
our $MAX_REQUEST_BUFFER = $ENV{GIT_HTTP_MAX_REQUEST_BUFFER} ||
			(10 * 1024 * 1024);

open(my $null_io, '<', '/dev/null') or die "failed to open /dev/null: $!";
my $http_date;
my $prev = 0;
sub http_date () {
	my $now = time;
	$now == $prev ? $http_date : ($http_date = time2str($prev = $now));
}

sub new ($$$) {
	my ($class, $sock, $addr, $srv_env) = @_;
	my $self = bless { srv_env => $srv_env }, $class;
	my $ev = EPOLLIN;
	my $wbuf;
	if ($sock->can('accept_SSL') && !$sock->accept_SSL) {
		return CORE::close($sock) if $! != EAGAIN;
		$ev = PublicInbox::TLS::epollbit() or return CORE::close($sock);
		$wbuf = [ \&PublicInbox::DS::accept_tls_step ];
	}
	$self->{wbuf} = $wbuf if $wbuf;
	($self->{remote_addr}, $self->{remote_port}) =
		PublicInbox::Daemon::host_with_port($addr);
	$self->SUPER::new($sock, $ev | EPOLLONESHOT);
}

sub event_step { # called by PublicInbox::DS
	my ($self) = @_;
	local $SIG{__WARN__} = $self->{srv_env}->{'pi-httpd.warn_cb'};
	return unless $self->flush_write && $self->{sock};

	# only read more requests if we've drained the write buffer,
	# otherwise we can be buffering infinitely w/o backpressure

	return read_input($self) if ref($self->{env});

	my $rbuf = $self->{rbuf} // (\(my $x = ''));
	my %env = %{$self->{srv_env}}; # full hash copy
	my $r;
	while (($r = parse_http_request($$rbuf, \%env)) < 0) {
		# We do not support Trailers in chunked requests, for
		# now (they are rarely-used and git (as of 2.7.2) does
		# not use them).
		# this length-check is necessary for PURE_PERL=1:
		if ($r == -1 || $env{HTTP_TRAILER} ||
				($r == -2 && length($$rbuf) > 0x4000)) {
			return quit($self, 400);
		}
		$self->do_read($rbuf, 8192, length($$rbuf)) or return;
	}
	return quit($self, 400) if grep(/\s/, keys %env); # stop smugglers
	$$rbuf = substr($$rbuf, $r);
	my $len = input_prepare($self, \%env) //
		return write_err($self, undef); # EMFILE/ENFILE

	$len ? read_input($self, $rbuf) : app_dispatch($self, undef, $rbuf);
}

sub read_input ($;$) {
	my ($self, $rbuf) = @_;
	$rbuf //= $self->{rbuf} // (\(my $x = ''));
	my $env = $self->{env};
	return read_input_chunked($self, $rbuf) if env_chunked($env);

	# env->{CONTENT_LENGTH} (identity)
	my $len = delete $self->{input_left};
	my $input = $env->{'psgi.input'};

	while ($len > 0) {
		if ($$rbuf ne '') {
			my $w = syswrite($input, $$rbuf, $len);
			return write_err($self, $len) unless $w;
			$len -= $w;
			die "BUG: $len < 0 (w=$w)" if $len < 0;
			if ($len == 0) { # next request may be pipelined
				$$rbuf = substr($$rbuf, $w);
				last;
			}
			$$rbuf = '';
		}
		$self->do_read($rbuf, 8192) or return recv_err($self, $len);
		# continue looping if $r > 0;
	}
	app_dispatch($self, $input, $rbuf);
}

sub app_dispatch {
	my ($self, $input, $rbuf) = @_;
	$self->rbuf_idle($rbuf);
	my $env = $self->{env};
	$self->{env} = undef; # for exists() check in ->busy
	$env->{REMOTE_ADDR} = $self->{remote_addr};
	$env->{REMOTE_PORT} = $self->{remote_port};
	if (defined(my $host = $env->{HTTP_HOST})) {
		$host =~ s/:([0-9]+)\z// and $env->{SERVER_PORT} = $1 + 0;
		$env->{SERVER_NAME} = $host;
	}
	if (defined $input) {
		sysseek($input, 0, SEEK_SET) or
			die "BUG: psgi.input seek failed: $!";
	}
	# note: NOT $self->{sock}, we want our close (+ PublicInbox::DS::close),
	# to do proper cleanup:
	$env->{'psgix.io'} = $self; # for ->close or async_pass
	my $res = Plack::Util::run_app($env->{'pi-httpd.app'}, $env);
	eval {
		if (ref($res) eq 'CODE') {
			$res->(sub { response_write($self, $env, $_[0]) });
		} else {
			response_write($self, $env, $res);
		}
	};
	if ($@) {
		warn "response_write error: $@";
		$self->close;
	}
}

sub response_header_write {
	my ($self, $env, $res) = @_;
	my $proto = $env->{SERVER_PROTOCOL} or return; # HTTP/0.9 :P
	my $status = $res->[0];
	my $h = "$proto $status " . status_message($status) . "\r\n";
	my ($len, $chunked);
	my $headers = $res->[1];

	for (my $i = 0; $i < @$headers; $i += 2) {
		my $k = $headers->[$i];
		my $v = $headers->[$i + 1];
		next if $k =~ /\A(?:Connection|Date)\z/i;

		$len = $v if $k =~ /\AContent-Length\z/i;
		if ($k =~ /\ATransfer-Encoding\z/i && $v =~ /\bchunked\b/i) {
			$chunked = 1;
		}
		$h .= "$k: $v\r\n";
	}

	my $conn = $env->{HTTP_CONNECTION} || '';
	my $term = defined($len) || $chunked;
	my $prot_persist = ($proto eq 'HTTP/1.1') && ($conn !~ /\bclose\b/i);
	my $alive;
	if (!$term && $prot_persist) { # auto-chunk
		$chunked = $alive = 2;
		$alive = 3 if $env->{REQUEST_METHOD} eq 'HEAD';
		$h .= "Transfer-Encoding: chunked\r\n";
		# no need for "Connection: keep-alive" with HTTP/1.1
	} elsif ($term && ($prot_persist || ($conn =~ /\bkeep-alive\b/i))) {
		$alive = 1;
		$h .= "Connection: keep-alive\r\n";
	} else {
		$alive = 0;
		$h .= "Connection: close\r\n";
	}
	$h .= 'Date: ' . http_date() . "\r\n\r\n";

	if (($len || $chunked) && $env->{REQUEST_METHOD} ne 'HEAD') {
		msg_more($self, $h);
	} else {
		$self->write(\$h);
	}
	$alive;
}

# middlewares such as Deflater may write empty strings
sub chunked_write ($$) {
	my $self = $_[0];
	return if $_[1] eq '';
	msg_more($self, sprintf("%x\r\n", length($_[1])));
	msg_more($self, $_[1]);

	# use $self->write(\"\n\n") if you care about real-time
	# streaming responses, public-inbox WWW does not.
	msg_more($self, "\r\n");
}

sub identity_write ($$) {
	my $self = $_[0];
	$self->write(\($_[1])) if $_[1] ne '';
}

sub response_done {
	my ($self, $alive) = @_;
	delete $self->{env}; # we're no longer busy
	# HEAD requests set $alive = 3 so we don't send "0\r\n\r\n";
	$self->write(\"0\r\n\r\n") if $alive == 2;
	$self->write($alive ? $self->can('requeue') : \&close);
}

sub getline_pull {
	my ($self) = @_;
	my $forward = $self->{forward};

	# limit our own running time for fairness with other
	# clients and to avoid buffering too much:
	my $buf = eval {
		local $/ = \65536;
		$forward->getline;
	} if $forward;

	if (defined $buf) {
		# may close in PublicInbox::DS::write
		if ($self->{alive} == 2) {
			chunked_write($self, $buf);
		} else {
			identity_write($self, $buf);
		}

		if ($self->{sock}) {
			# autovivify wbuf
			my $new_size = push(@{$self->{wbuf}}, \&getline_pull);

			# wbuf may be populated by {chunked,identity}_write()
			# above, no need to rearm if so:
			$self->requeue if $new_size == 1;
			return; # likely
		}
	} elsif ($@) {
		warn "response ->getline error: $@";
		$self->close;
	}
	# avoid recursion
	if (delete $self->{forward}) {
		eval { $forward->close };
		if ($@) {
			warn "response ->close error: $@";
			$self->close; # idempotent
		}
	}
	response_done($self, delete $self->{alive});
}

sub response_write {
	my ($self, $env, $res) = @_;
	my $alive = response_header_write($self, $env, $res);
	if (defined(my $body = $res->[2])) {
		if (ref $body eq 'ARRAY') {
			if ($alive == 2) {
				chunked_write($self, $_) for @$body;
			} else {
				identity_write($self, $_) for @$body;
			}
			response_done($self, $alive);
		} else {
			$self->{forward} = $body;
			$self->{alive} = $alive;
			getline_pull($self); # kick-off!
		}
	# these are returned to the calling application:
	} elsif ($alive >= 2) {
		bless [ $self, $alive ], 'PublicInbox::HTTP::Chunked';
	} else {
		bless [ $self, $alive ], 'PublicInbox::HTTP::Identity';
	}
}

sub input_prepare {
	my ($self, $env) = @_;
	my ($input, $len);

	# rfc 7230 3.3.2, 3.3.3,: favor Transfer-Encoding over Content-Length
	my $hte = $env->{HTTP_TRANSFER_ENCODING};
	if (defined $hte) {
		# rfc7230 3.3.3, point 3 says only chunked is accepted
		# as the final encoding.  Since neither public-inbox-httpd,
		# git-http-backend, or our WWW-related code uses "gzip",
		# "deflate" or "compress" as the Transfer-Encoding, we'll
		# reject them:
		return quit($self, 400) if $hte !~ /\Achunked\z/i;

		$len = CHUNK_START;
		$input = tmpfile('http.input', $self->{sock});
	} else {
		$len = $env->{CONTENT_LENGTH};
		if (defined $len) {
			# rfc7230 3.3.3.4
			return quit($self, 400) if $len !~ /\A[0-9]+\z/;
			return quit($self, 413) if $len > $MAX_REQUEST_BUFFER;
			$input = $len ? tmpfile('http.input', $self->{sock})
				: $null_io;
		} else {
			$input = $null_io;
		}
	}

	# TODO: expire idle clients on ENFILE / EMFILE
	$env->{'psgi.input'} = $input // return;
	$self->{env} = $env;
	$self->{input_left} = $len || 0;
}

sub env_chunked { ($_[0]->{HTTP_TRANSFER_ENCODING} // '') =~ /\Achunked\z/i }

sub write_err {
	my ($self, $len) = @_;
	my $msg = $! || '(zero write)';
	$msg .= " ($len bytes remaining)" if defined $len;
	warn "error buffering to input: $msg";
	quit($self, 500);
}

sub recv_err {
	my ($self, $len) = @_;
	if ($! == EAGAIN) { # epoll/kevent watch already set by do_read
		$self->{input_left} = $len;
	} else {
		warn "error reading input: $! ($len bytes remaining)";
	}
}

sub read_input_chunked { # unlikely...
	my ($self, $rbuf) = @_;
	$rbuf //= $self->{rbuf} // (\(my $x = ''));
	my $input = $self->{env}->{'psgi.input'};
	my $len = delete $self->{input_left};

	while (1) { # chunk start
		if ($len == CHUNK_ZEND) {
			$$rbuf =~ s/\A\r\n//s and
				return app_dispatch($self, $input, $rbuf);

			return quit($self, 400) if length($$rbuf) > 2;
		}
		if ($len == CHUNK_END) {
			if ($$rbuf =~ s/\A\r\n//s) {
				$len = CHUNK_START;
			} elsif (length($$rbuf) > 2) {
				return quit($self, 400);
			}
		}
		if ($len == CHUNK_START) {
			if ($$rbuf =~ s/\A([a-f0-9]+).*?\r\n//i) {
				$len = hex $1;
				if (($len + -s $input) > $MAX_REQUEST_BUFFER) {
					return quit($self, 413);
				}
			} elsif (length($$rbuf) > CHUNK_MAX_HDR) {
				return quit($self, 400);
			}
			# will break from loop since $len >= 0
		}

		if ($len < 0) { # chunk header is trickled, read more
			$self->do_read($rbuf, 8192, length($$rbuf)) or
				return recv_err($self, $len);
			# (implicit) goto chunk_start if $r > 0;
		}
		$len = CHUNK_ZEND if $len == 0;

		# drain the current chunk
		until ($len <= 0) {
			if ($$rbuf ne '') {
				my $w = syswrite($input, $$rbuf, $len);
				return write_err($self, "$len chunk") if !$w;
				$len -= $w;
				if ($len == 0) {
					# we may have leftover data to parse
					# in chunk
					$$rbuf = substr($$rbuf, $w);
					$len = CHUNK_END;
				} elsif ($len < 0) {
					die "BUG: len < 0: $len";
				} else {
					$$rbuf = '';
				}
			}
			if ($$rbuf eq '') {
				# read more of current chunk
				$self->do_read($rbuf, 8192) or
					return recv_err($self, $len);
			}
		}
	}
}

sub quit {
	my ($self, $status) = @_;
	my $h = "HTTP/1.1 $status " . status_message($status) . "\r\n\r\n";
	$self->write(\$h);
	$self->close;
	undef; # input_prepare expects this
}

sub close {
	my $self = $_[0];
	if (my $forward = delete $self->{forward}) {
		eval { $forward->close };
		warn "forward ->close error: $@" if $@;
	}
	$self->SUPER::close; # PublicInbox::DS::close
}

sub busy { # for graceful shutdown in PublicInbox::Daemon:
	my ($self) = @_;
	defined($self->{rbuf}) || exists($self->{env}) || defined($self->{wbuf})
}

# runs $cb on the next iteration of the event loop at earliest
sub next_step {
	my ($self, $cb) = @_;
	return unless exists $self->{sock};
	$self->requeue if 1 == push(@{$self->{wbuf}}, $cb);
}

# Chunked and Identity packages are used for writing responses.
# They may be exposed to the PSGI application when the PSGI app
# returns a CODE ref for "push"-based responses
package PublicInbox::HTTP::Chunked;
use strict;

sub write {
	# ([$http], $buf) = @_;
	PublicInbox::HTTP::chunked_write($_[0]->[0], $_[1])
}

sub close {
	# $_[0] = [$http, $alive]
	PublicInbox::HTTP::response_done(@{$_[0]});
}

package PublicInbox::HTTP::Identity;
use strict;
our @ISA = qw(PublicInbox::HTTP::Chunked);

sub write {
	# ([$http], $buf) = @_;
	PublicInbox::HTTP::identity_write($_[0]->[0], $_[1]);
}

1;
public-inbox-1.9.0/lib/PublicInbox/HTTPD.pm000066400000000000000000000051351430031475700203310ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# wraps a listen socket for HTTP and links it to the PSGI app in
# public-inbox-httpd
package PublicInbox::HTTPD;
use v5.10.1;
use strict;
use Plack::Util ();
use Plack::Builder;
use PublicInbox::HTTP;
use PublicInbox::HTTPD::Async;

sub pi_httpd_async { PublicInbox::HTTPD::Async->new(@_) }

# we have a different env for ever listener socket for
# SERVER_NAME, SERVER_PORT and psgi.url_scheme
# envs: listener FD => PSGI env
sub new { bless { envs => {}, err => \*STDERR }, __PACKAGE__ }

# this becomes {srv_env} in PublicInbox::HTTP
sub env_for ($$$) {
	my ($self, $srv, $client) = @_;
	my $n = getsockname($srv) or die "not a socket: $srv $!\n";
	my ($host, $port) = PublicInbox::Daemon::host_with_port($n);
	{
		SERVER_NAME => $host,
		SERVER_PORT => $port,
		SCRIPT_NAME => '',
		'psgi.version' => [ 1, 1 ],
		'psgi.errors' => $self->{err},
		'psgi.url_scheme' => $client->can('accept_SSL') ?
					'https' : 'http',
		'psgi.nonblocking' => Plack::Util::TRUE,
		'psgi.streaming' => Plack::Util::TRUE,
		'psgi.run_once'	 => Plack::Util::FALSE,
		'psgi.multithread' => Plack::Util::FALSE,
		'psgi.multiprocess' => Plack::Util::TRUE,

		# We don't use this anywhere, but we can support
		# other PSGI apps which might use it:
		'psgix.input.buffered' => Plack::Util::TRUE,

		# XXX unstable API!, only GitHTTPBackend needs
		# this to limit git-http-backend(1) parallelism.
		# We also check for the truthiness of this to
		# detect when to use async paths for slow blobs
		'pi-httpd.async' => \&pi_httpd_async,
		'pi-httpd.app' => $self->{app},
		'pi-httpd.warn_cb' => $self->{warn_cb},
	}
}

sub refresh_groups {
	my ($self) = @_;
	my $app;
	$self->{psgi} //= $main::ARGV[0] if @main::ARGV;
	if ($self->{psgi}) {
		eval { $app = Plack::Util::load_psgi($self->{psgi}) };
		die $@, <new;
		$www->preload;
		$app = builder {
			eval { enable 'ReverseProxy' };
			$@ and warn <call(@_) };
		};
	}
	$_->{'pi-httpd.app'} = $app for values %{$self->{envs}};
	$self->{app} = $app;
}

sub post_accept_cb { # for Listener->{post_accept}
	my ($self) = @_;
	sub {
		my ($client, $addr, $srv) = @_; # $_[4] - tls_wrap (unused)
		PublicInbox::HTTP->new($client, $addr,
				$self->{envs}->{fileno($srv)} //=
					env_for($self, $srv, $client));
	}
}

1;
public-inbox-1.9.0/lib/PublicInbox/HTTPD/000077500000000000000000000000001430031475700177675ustar00rootroot00000000000000public-inbox-1.9.0/lib/PublicInbox/HTTPD/Async.pm000066400000000000000000000065461430031475700214150ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors 
# License: AGPL-3.0+ 
#
# XXX This is a totally unstable API for public-inbox internal use only
# This is exposed via the 'pi-httpd.async' key in the PSGI env hash.
# The name of this key is not even stable!
# Currently intended for use with read-only pipes with expensive
# processes such as git-http-backend(1), cgit(1)
#
# fields:
# http: PublicInbox::HTTP ref
# fh: PublicInbox::HTTP::{Identity,Chunked} ref (can ->write + ->close)
# cb: initial read callback
# arg: arg for {cb}
# end_obj: CODE or object which responds to ->event_step when ->close is called
package PublicInbox::HTTPD::Async;
use strict;
use parent qw(PublicInbox::DS);
use Errno qw(EAGAIN);
use PublicInbox::Syscall qw(EPOLLIN);

# This is called via: $env->{'pi-httpd.async'}->()
# $io is a read-only pipe ($rpipe) for now, but may be a
# bidirectional socket in the future.
sub new {
	my ($class, $io, $cb, $arg, $end_obj) = @_;

	# no $io? call $cb at the top of the next event loop to
	# avoid recursion:
	unless (defined($io)) {
		PublicInbox::DS::requeue($cb ? $cb : $arg);
		die '$end_obj unsupported w/o $io' if $end_obj;
		return;
	}
	my $self = bless {
		cb => $cb, # initial read callback
		arg => $arg, # arg for $cb
		end_obj => $end_obj, # like END{}, can ->event_step
	}, $class;
	my $pp = tied *$io;
	$pp->{fh}->blocking(0) // die "$io->blocking(0): $!";
	$self->SUPER::new($io, EPOLLIN);
}

sub event_step {
	my ($self) = @_;
	if (my $cb = delete $self->{cb}) {
		# this may call async_pass when headers are done
		$cb->(my $refcnt_guard = delete $self->{arg});
	} elsif (my $sock = $self->{sock}) {
		my $http = $self->{http};
		# $self->{sock} is a read pipe for git-http-backend or cgit
		# and 65536 is the default Linux pipe size
		my $r = sysread($sock, my $buf, 65536);
		if ($r) {
			$self->{fh}->write($buf); # may call $http->close
			# let other clients get some work done, too
			return if $http->{sock}; # !closed

			# else: fall through to close below...
		} elsif (!defined $r && $! == EAGAIN) {
			return; # EPOLLIN means we'll be notified
		}

		# Done! Error handling will happen in $self->{fh}->close
		# called by end_obj->event_step handler
		delete $http->{forward};
		$self->close; # queues end_obj->event_step to be called
	} # else { # we may've been requeued but closed by $http
}

# once this is called, all data we read is passed to the
# to the PublicInbox::HTTP instance ($http) via $fh->write
sub async_pass {
	my ($self, $http, $fh, $bref) = @_;
	# In case the client HTTP connection ($http) dies, it
	# will automatically close this ($self) object.
	$http->{forward} = $self;

	# write anything we overread when we were reading headers
	$fh->write($$bref); # PublicInbox:HTTP::{chunked,identity}_wcb

	# we're done with this, free this memory up ASAP since the
	# calls after this may use much memory:
	$$bref = undef;

	$self->{http} = $http;
	$self->{fh} = $fh;
}

# may be called as $forward->close in PublicInbox::HTTP or EOF (event_step)
sub close {
	my $self = $_[0];
	$self->SUPER::close; # DS::close

	# we defer this to the next timer loop since close is deferred
	if (my $end_obj = delete $self->{end_obj}) {
		# this calls $end_obj->event_step
		# (likely PublicInbox::Qspawn::event_step,
		#  NOT PublicInbox::HTTPD::Async::event_step)
		PublicInbox::DS::requeue($end_obj);
	}
}

1;
public-inbox-1.9.0/lib/PublicInbox/HlMod.pm000066400000000000000000000077101430031475700204520ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors 
# License: AGPL-3.0+ 

# I have no idea how stable or safe this is for handling untrusted
# input, but it seems to have been around for a while, and the
# highlight(1) executable is supported by gitweb and cgit.
#
# I'm also unsure about API stability, but highlight 3.x seems to
# have been around a few years and ikiwiki (apparently the only
# user of the SWIG/Perl bindings, at least in Debian) hasn't needed
# major changes to support it in recent years.
#
# Some code stolen from ikiwiki (GPL-2.0+)
# wrapper for SWIG-generated highlight.pm bindings
package PublicInbox::HlMod;
use strict;
use v5.10.1;
use highlight; # SWIG-generated stuff
use PublicInbox::Hval qw(src_escape ascii_html);
my $hl;

sub _parse_filetypes ($) {
	my $ft_conf = $_[0]->getFiletypesConfPath('filetypes') or
				die 'filetypes.conf not found by highlight';
	open my $fh, '<', $ft_conf or die "failed to open($ft_conf): $!";
	local $/;
	my $cfg = <$fh>;
	my %ext2lang;
	my @shebang; # order matters

	# Hrm... why isn't this exposed by the highlight API?
	# highlight >= 3.2 format (bind-style) (from ikiwiki)
	while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
			 Extensions\s*=\s*{([^}]+)}/sgx) {
		my $lang = $1;
		foreach my $bit (split(/,/, $2)) {
			$bit =~ s/.*"(.*)".*/$1/s;
			$ext2lang{$bit} = $lang;
		}
	}
	# AFAIK, all the regexps used by in filetypes.conf distributed
	# by highlight work as Perl REs
	while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
			Shebang\s*=\s*\[\s*\[([^}]+)\s*\]\s*\]\s*}\s*,/sgx) {
		my ($lang, $re) = ($1, $2);
		eval {
			my $perl_re = qr/$re/;
			push @shebang, [ $lang, $perl_re ];
		};
		if ($@) {
			warn "$lang shebang=[[$re]] did not work in Perl: $@";
		}
	}
	(\%ext2lang, \@shebang);
}

# We only need one instance
sub new {
	my ($class) = @_;
	$hl ||= do {
		my $dir = highlight::DataDir->new;
		$dir->initSearchDirectories('');
		my ($ext2lang, $shebang) = _parse_filetypes($dir);
		bless {
			-dir => $dir,
			-ext2lang => $ext2lang,
			-shebang => $shebang,
		}, $class;
	};
}

sub _shebang2lang ($$) {
	my ($self, $str) = @_;
	my $shebang = $self->{-shebang};
	foreach my $s (@$shebang) {
		return $s->[0] if $$str =~ $s->[1];
	}
	undef;
}

sub _path2lang ($$) {
	my ($self, $path) = @_;
	my ($ext) = ($path =~ m!([^\\/\.]+)\z!);
	$ext = lc($ext);
	$self->{-ext2lang}->{$ext} || $ext;
}

sub do_hl {
	my ($self, $str, $path) = @_;
	my $lang = _path2lang($self, $path) if defined $path;
	do_hl_lang($self, $str, $lang);
}

sub do_hl_lang {
	my ($self, $str, $lang) = @_;

	my $langpath;
	if (defined $lang) {
		$langpath = $self->{-dir}->getLangPath("$lang.lang") or return;
		undef $lang unless -f $langpath;
	}
	$lang //= _shebang2lang($self, $str) // return;
	$langpath = $self->{-dir}->getLangPath("$lang.lang") or return;
	return unless -f $langpath;

	my $g = highlight::CodeGenerator::getInstance($highlight::HTML);
	$g->setFragmentCode(1); # generate html fragment

	# whatever theme works
	$g->initTheme($self->{-dir}->getThemePath('print.theme'));
	$g->loadLanguage($langpath);
	$g->setEncoding('utf-8');
	# we assume $$str is valid UTF-8, but the SWIG binding doesn't
	# know that, so ensure it's marked as UTF-8 even if it isnt...
	my $out = $g->generateString($$str);
	highlight::CodeGenerator::deleteInstance($g);
	utf8::decode($out);
	src_escape($out);
	\$out;
}

# Highlight text, but support Markdown "```$LANG" notation
# while preserving WYSIWYG of plain-text documentation.
# This is NOT to be enabled by default or encouraged for parsing
# emails, since it is NOT stable and can lead to standards
# proliferation of email.
sub do_hl_text {
	my ($self, $str) = @_;

	$$str = join('', map {
		if (/\A(``` ?)(\w+)\s*?\n(.+)(^```\s*)\z/sm) {
			my ($pfx, $lang, $code, $post) = ($1, $2, $3, $4);
			my $hl = do_hl_lang($self, \$code, $lang) || \$code;
			$pfx . $lang . "\n" . $$hl . $post;
		} else {
			ascii_html($_);
		}
	} split(/(^``` ?\w+\s*?\n.+?^```\s*$)/sm, $$str));
}

1;
public-inbox-1.9.0/lib/PublicInbox/Hval.pm000066400000000000000000000077711430031475700203500ustar00rootroot00000000000000# Copyright (C) 2014-2021 all contributors 
# License: AGPL-3.0+ 
#
# represents a header value in various forms.  Used for HTML generation
# in our web interface(s)
package PublicInbox::Hval;
use strict;
use warnings;
use Encode qw(find_encoding);
use PublicInbox::MID qw/mid_clean mid_escape/;
use base qw/Exporter/;
our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape
		to_attr prurl mid_href fmt_ts ts2str/;
use POSIX qw(strftime);
my $enc_ascii = find_encoding('us-ascii');

# safe-ish acceptable filename pattern for portability
our $FN = '[a-zA-Z0-9][a-zA-Z0-9_\-\.]+[a-zA-Z0-9]'; # needs \z anchor

sub mid_href { ascii_html(mid_escape($_[0])) }

# some of these overrides are standard C escapes so they're
# easy-to-understand when rendered.
my %escape_sequence = (
	"\x00" => '\\0', # NUL
	"\x07" => '\\a', # bell
	"\x08" => '\\b', # backspace
	"\x09" => "\t", # obvious to show as-is
	"\x0a" => "\n", # obvious to show as-is
	"\x0b" => '\\v', # vertical tab
	"\x0c" => '\\f', # form feed
	"\x0d" => '\\r', # carriage ret (not preceding \n)
	"\x1b" => '^[', # ASCII escape (mutt seems to escape this way)
	"\x7f" => '\\x7f', # DEL
);

our %xhtml_map = (
	'"' => '"',
	'&' => '&',
	"'" => ''',
	'<' => '<',
	'>' => '>',
);

$xhtml_map{chr($_)} = sprintf('\\x%02x', $_) for (0..31);
%xhtml_map = (%xhtml_map, %escape_sequence);

# for post-processing the output of highlight.pm and perhaps other
# highlighers in the future
sub src_escape ($) {
	$_[0] =~ s/\r\n/\n/sg;
	$_[0] =~ s/'/'/sg; # workaround https://bugs.debian.org/927409
	$_[0] =~ s/([\x7f\x00-\x1f])/$xhtml_map{$1}/sge;
	$_[0] = $enc_ascii->encode($_[0], Encode::HTMLCREF);
}

sub ascii_html {
	my ($s) = @_;
	$s =~ s/([<>&'"\x7f\x00-\x1f])/$xhtml_map{$1}/sge;
	$enc_ascii->encode($s, Encode::HTMLCREF);
}

# returns a protocol-relative URL string
sub prurl ($$) {
	my ($env, $u) = @_;
	if (ref($u) eq 'ARRAY') {
		my $h = $env->{HTTP_HOST} // $env->{SERVER_NAME};
		my @host_match = grep(/\b\Q$h\E\b/, @$u);
		$u = $host_match[0] // $u->[0];
		# fall through to below:
	}
	index($u, '//') == 0 ? "$env->{'psgi.url_scheme'}:$u" : $u;
}

# for misguided people who believe in this stuff, give them a
# substitution for '.'
# ․ · and ͺ were also candidates:
#   https://public-inbox.org/meta/20170615015250.GA6484@starla/
# However, • was chosen to make copy+paste errors more obvious
sub obfuscate_addrs ($$;$) {
	my $ibx = $_[0];
	my $repl = $_[2] // '•';
	my $re = $ibx->{-no_obfuscate_re}; # regex of domains
	my $addrs = $ibx->{-no_obfuscate}; # { $address => 1 }
	$_[1] =~ s#(\S+)\@([\w\-]+\.[\w\.\-]+)#
		my ($pfx, $domain) = ($1, $2);
		if (index($pfx, '://') > 0 || $pfx !~ s/([\w\.\+=\-]+)\z//) {
			"$pfx\@$domain";
		} else {
			my $user = $1;
			my $addr = "$user\@$domain";
			if ($addrs->{$addr} || ((defined($re) &&
						$domain =~ $re))) {
				$pfx.$addr;
			} else {
				$domain =~ s!([^\.]+)\.!$1$repl!;
				$pfx . $user . '@' . $domain
			}
		}
		#sge;
}

# like format_sanitized_subject in git.git pretty.c with '%f' format string
sub to_filename ($) {
	my $s = (split(/\n/, $_[0]))[0] // return; # empty string => undef
	$s =~ s/[^A-Za-z0-9_\.]+/-/g;
	$s =~ tr/././s;
	$s =~ s/[\.\-]+\z//;
	$s =~ s/\A[\.\-]+//;
	$s eq '' ? undef : $s;
}

# convert a filename (or any string) to HTML attribute

my %ESCAPES = map { chr($_) => sprintf('::%02x', $_) } (0..255);
$ESCAPES{'/'} = ':'; # common

sub to_attr ($) {
	my ($str) = @_;

	# git would never do this to us:
	return if index($str, '//') >= 0;

	my $first = '';
	utf8::encode($str); # to octets
	if ($str =~ s/\A([^A-Ya-z])//ms) { # start with a letter
		  $first = sprintf('Z%02x', ord($1));
	}
	$str =~ s/([^A-Za-z0-9_\.\-])/$ESCAPES{$1}/egms;
	utf8::decode($str); # allow wide chars
	$first . $str;
}

# for the t= query parameter passed to overview DB
sub ts2str ($) { strftime('%Y%m%d%H%M%S', gmtime($_[0])) };

# human-friendly format
sub fmt_ts ($) { strftime('%Y-%m-%d %k:%M', gmtime($_[0])) }

1;
public-inbox-1.9.0/lib/PublicInbox/IMAP.pm000066400000000000000000001110011430031475700201620ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 
#
# Each instance of this represents an IMAP client connected to
# public-inbox-imapd.  Much of this was taken from NNTP, but
# further refined while experimenting on future ideas to handle
# slow storage.
#
# data notes:
#
# * NNTP article numbers are UIDs, mm->created_at is UIDVALIDITY
#
# * public-inboxes are sliced into mailboxes of 50K messages
#   to not overload MUAs: $NEWSGROUP_NAME.$SLICE_INDEX
#   Slices are similar in concept to v2 "epochs".  Epochs
#   are for the limitations of git clients, while slices are
#   for the limitations of IMAP clients.
#
# * We also take advantage of slices being only 50K to store
#   "UID offset" to message sequence number (MSN) mapping
#   as a 50K uint16_t array (via pack("S*", ...)).  "UID offset"
#   is the offset from {uid_base} which determines the start of
#   the mailbox slice.
#
# fields:
# imapd: PublicInbox::IMAPD ref
# ibx: PublicInbox::Inbox ref
# long_cb: long_response private data
# uid_base: base UID for mailbox slice (0-based)
# -login_tag: IMAP TAG for LOGIN
# -idle_tag: IMAP response tag for IDLE
# uo2m: UID-to-MSN mapping
package PublicInbox::IMAP;
use strict;
use parent qw(PublicInbox::DS);
use PublicInbox::Eml;
use PublicInbox::EmlContentFoo qw(parse_content_disposition);
use PublicInbox::DS qw(now);
use PublicInbox::GitAsyncCat;
use Text::ParseWords qw(parse_line);
use Errno qw(EAGAIN);
use PublicInbox::IMAPsearchqp;

my $Address;
for my $mod (qw(Email::Address::XS Mail::Address)) {
	eval "require $mod" or next;
	$Address = $mod and last;
}
die "neither Email::Address::XS nor Mail::Address loaded: $@" if !$Address;

sub LINE_MAX () { 8000 } # RFC 2683 3.2.1.5

# Changing UID_SLICE will cause grief for clients which cache.
# This also needs to be <64K: we pack it into a uint16_t
# for long_response UID (offset) => MSN mappings
sub UID_SLICE () { 50_000 }

# these values area also used for sorting
sub NEED_SMSG () { 1 }
sub NEED_BLOB () { NEED_SMSG|2 }
sub CRLF_BREF () { 4 }
sub EML_HDR () { 8 }
sub CRLF_HDR () { 16 }
sub EML_BDY () { 32 }
sub CRLF_BDY () { 64 }
my $OP_EML_NEW = [ EML_HDR - 1, \&op_eml_new ];
my $OP_CRLF_BREF = [ CRLF_BREF, \&op_crlf_bref ];
my $OP_CRLF_HDR = [ CRLF_HDR, \&op_crlf_hdr ];
my $OP_CRLF_BDY = [ CRLF_BDY, \&op_crlf_bdy ];

my %FETCH_NEED = (
	'BODY[HEADER]' => [ NEED_BLOB|EML_HDR|CRLF_HDR, \&emit_rfc822_header ],
	'BODY[TEXT]' => [ NEED_BLOB|EML_BDY|CRLF_BDY, \&emit_rfc822_text ],
	'BODY[]' => [ NEED_BLOB|CRLF_BREF, \&emit_rfc822 ],
	'RFC822.HEADER' => [ NEED_BLOB|EML_HDR|CRLF_HDR, \&emit_rfc822_header ],
	'RFC822.TEXT' => [ NEED_BLOB|EML_BDY|CRLF_BDY, \&emit_rfc822_text ],
	'RFC822.SIZE' => [ NEED_SMSG, \&emit_rfc822_size ],
	RFC822 => [ NEED_BLOB|CRLF_BREF, \&emit_rfc822 ],
	BODY => [ NEED_BLOB|EML_HDR|EML_BDY, \&emit_body ],
	BODYSTRUCTURE => [ NEED_BLOB|EML_HDR|EML_BDY, \&emit_bodystructure ],
	ENVELOPE => [ NEED_BLOB|EML_HDR, \&emit_envelope ],
	FLAGS => [ 0, \&emit_flags ],
	INTERNALDATE => [ NEED_SMSG, \&emit_internaldate ],
);
my %FETCH_ATT = map { $_ => [ $_ ] } keys %FETCH_NEED;

# aliases (RFC 3501 section 6.4.5)
$FETCH_ATT{FAST} = [ qw(FLAGS INTERNALDATE RFC822.SIZE) ];
$FETCH_ATT{ALL} = [ @{$FETCH_ATT{FAST}}, 'ENVELOPE' ];
$FETCH_ATT{FULL} = [ @{$FETCH_ATT{ALL}}, 'BODY' ];

for my $att (keys %FETCH_ATT) {
	my %h = map { $_ => $FETCH_NEED{$_} } @{$FETCH_ATT{$att}};
	$FETCH_ATT{$att} = \%h;
}
undef %FETCH_NEED;

my $valid_range = '[0-9]+|[0-9]+:[0-9]+|[0-9]+:\*';
$valid_range = qr/\A(?:$valid_range)(?:,(?:$valid_range))*\z/;

sub do_greet {
	my ($self) = @_;
	my $capa = capa($self);
	$self->write(\"* OK [$capa] public-inbox-imapd ready\r\n");
}

sub new {
	my (undef, $sock, $imapd) = @_;
	(bless { imapd => $imapd }, 'PublicInbox::IMAP_preauth')->greet($sock)
}

sub logged_in { 1 }

sub capa ($) {
	my ($self) = @_;

	# dovecot advertises IDLE pre-login; perhaps because some clients
	# depend on it, so we'll do the same
	my $capa = 'CAPABILITY IMAP4rev1 IDLE';
	if ($self->logged_in) {
		$capa .= ' COMPRESS=DEFLATE';
	} else {
		if (!($self->{sock} // $self)->can('accept_SSL') &&
			$self->{imapd}->{ssl_ctx_opt}) {
			$capa .= ' STARTTLS';
		}
		$capa .= ' AUTH=ANONYMOUS';
	}
}

sub login_success ($$) {
	my ($self, $tag) = @_;
	bless $self, 'PublicInbox::IMAP';
	my $capa = capa($self);
	"$tag OK [$capa] Logged in\r\n";
}

sub auth_challenge_ok ($) {
	my ($self) = @_;
	my $tag = delete($self->{-login_tag}) or return;
	$self->{anon} = 1;
	login_success($self, $tag);
}

sub cmd_login ($$$$) {
	my ($self, $tag) = @_; # ignore ($user, $password) = ($_[2], $_[3])
	login_success($self, $tag);
}

sub cmd_close ($$) {
	my ($self, $tag) = @_;
	delete @$self{qw(uid_base uo2m)};
	delete $self->{ibx} ? "$tag OK Close done\r\n"
				: "$tag BAD No mailbox\r\n";
}

sub cmd_logout ($$) {
	my ($self, $tag) = @_;
	delete $self->{-idle_tag};
	$self->write(\"* BYE logging out\r\n$tag OK Logout done\r\n");
	$self->shutdn; # PublicInbox::DS::shutdn
	undef;
}

sub cmd_authenticate ($$$) {
	my ($self, $tag) = @_; # $method = $_[2], should be "ANONYMOUS"
	$self->{-login_tag} = $tag;
	"+\r\n"; # challenge
}

sub cmd_capability ($$) {
	my ($self, $tag) = @_;
	'* '.capa($self)."\r\n$tag OK Capability done\r\n";
}

# uo2m: UID Offset to MSN, this is an arrayref by default,
# but uo2m_hibernate can compact and deduplicate it
sub uo2m_ary_new ($;$) {
	my ($self, $exists) = @_;
	my $ub = $self->{uid_base};
	my $uids = $self->{ibx}->over(1)->uid_range($ub + 1, $ub + UID_SLICE);

	# convert UIDs to offsets from {base}
	my @tmp; # [$UID_OFFSET] => $MSN
	my $msn = 0;
	++$ub;
	$tmp[$_ - $ub] = ++$msn for @$uids;
	$$exists = $msn if $exists;
	\@tmp;
}

# changes UID-offset-to-MSN mapping into a deduplicated scalar:
# uint16_t uo2m[UID_SLICE].
# May be swapped out for idle clients if THP is disabled.
sub uo2m_hibernate ($) {
	my ($self) = @_;
	ref(my $uo2m = $self->{uo2m}) or return;
	my %dedupe = ( uo2m_pack($uo2m) => undef );
	$self->{uo2m} = (keys(%dedupe))[0];
	undef;
}

sub uo2m_last_uid ($) {
	my ($self) = @_;
	defined(my $uo2m = $self->{uo2m}) or die 'BUG: uo2m_last_uid w/o {uo2m}';
	(ref($uo2m) ? @$uo2m : (length($uo2m) >> 1)) + $self->{uid_base};
}

sub uo2m_pack ($) {
	# $_[0] is an arrayref of MSNs, it may have undef gaps if there
	# are gaps in the corresponding UIDs: [ msn1, msn2, undef, msn3 ]
	no warnings 'uninitialized';
	pack('S*', @{$_[0]});
}

# extend {uo2m} to account for new messages which arrived since
# {uo2m} was created.
sub uo2m_extend ($$;$) {
	my ($self, $new_uid_max) = @_;
	defined(my $uo2m = $self->{uo2m}) or
		return($self->{uo2m} = uo2m_ary_new($self));
	my $beg = uo2m_last_uid($self); # last UID we've learned
	return $uo2m if $beg >= $new_uid_max; # fast path

	# need to extend the current range:
	my $base = $self->{uid_base};
	++$beg;
	my $uids = $self->{ibx}->over(1)->uid_range($beg, $base + UID_SLICE);
	return $uo2m if !scalar(@$uids);
	my @tmp; # [$UID_OFFSET] => $MSN
	my $write_method = $_[2] // 'msg_more';
	if (ref($uo2m)) {
		my $msn = $uo2m->[-1];
		$tmp[$_ - $beg] = ++$msn for @$uids;
		$self->$write_method("* $msn EXISTS\r\n");
		push @$uo2m, @tmp;
		$uo2m;
	} else {
		my $msn = unpack('S', substr($uo2m, -2, 2));
		$tmp[$_ - $beg] = ++$msn for @$uids;
		$self->$write_method("* $msn EXISTS\r\n");
		$uo2m .= uo2m_pack(\@tmp);
		my %dedupe = ($uo2m => undef);
		$self->{uo2m} = (keys %dedupe)[0];
	}
}

sub cmd_noop ($$) {
	my ($self, $tag) = @_;
	defined($self->{uid_base}) and
		uo2m_extend($self, $self->{uid_base} + UID_SLICE);
	\"$tag OK Noop done\r\n";
}

# the flexible version which works on scalars and array refs.
# Must call uo2m_extend before this
sub uid2msn ($$) {
	my ($self, $uid) = @_;
	my $uo2m = $self->{uo2m};
	my $off = $uid - $self->{uid_base} - 1;
	ref($uo2m) ? $uo2m->[$off] : unpack('S', substr($uo2m, $off << 1, 2));
}

# returns an arrayref of UIDs, so MSNs can be translated to UIDs via:
# $msn2uid->[$MSN-1] => $UID.  The result of this is always ephemeral
# and does not live beyond the event loop.
sub msn2uid ($) {
	my ($self) = @_;
	my $base = $self->{uid_base};
	my $uo2m = uo2m_extend($self, $base + UID_SLICE);
	$uo2m = [ unpack('S*', $uo2m) ] if !ref($uo2m);

	my $uo = 0;
	my @msn2uid;
	for my $msn (@$uo2m) {
		++$uo;
		$msn2uid[$msn - 1] = $uo + $base if $msn;
	}
	\@msn2uid;
}

# converts a set of message sequence numbers in requests to UIDs:
sub msn_to_uid_range ($$) {
	my $msn2uid = $_[0];
	$_[1] =~ s!([0-9]+)!$msn2uid->[$1 - 1] // ($msn2uid->[-1] // 0 + 1)!sge;
}

# called by PublicInbox::InboxIdle
sub on_inbox_unlock {
	my ($self, $ibx) = @_;
	my $uid_end = $self->{uid_base} + UID_SLICE;
	uo2m_extend($self, $uid_end, 'write');
	my $new = uo2m_last_uid($self);
	if ($new == $uid_end) { # max exceeded $uid_end
		# continue idling w/o inotify
		my $sock = $self->{sock} or return;
		$ibx->unsubscribe_unlock(fileno($sock));
	}
}

# called every minute or so by PublicInbox::DS::later
my $IDLERS; # fileno($obj->{sock}) => PublicInbox::IMAP
sub idle_tick_all {
	my $old = $IDLERS;
	$IDLERS = undef;
	for my $i (values %$old) {
		next if ($i->{wbuf} || !exists($i->{-idle_tag}));
		$IDLERS->{fileno($i->{sock})} = $i;
		$i->write(\"* OK Still here\r\n");
	}
	$IDLERS and
		PublicInbox::DS::add_uniq_timer('idle', 60, \&idle_tick_all);
}

sub cmd_idle ($$) {
	my ($self, $tag) = @_;
	# IDLE seems allowed by dovecot w/o a mailbox selected *shrug*
	my $ibx = $self->{ibx} or return "$tag BAD no mailbox selected\r\n";
	my $uid_end = $self->{uid_base} + UID_SLICE;
	uo2m_extend($self, $uid_end);
	my $sock = $self->{sock} or return;
	my $fd = fileno($sock);
	$self->{-idle_tag} = $tag;
	# only do inotify on most recent slice
	if ($ibx->over(1)->max < $uid_end) {
		$ibx->subscribe_unlock($fd, $self);
		$self->{imapd}->idler_start;
	}
	PublicInbox::DS::add_uniq_timer('idle', 60, \&idle_tick_all);
	$IDLERS->{$fd} = $self;
	\"+ idling\r\n"
}

sub stop_idle ($$) {
	my ($self, $ibx) = @_;
	my $sock = $self->{sock} or return;
	my $fd = fileno($sock);
	delete $IDLERS->{$fd};
	$ibx->unsubscribe_unlock($fd);
}

sub idle_done ($$) {
	my ($self, $tag) = @_; # $tag is "DONE" (case-insensitive)
	defined(my $idle_tag = delete $self->{-idle_tag}) or
		return "$tag BAD not idle\r\n";
	my $ibx = $self->{ibx} or do {
		warn "BUG: idle_tag set w/o inbox";
		return "$tag BAD internal bug\r\n";
	};
	stop_idle($self, $ibx);
	"$idle_tag OK Idle done\r\n";
}

sub ensure_slices_exist ($$) {
	my ($imapd, $ibx) = @_;
	my $mb_top = $ibx->{newsgroup} // return;
	my $mailboxes = $imapd->{mailboxes};
	my $list = $imapd->{mailboxlist}; # may be undef, just autoviv + noop
	for (my $i = int($ibx->art_max/UID_SLICE); $i >= 0; --$i) {
		my $sub_mailbox = "$mb_top.$i";
		last if exists $mailboxes->{$sub_mailbox};
		$mailboxes->{$sub_mailbox} = $ibx;
		$sub_mailbox =~ s/\Ainbox\./INBOX./i; # more familiar to users
		push @$list, qq[* LIST (\\HasNoChildren) "." $sub_mailbox\r\n]
	}
}

sub inbox_lookup ($$;$) {
	my ($self, $mailbox, $examine) = @_;
	my ($ibx, $exists, $uidmax, $uid_base) = (undef, 0, 0, 0);
	$mailbox = lc $mailbox;
	$ibx = $self->{imapd}->{mailboxes}->{$mailbox} or return;
	my $over = $ibx->over(1);
	if ($over != $ibx) { # not a dummy
		$mailbox =~ /\.([0-9]+)\z/ or
				die "BUG: unexpected dummy mailbox: $mailbox\n";
		$uid_base = $1 * UID_SLICE;

		$uidmax = $ibx->mm->num_highwater // 0;
		if ($examine) {
			$self->{uid_base} = $uid_base;
			$self->{ibx} = $ibx;
			$self->{uo2m} = uo2m_ary_new($self, \$exists);
		} else {
			my $uid_end = $uid_base + UID_SLICE;
			$exists = $over->imap_exists($uid_base, $uid_end);
		}
		delete $ibx->{-art_max};
		ensure_slices_exist($self->{imapd}, $ibx);
	} else {
		if ($examine) {
			$self->{uid_base} = $uid_base;
			$self->{ibx} = $ibx;
			delete $self->{uo2m};
		}
		# if "INBOX.foo.bar" is selected and "INBOX.foo.bar.0",
		# check for new UID ranges (e.g. "INBOX.foo.bar.1")
		if (my $ibx = $self->{imapd}->{mailboxes}->{"$mailbox.0"}) {
			delete $ibx->{-art_max};
			ensure_slices_exist($self->{imapd}, $ibx);
		}
	}
	($ibx, $exists, $uidmax + 1, $uid_base);
}

sub cmd_examine ($$$) {
	my ($self, $tag, $mailbox) = @_;
	# XXX: do we need this? RFC 5162/7162
	my $ret = $self->{ibx} ? "* OK [CLOSED] previous closed\r\n" : '';
	my ($ibx, $exists, $uidnext, $base) = inbox_lookup($self, $mailbox, 1);
	return "$tag NO Mailbox doesn't exist: $mailbox\r\n" if !$ibx;
	$ret .= <{uidvalidity}]\r
$tag OK [READ-ONLY] EXAMINE/SELECT done\r
EOF
}

sub _esc ($) {
	my ($v) = @_;
	if (!defined($v)) {
		'NIL';
	} elsif ($v =~ /[{"\r\n%*\\\[]/) { # literal string
		'{' . length($v) . "}\r\n" . $v;
	} else { # quoted string
		qq{"$v"}
	}
}

sub addr_envelope ($$;$) {
	my ($eml, $x, $y) = @_;
	my $v = $eml->header_raw($x) //
		($y ? $eml->header_raw($y) : undef) // return 'NIL';

	my @x = $Address->parse($v) or return 'NIL';
	'(' . join('',
		map { '(' . join(' ',
				_esc($_->name), 'NIL',
				_esc($_->user), _esc($_->host)
			) . ')'
		} @x) .
	')';
}

sub eml_envelope ($) {
	my ($eml) = @_;
	'(' . join(' ',
		_esc($eml->header_raw('Date')),
		_esc($eml->header_raw('Subject')),
		addr_envelope($eml, 'From'),
		addr_envelope($eml, 'Sender', 'From'),
		addr_envelope($eml, 'Reply-To', 'From'),
		addr_envelope($eml, 'To'),
		addr_envelope($eml, 'Cc'),
		addr_envelope($eml, 'Bcc'),
		_esc($eml->header_raw('In-Reply-To')),
		_esc($eml->header_raw('Message-ID')),
	) . ')';
}

sub _esc_hash ($) {
	my ($hash) = @_;
	if ($hash && scalar keys %$hash) {
		$hash = [ %$hash ]; # flatten hash into 1-dimensional array
		'(' . join(' ', map { _esc($_) } @$hash) . ')';
	} else {
		'NIL';
	}
}

sub body_disposition ($) {
	my ($eml) = @_;
	my $cd = $eml->header_raw('Content-Disposition') or return 'NIL';
	$cd = parse_content_disposition($cd);
	my $buf = '('._esc($cd->{type});
	$buf .= ' ' . _esc_hash($cd->{attributes});
	$buf .= ')';
}

sub body_leaf ($$;$) {
	my ($eml, $structure, $hold) = @_;
	my $buf = '';
	$eml->{is_submsg} and # parent was a message/(rfc822|news|global)
		$buf .= eml_envelope($eml). ' ';
	my $ct = $eml->ct;
	$buf .= '('._esc($ct->{type}).' ';
	$buf .= _esc($ct->{subtype});
	$buf .= ' ' . _esc_hash($ct->{attributes});
	$buf .= ' ' . _esc($eml->header_raw('Content-ID'));
	$buf .= ' ' . _esc($eml->header_raw('Content-Description'));
	my $cte = $eml->header_raw('Content-Transfer-Encoding') // '7bit';
	$buf .= ' ' . _esc($cte);
	$buf .= ' ' . $eml->{imap_body_len};
	$buf .= ' '.($eml->body_raw =~ tr/\n/\n/) if lc($ct->{type}) eq 'text';

	# for message/(rfc822|global|news), $hold[0] should have envelope
	$buf .= ' ' . (@$hold ? join('', @$hold) : 'NIL') if $hold;

	if ($structure) {
		$buf .= ' '._esc($eml->header_raw('Content-MD5'));
		$buf .= ' '. body_disposition($eml);
		$buf .= ' '._esc($eml->header_raw('Content-Language'));
		$buf .= ' '._esc($eml->header_raw('Content-Location'));
	}
	$buf .= ')';
}

sub body_parent ($$$) {
	my ($eml, $structure, $hold) = @_;
	my $ct = $eml->ct;
	my $type = lc($ct->{type});
	if ($type eq 'multipart') {
		my $buf = '(';
		$buf .= @$hold ? join('', @$hold) : 'NIL';
		$buf .= ' '._esc($ct->{subtype});
		if ($structure) {
			$buf .= ' '._esc_hash($ct->{attributes});
			$buf .= ' '.body_disposition($eml);
			$buf .= ' '._esc($eml->header_raw('Content-Language'));
			$buf .= ' '._esc($eml->header_raw('Content-Location'));
		}
		$buf .= ')';
		@$hold = ($buf);
	} else { # message/(rfc822|global|news)
		@$hold = (body_leaf($eml, $structure, $hold));
	}
}

# this is gross, but we need to process the parent part AFTER
# the child parts are done
sub bodystructure_prep {
	my ($p, $q) = @_;
	my ($eml, $depth) = @$p; # ignore idx
	# set length here, as $eml->{bdy} gets deleted for message/rfc822
	$eml->{imap_body_len} = length($eml->body_raw);
	push @$q, $eml, $depth;
}

# for FETCH BODY and FETCH BODYSTRUCTURE
sub fetch_body ($;$) {
	my ($eml, $structure) = @_;
	my @q;
	$eml->each_part(\&bodystructure_prep, \@q, 0, 1);
	my $cur_depth = 0;
	my @hold;
	do {
		my ($part, $depth) = splice(@q, -2);
		my $is_mp_parent = $depth == ($cur_depth - 1);
		$cur_depth = $depth;

		if ($is_mp_parent) {
			body_parent($part, $structure, \@hold);
		} else {
			unshift @hold, body_leaf($part, $structure);
		}
	} while (@q);
	join('', @hold);
}

sub fetch_run_ops {
	my ($self, $smsg, $bref, $ops, $partial) = @_;
	my $uid = $smsg->{num};
	$self->msg_more('* '.uid2msn($self, $uid)." FETCH (UID $uid");
	my ($eml, $k);
	for (my $i = 0; $i < @$ops;) {
		$k = $ops->[$i++];
		$ops->[$i++]->($self, $k, $smsg, $bref, $eml);
	}
	partial_emit($self, $partial, $eml) if $partial;
	$self->msg_more(")\r\n");
}

sub requeue { # overrides PublicInbox::DS::requeue
	my ($self) = @_;
	if ($self->{anon}) { # AUTH=ANONYMOUS gets high priority
		$self->SUPER::requeue;
	} else { # low priority
		push(@{$self->{imapd}->{-authed_q}}, $self) == 1 and
			PublicInbox::DS::requeue($self->{imapd});
	}
}

sub fetch_blob_cb { # called by git->cat_async via ibx_async_cat
	my ($bref, $oid, $type, $size, $fetch_arg) = @_;
	my ($self, undef, $msgs, $range_info, $ops, $partial) = @$fetch_arg;
	my $ibx = $self->{ibx} or return $self->close; # client disconnected
	my $smsg = shift @$msgs or die 'BUG: no smsg';
	if (!defined($oid)) {
		# it's possible to have TOCTOU if an admin runs
		# public-inbox-(edit|purge), just move onto the next message
		warn "E: $smsg->{blob} missing in $ibx->{inboxdir}\n";
		return $self->requeue_once;
	} else {
		$smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid";
	}
	my $pre;
	($self->{anon} && !$self->{wbuf} && $msgs->[0]) and
		$pre = ibx_async_prefetch($ibx, $msgs->[0]->{blob},
					\&fetch_blob_cb, $fetch_arg);
	fetch_run_ops($self, $smsg, $bref, $ops, $partial);
	$pre ? $self->dflush : $self->requeue_once;
}

sub emit_rfc822 {
	my ($self, $k, undef, $bref) = @_;
	$self->msg_more(" $k {" . length($$bref)."}\r\n");
	$self->msg_more($$bref);
}

# Mail::IMAPClient::message_string cares about this by default,
# (->Ignoresizeerrors attribute).  Admins are encouraged to
# --reindex for IMAP support, anyways.
sub emit_rfc822_size {
	my ($self, $k, $smsg) = @_;
	$self->msg_more(' RFC822.SIZE ' . $smsg->{bytes});
}

sub emit_internaldate {
	my ($self, undef, $smsg) = @_;
	$self->msg_more(' INTERNALDATE "'.$smsg->internaldate.'"');
}

sub emit_flags { $_[0]->msg_more(' FLAGS ()') }

sub emit_envelope {
	my ($self, undef, undef, undef, $eml) = @_;
	$self->msg_more(' ENVELOPE '.eml_envelope($eml));
}

sub emit_rfc822_header {
	my ($self, $k, undef, undef, $eml) = @_;
	$self->msg_more(" $k {".length(${$eml->{hdr}})."}\r\n");
	$self->msg_more(${$eml->{hdr}});
}

# n.b. this is sorted to be after any emit_eml_new ops
sub emit_rfc822_text {
	my ($self, $k, undef, $bref) = @_;
	$self->msg_more(" $k {".length($$bref)."}\r\n");
	$self->msg_more($$bref);
}

sub emit_bodystructure {
	my ($self, undef, undef, undef, $eml) = @_;
	$self->msg_more(' BODYSTRUCTURE '.fetch_body($eml, 1));
}

sub emit_body {
	my ($self, undef, undef, undef, $eml) = @_;
	$self->msg_more(' BODY '.fetch_body($eml));
}

# set $eml once ($_[4] == $eml, $_[3] == $bref)
sub op_eml_new { $_[4] = PublicInbox::Eml->new($_[3]) }

# s/From / fixes old bug from import (pre-a0c07cba0e5d8b6a)
sub to_crlf_full {
	${$_[0]} =~ s/(?{hdr}) }

sub op_crlf_bdy { ${$_[4]->{bdy}} =~ s/(?{bdy} }

sub uid_clamp ($$$) {
	my ($self, $beg, $end) = @_;
	my $uid_min = $self->{uid_base} + 1;
	my $uid_end = $uid_min + UID_SLICE - 1;
	$$beg = $uid_min if $$beg < $uid_min;
	$$end = $uid_end if $$end > $uid_end;
}

sub range_step ($$) {
	my ($self, $range_csv) = @_;
	my ($beg, $end, $range);
	if ($$range_csv =~ s/\A([^,]+),//) {
		$range = $1;
	} else {
		$range = $$range_csv;
		$$range_csv = undef;
	}
	my $uid_base = $self->{uid_base};
	my $uid_end = $uid_base + UID_SLICE;
	if ($range =~ /\A([0-9]+):([0-9]+)\z/) {
		($beg, $end) = ($1 + 0, $2 + 0);
		uid_clamp($self, \$beg, \$end);
	} elsif ($range =~ /\A([0-9]+):\*\z/) {
		$beg = $1 + 0;
		$end = $self->{ibx}->over(1)->max;
		$end = $uid_end if $end > $uid_end;
		$beg = $end if $beg > $end;
		uid_clamp($self, \$beg, \$end);
	} elsif ($range =~ /\A[0-9]+\z/) {
		$beg = $end = $range + 0;
		# just let the caller do an out-of-range query if a single
		# UID is out-of-range
		++$beg if ($beg <= $uid_base || $end > $uid_end);
	} else {
		return 'BAD fetch range';
	}
	[ $beg, $end, $$range_csv ];
}

sub refill_range ($$$) {
	my ($self, $msgs, $range_info) = @_;
	my ($beg, $end, $range_csv) = @$range_info;
	if (scalar(@$msgs = @{$self->{ibx}->over(1)->query_xover($beg, $end)})){
		$range_info->[0] = $msgs->[-1]->{num} + 1;
		return;
	}
	return 'OK Fetch done' if !$range_csv;
	my $next_range = range_step($self, \$range_csv);
	return $next_range if !ref($next_range); # error
	@$range_info = @$next_range;
	undef; # keep looping
}

sub fetch_blob { # long_response
	my ($self, $tag, $msgs, $range_info, $ops, $partial) = @_;
	while (!@$msgs) { # rare
		if (my $end = refill_range($self, $msgs, $range_info)) {
			$self->write(\"$tag $end\r\n");
			return;
		}
	}
	uo2m_extend($self, $msgs->[-1]->{num});
	ibx_async_cat($self->{ibx}, $msgs->[0]->{blob},
			\&fetch_blob_cb, \@_);
}

sub fetch_smsg { # long_response
	my ($self, $tag, $msgs, $range_info, $ops) = @_;
	while (!@$msgs) { # rare
		if (my $end = refill_range($self, $msgs, $range_info)) {
			$self->write(\"$tag $end\r\n");
			return;
		}
	}
	uo2m_extend($self, $msgs->[-1]->{num});
	fetch_run_ops($self, $_, undef, $ops) for @$msgs;
	@$msgs = ();
	1; # more
}

sub refill_uids ($$$;$) {
	my ($self, $uids, $range_info, $sql) = @_;
	my ($beg, $end, $range_csv) = @$range_info;
	my $over = $self->{ibx}->over(1);
	while (1) {
		if (scalar(@$uids = @{$over->uid_range($beg, $end, $sql)})) {
			$range_info->[0] = $uids->[-1] + 1; # update $beg
			return;
		} elsif (!$range_csv) {
			return 0;
		} else {
			my $next_range = range_step($self, \$range_csv);
			return $next_range if !ref($next_range); # error
			($beg, $end, $range_csv) = @$range_info = @$next_range;
			# continue looping
		}
	}
}

sub fetch_uid { # long_response
	my ($self, $tag, $uids, $range_info, $ops) = @_;
	if (defined(my $err = refill_uids($self, $uids, $range_info))) {
		$err ||= 'OK Fetch done';
		$self->write("$tag $err\r\n");
		return;
	}
	my $adj = $self->{uid_base} + 1;
	my $uo2m = uo2m_extend($self, $uids->[-1]);
	$uo2m = [ unpack('S*', $uo2m) ] if !ref($uo2m);
	my ($i, $k);
	for (@$uids) {
		$self->msg_more("* $uo2m->[$_ - $adj] FETCH (UID $_");
		for ($i = 0; $i < @$ops;) {
			$k = $ops->[$i++];
			$ops->[$i++]->($self, $k);
		}
		$self->msg_more(")\r\n");
	}
	@$uids = ();
	1; # more
}

sub cmd_status ($$$;@) {
	my ($self, $tag, $mailbox, @items) = @_;
	return "$tag BAD no items\r\n" if !scalar(@items);
	($items[0] !~ s/\A\(//s || $items[-1] !~ s/\)\z//s) and
		return "$tag BAD invalid args\r\n";
	my ($ibx, $exists, $uidnext) = inbox_lookup($self, $mailbox);
	return "$tag NO Mailbox doesn't exist: $mailbox\r\n" if !$ibx;
	my @it;
	for my $it (@items) {
		$it = uc($it);
		push @it, $it;
		if ($it =~ /\A(?:MESSAGES|UNSEEN|RECENT)\z/) {
			push @it, $exists;
		} elsif ($it eq 'UIDNEXT') {
			push @it, $uidnext;
		} elsif ($it eq 'UIDVALIDITY') {
			push @it, $ibx->{uidvalidity};
		} else {
			return "$tag BAD invalid item\r\n";
		}
	}
	return "$tag BAD no items\r\n" if !@it;
	"* STATUS $mailbox (".join(' ', @it).")\r\n" .
	"$tag OK Status done\r\n";
}

my %patmap = ('*' => '.*', '%' => '[^\.]*');
sub cmd_list ($$$$) {
	my ($self, $tag, $refname, $wildcard) = @_;
	my $l = $self->{imapd}->{mailboxlist};
	if ($refname eq '' && $wildcard eq '') {
		# request for hierarchy delimiter
		$l = [ qq[* LIST (\\Noselect) "." ""\r\n] ];
	} elsif ($refname ne '' || $wildcard ne '*') {
		$wildcard =~ s!([^a-z0-9_])!$patmap{$1} // "\Q$1"!egi;
		$l = [ grep(/ \Q$refname\E$wildcard\r\n\z/is, @$l) ];
	}
	\(join('', @$l, "$tag OK List done\r\n"));
}

sub cmd_lsub ($$$$) {
	my (undef, $tag) = @_; # same args as cmd_list
	"$tag OK Lsub done\r\n";
}

sub eml_index_offs_i { # PublicInbox::Eml::each_part callback
	my ($p, $all) = @_;
	my ($eml, undef, $idx) = @$p;
	if ($idx && lc($eml->ct->{type}) eq 'multipart') {
		$eml->{imap_bdy} = $eml->{bdy} // \'';
	}
	$all->{$idx} = $eml; # $idx => Eml
}

# prepares an index for BODY[$SECTION_IDX] fetches
sub eml_body_idx ($$) {
	my ($eml, $section_idx) = @_;
	my $idx = $eml->{imap_all_parts} // do {
		my $all = {};
		$eml->each_part(\&eml_index_offs_i, $all, 0, 1);
		# top-level of multipart, BODY[0] not allowed (nz-number)
		delete $all->{0};
		$eml->{imap_all_parts} = $all;
	};
	$idx->{$section_idx};
}

# BODY[($SECTION_IDX)?(.$SECTION_NAME)?]<$offset.$bytes>
sub partial_body {
	my ($eml, $section_idx, $section_name) = @_;
	if (defined $section_idx) {
		$eml = eml_body_idx($eml, $section_idx) or return;
	}
	if (defined $section_name) {
		if ($section_name eq 'MIME') {
			# RFC 3501 6.4.5 states:
			#	The MIME part specifier MUST be prefixed
			#	by one or more numeric part specifiers
			return unless defined $section_idx;
			return $eml->header_obj->as_string . "\r\n";
		}
		my $bdy = $eml->{bdy} // $eml->{imap_bdy} // \'';
		$eml = PublicInbox::Eml->new($$bdy);
		if ($section_name eq 'TEXT') {
			return $eml->body_raw;
		} elsif ($section_name eq 'HEADER') {
			return $eml->header_obj->as_string . "\r\n";
		} else {
			die "BUG: bad section_name=$section_name";
		}
	}
	${$eml->{bdy} // $eml->{imap_bdy} // \''};
}

# similar to what's in PublicInbox::Eml::re_memo, but doesn't memoize
# to avoid OOM with malicious users
sub hdrs_regexp ($) {
	my ($hdrs) = @_;
	my $names = join('|', map { "\Q$_" } split(/[ \t]+/, $hdrs));
	qr/^(?:$names):[ \t]*[^\n]*\r?\n # 1st line
		# continuation lines:
		(?:[^:\n]*?[ \t]+[^\n]*\r?\n)*
		/ismx;
}

# BODY[($SECTION_IDX.)?HEADER.FIELDS.NOT ($HDRS)]<$offset.$bytes>
sub partial_hdr_not {
	my ($eml, $section_idx, $hdrs_re) = @_;
	if (defined $section_idx) {
		$eml = eml_body_idx($eml, $section_idx) or return;
	}
	my $str = $eml->header_obj->as_string;
	$str =~ s/$hdrs_re//g;
	$str =~ s/(?
sub partial_hdr_get {
	my ($eml, $section_idx, $hdrs_re) = @_;
	if (defined $section_idx) {
		$eml = eml_body_idx($eml, $section_idx) or return;
	}
	my $str = $eml->header_obj->as_string;
	$str = join('', ($str =~ m/($hdrs_re)/g));
	$str =~ s/(?= 0) {
		my $next = shift @$want or return;
		$att .= ' ' . uc($next);
	}
	if ($att =~ /\ABODY\[([0-9]+(?:\.[0-9]+)*)? # 1 - section_idx
			(?:\.(HEADER|MIME|TEXT))? # 2 - section_name
			\](?:<([0-9]+)(?:\.([0-9]+))?>)?\z/sx) { # 3, 4
		$partial->{$att} = [ \&partial_body, $1, $2, $3, $4 ];
		$$need |= CRLF_BREF|EML_HDR|EML_BDY;
	} elsif ($att =~ /\ABODY\[(?:([0-9]+(?:\.[0-9]+)*)\.)? # 1 - section_idx
				(?:HEADER\.FIELDS(\.NOT)?)\x20 # 2
				\(([A-Z0-9\-\x20]+)\) # 3 - hdrs
			\](?:<([0-9]+)(?:\.([0-9]+))?>)?\z/sx) { # 4 5
		my $tmp = $partial->{$att} = [ $2 ? \&partial_hdr_not
						: \&partial_hdr_get,
						$1, undef, $4, $5 ];
		$tmp->[2] = hdrs_regexp($3);

		# don't emit CRLF_HDR instruction, here, partial_hdr_*
		# will do CRLF conversion with only the extracted result
		# and not waste time converting lines we don't care about.
		$$need |= EML_HDR;
	} else {
		undef;
	}
}

sub partial_emit ($$$) {
	my ($self, $partial, $eml) = @_;
	for (@$partial) {
		my ($k, $cb, @args) = @$_;
		my ($offset, $len) = splice(@args, -2);
		# $cb is partial_body|partial_hdr_get|partial_hdr_not
		my $str = $cb->($eml, @args) // '';
		if (defined $offset) {
			if (defined $len) {
				$str = substr($str, $offset, $len);
				$k =~ s/\.$len>\z/>/ or warn
"BUG: unable to remove `.$len>' from `$k'";
			} else {
				$str = substr($str, $offset);
				$len = length($str);
			}
		} else {
			$len = length($str);
		}
		$self->msg_more(" $k {$len}\r\n");
		$self->msg_more($str);
	}
}

sub fetch_compile ($) {
	my ($want) = @_;
	if ($want->[0] =~ s/\A\(//s) {
		$want->[-1] =~ s/\)\z//s or return 'BAD no rparen';
	}
	my (%partial, %seen, @op);
	my $need = 0;
	while (defined(my $att = shift @$want)) {
		$att = uc($att);
		next if $att eq 'UID'; # always returned
		$att =~ s/\ABODY\.PEEK\[/BODY\[/; # we're read-only
		my $x = $FETCH_ATT{$att};
		if ($x) {
			while (my ($k, $fl_cb) = each %$x) {
				next if $seen{$k}++;
				$need |= $fl_cb->[0];
				push @op, [ @$fl_cb, $k ];
			}
		} elsif (!partial_prepare(\$need, \%partial, $want, $att)) {
			return "BAD param: $att";
		}
	}
	my @r;

	# stabilize partial order for consistency and ease-of-debugging:
	if (scalar keys %partial) {
		$need |= NEED_BLOB;
		@{$r[2]} = map { [ $_, @{$partial{$_}} ] } sort keys %partial;
	}

	push @op, $OP_EML_NEW if ($need & (EML_HDR|EML_BDY));

	# do we need CRLF conversion?
	if ($need & CRLF_BREF) {
		push @op, $OP_CRLF_BREF;
	} elsif (my $crlf = ($need & (CRLF_HDR|CRLF_BDY))) {
		if ($crlf == (CRLF_HDR|CRLF_BDY)) {
			push @op, $OP_CRLF_BREF;
		} elsif ($need & CRLF_HDR) {
			push @op, $OP_CRLF_HDR;
		} else {
			push @op, $OP_CRLF_BDY;
		}
	}

	$r[0] = $need & NEED_BLOB ? \&fetch_blob :
		($need & NEED_SMSG ? \&fetch_smsg : \&fetch_uid);

	# r[1] = [ $key1, $cb1, $key2, $cb2, ... ]
	use sort 'stable'; # makes output more consistent
	@{$r[1]} = map { ($_->[2], $_->[1]) } sort { $a->[0] <=> $b->[0] } @op;
	@r;
}

sub cmd_uid_fetch ($$$$;@) {
	my ($self, $tag, $range_csv, @want) = @_;
	my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
	my ($cb, $ops, $partial) = fetch_compile(\@want);
	return "$tag $cb\r\n" unless $ops;

	# cb is one of fetch_blob, fetch_smsg, fetch_uid
	$range_csv = 'bad' if $range_csv !~ $valid_range;
	my $range_info = range_step($self, \$range_csv);
	return "$tag $range_info\r\n" if !ref($range_info);
	uo2m_hibernate($self) if $cb == \&fetch_blob; # slow, save RAM
	$self->long_response($cb, $tag, [], $range_info, $ops, $partial);
}

sub cmd_fetch ($$$$;@) {
	my ($self, $tag, $range_csv, @want) = @_;
	my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
	my ($cb, $ops, $partial) = fetch_compile(\@want);
	return "$tag $cb\r\n" unless $ops;

	# cb is one of fetch_blob, fetch_smsg, fetch_uid
	$range_csv = 'bad' if $range_csv !~ $valid_range;
	msn_to_uid_range(msn2uid($self), $range_csv);
	my $range_info = range_step($self, \$range_csv);
	return "$tag $range_info\r\n" if !ref($range_info);
	uo2m_hibernate($self) if $cb == \&fetch_blob; # slow, save RAM
	$self->long_response($cb, $tag, [], $range_info, $ops, $partial);
}

sub msn_convert ($$) {
	my ($self, $uids) = @_;
	my $adj = $self->{uid_base} + 1;
	my $uo2m = uo2m_extend($self, $uids->[-1]);
	$uo2m = [ unpack('S*', $uo2m) ] if !ref($uo2m);
	$_ = $uo2m->[$_ - $adj] for @$uids;
}

sub search_uid_range { # long_response
	my ($self, $tag, $sql, $range_info, $want_msn) = @_;
	my $uids = [];
	if (defined(my $err = refill_uids($self, $uids, $range_info, $sql))) {
		$err ||= 'OK Search done';
		$self->write("\r\n$tag $err\r\n");
		return;
	}
	msn_convert($self, $uids) if $want_msn;
	$self->msg_more(join(' ', '', @$uids));
	1; # more
}

sub parse_imap_query ($$) {
	my ($self, $query) = @_;
	my $q = PublicInbox::IMAPsearchqp::parse($self, $query);
	if (ref($q)) {
		my $max = $self->{ibx}->over(1)->max;
		my $beg = 1;
		uid_clamp($self, \$beg, \$max);
		$q->{range_info} = [ $beg, $max ];
	}
	$q;
}

sub search_common {
	my ($self, $tag, $query, $want_msn) = @_;
	my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
	my $q = parse_imap_query($self, $query);
	return "$tag $q\r\n" if !ref($q);
	my ($sql, $range_info) = delete @$q{qw(sql range_info)};
	if (!scalar(keys %$q)) { # overview.sqlite3
		$self->msg_more('* SEARCH');
		$self->long_response(\&search_uid_range,
				$tag, $sql, $range_info, $want_msn);
	} elsif ($q = $q->{xap}) {
		my $srch = $self->{ibx}->isrch or
			return "$tag BAD search not available for mailbox\r\n";
		my $opt = {
			relevance => -1,
			limit => UID_SLICE,
			uid_range => $range_info
		};
		my $mset = $srch->mset($q, $opt);
		my $uids = $srch->mset_to_artnums($mset, $opt);
		msn_convert($self, $uids) if scalar(@$uids) && $want_msn;
		"* SEARCH @$uids\r\n$tag OK Search done\r\n";
	} else {
		"$tag BAD Error\r\n";
	}
}

sub cmd_uid_search ($$$) {
	my ($self, $tag, $query) = @_;
	search_common($self, $tag, $query);
}

sub cmd_search ($$$;) {
	my ($self, $tag, $query) = @_;
	search_common($self, $tag, $query, 1);
}

# returns 1 if we can continue, 0 if not due to buffered writes or disconnect
sub process_line ($$) {
	my ($self, $l) = @_;

	# TODO: IMAP allows literals for big requests to upload messages
	# (which we don't support) but maybe some big search queries use it.
	# RFC 3501 9 (2) doesn't permit TAB or multiple SP
	my ($tag, $req, @args) = parse_line('[ \t]+', 0, $l);
	pop(@args) if (@args && !defined($args[-1]));
	if (@args && uc($req) eq 'UID') {
		$req .= "_".(shift @args);
	}
	my $res = eval {
		if (defined(my $idle_tag = $self->{-idle_tag})) {
			(uc($tag // '') eq 'DONE' && !defined($req)) ?
				idle_done($self, $tag) :
				"$idle_tag BAD expected DONE\r\n";
		} elsif (my $cmd = $self->can('cmd_'.lc($req // ''))) {
			if ($cmd == \&cmd_uid_search || $cmd == \&cmd_search) {
				# preserve user-supplied quotes for search
				(undef, @args) = split(/ search /i, $l, 2);
			}
			$cmd->($self, $tag, @args);
		} else { # this is weird
			auth_challenge_ok($self) //
					($tag // '*') .
					' BAD Error in IMAP command '.
					($req // '(???)').
					": Unknown command\r\n";
		}
	};
	my $err = $@;
	if ($err && $self->{sock}) {
		$l =~ s/\r?\n//s;
		warn("error from: $l ($err)\n");
		$tag //= '*';
		$res = \"$tag BAD program fault - command not performed\r\n";
	}
	defined($res) ? $self->write($res) : 0;
}

sub out ($$;@) {
	my ($self, $fmt, @args) = @_;
	printf { $self->{imapd}->{out} } $fmt."\n", @args;
}

# callback used by PublicInbox::DS for any (e)poll (in/out/hup/err)
sub event_step {
	my ($self) = @_;
	local $SIG{__WARN__} = $self->{imapd}->{warn_cb};
	return unless $self->flush_write && $self->{sock} && !$self->{long_cb};

	# only read more requests if we've drained the write buffer,
	# otherwise we can be buffering infinitely w/o backpressure

	my $rbuf = $self->{rbuf} // \(my $x = '');
	my $line = index($$rbuf, "\n");
	while ($line < 0) {
		if (length($$rbuf) >= LINE_MAX) {
			$self->write(\"\* BAD request too long\r\n");
			return $self->close;
		}
		$self->do_read($rbuf, LINE_MAX, length($$rbuf)) or
				return uo2m_hibernate($self);
		$line = index($$rbuf, "\n");
	}
	$line = substr($$rbuf, 0, $line + 1, '');
	$line =~ s/\r?\n\z//s;
	return $self->close if $line =~ /[[:cntrl:]]/s;
	my $t0 = now();
	my $fd = fileno($self->{sock});
	my $r = eval { process_line($self, $line) };
	my $pending = $self->{wbuf} ? ' pending' : '';
	out($self, "[$fd] %s - %0.6f$pending - $r", $line, now() - $t0);

	return $self->close if $r < 0;
	$self->rbuf_idle($rbuf);

	# maybe there's more pipelined data, or we'll have
	# to register it for socket-readiness notifications
	$self->requeue unless $pending;
}

# RFC 4978
sub cmd_compress ($$$) {
	my ($self, $tag, $alg) = @_;
	return "$tag BAD DEFLATE only\r\n" if uc($alg) ne "DEFLATE";
	return "$tag BAD COMPRESS active\r\n" if $self->compressed;

	# CRIME made TLS compression obsolete
	# return "$tag NO [COMPRESSIONACTIVE]\r\n" if $self->tls_compressed;

	PublicInbox::IMAPdeflate->enable($self) or return
				\"$tag BAD failed to activate compression\r\n";
	PublicInbox::DS::write($self, \"$tag OK DEFLATE active\r\n");
	$self->requeue;
	undef
}

sub cmd_starttls ($$) {
	my ($self, $tag) = @_;
	(($self->{sock} // return)->can('stop_SSL') || $self->compressed) and
		return "$tag BAD TLS or compression already enabled\r\n";
	$self->{imapd}->{ssl_ctx_opt} or
		return "$tag BAD can not initiate TLS negotiation\r\n";
	$self->write(\"$tag OK begin TLS negotiation now\r\n");
	PublicInbox::TLS::start($self->{sock}, $self->{imapd});
	$self->requeue if PublicInbox::DS::accept_tls_step($self);
	undef;
}

sub busy { # for graceful shutdown in PublicInbox::Daemon:
	my ($self) = @_;
	if (defined($self->{-idle_tag})) {
		$self->write(\"* BYE server shutting down\r\n");
		return; # not busy anymore
	}
	defined($self->{rbuf}) || defined($self->{wbuf}) ||
		!$self->write(\"* BYE server shutting down\r\n");
}

sub close {
	my ($self) = @_;
	if (my $ibx = delete $self->{ibx}) {
		stop_idle($self, $ibx);
	}
	$self->SUPER::close; # PublicInbox::DS::close
}

# we're read-only, so SELECT and EXAMINE do the same thing
no warnings 'once';
*cmd_select = \&cmd_examine;

package PublicInbox::IMAP_preauth;
our @ISA = qw(PublicInbox::IMAP);

sub logged_in { 0 }

package PublicInbox::IMAPdeflate;
use PublicInbox::DSdeflate;
our @ISA = qw(PublicInbox::DSdeflate PublicInbox::IMAP);

1;
public-inbox-1.9.0/lib/PublicInbox/IMAPClient.pm000066400000000000000000000074461430031475700213420ustar00rootroot00000000000000# This library is free software; you can redistribute it and/or modify it
# under the same terms as Perl itself, either Perl version 5.8.0 or, at
# your option, any later version of Perl 5 you may have available.
#
# The license for this file differs from the rest of public-inbox.
#
# Workaround some bugs in upstream Mail::IMAPClient <= 3.42 when
# compression is enabled:
# - reference cycle: https://rt.cpan.org/Ticket/Display.html?id=132654
# - read starvation: https://rt.cpan.org/Ticket/Display.html?id=132720
package PublicInbox::IMAPClient;
use strict;
use parent 'Mail::IMAPClient';
unless (eval('use Mail::IMAPClient 3.43')) {
require Errno;
no warnings 'once';

# RFC4978 COMPRESS
*compress = sub {
    my ($self) = @_;

    # BUG? strict check on capability commented out for now...
    #my $can = $self->has_capability("COMPRESS")
    #return undef unless $can and $can eq "DEFLATE";

    $self->_imap_command("COMPRESS DEFLATE") or return undef;

    my $zcl = $self->_load_module("Compress-Zlib") or return undef;

    # give caller control of args if desired
    $self->Compress(
        [
            -WindowBits => -$zcl->MAX_WBITS(),
            -Level      => $zcl->Z_BEST_SPEED()
        ]
    ) unless ( $self->Compress and ref( $self->Compress ) eq "ARRAY" );

    my ( $rc, $do, $io );

    ( $do, $rc ) = Compress::Zlib::deflateInit( @{ $self->Compress } );
    unless ( $rc == $zcl->Z_OK ) {
        $self->LastError("deflateInit failed (rc=$rc)");
        return undef;
    }

    ( $io, $rc ) =
      Compress::Zlib::inflateInit( -WindowBits => -$zcl->MAX_WBITS() );
    unless ( $rc == $zcl->Z_OK ) {
        $self->LastError("inflateInit failed (rc=$rc)");
        return undef;
    }

    $self->{Prewritemethod} = sub {
        my ( $self, $string ) = @_;

        my ( $rc, $out1, $out2 );
        ( $out1, $rc ) = $do->deflate($string);
        ( $out2, $rc ) = $do->flush( $zcl->Z_PARTIAL_FLUSH() )
          unless ( $rc != $zcl->Z_OK );

        unless ( $rc == $zcl->Z_OK ) {
            $self->LastError("deflate/flush failed (rc=$rc)");
            return undef;
        }

        return $out1 . $out2;
    };

    # need to retain some state for Readmoremethod/Readmethod calls
    my ( $Zbuf, $Ibuf ) = ( "", "" );

    $self->{Readmoremethod} = sub {
        my $self = shift;
        return 1 if ( length($Zbuf) || length($Ibuf) );
        $self->__read_more(@_);
    };

    $self->{Readmethod} = sub {
        my ( $self, $fh, $buf, $len, $off ) = @_;

        # get more data, but empty $Ibuf first if any data is left
        my ( $lz, $li ) = ( length $Zbuf, length $Ibuf );
        if ( $lz || !$li ) {
            my $readlen = $self->Buffer || 4096;
            my $ret = sysread( $fh, $Zbuf, $readlen, length $Zbuf );
            $lz = length $Zbuf;
            return $ret if ( !$ret && !$lz );    # $ret is undef or 0
        }

        # accumulate inflated data in $Ibuf
        if ($lz) {
            my ( $tbuf, $rc ) = $io->inflate( \$Zbuf );
            unless ( $rc == $zcl->Z_OK ) {
                $self->LastError("inflate failed (rc=$rc)");
                return undef;
            }
            $Ibuf .= $tbuf;
            $li = length $Ibuf;
        }

        if ( !$li ) {
            # note: faking EAGAIN here is only safe with level-triggered
            # I/O readiness notifications (select, poll).  Refactoring
            # callers will be needed in the unlikely case somebody wants
            # to use edge-triggered notifications (EV_CLEAR, EPOLLET).
            $! = Errno::EAGAIN();
            return undef;
        }

        # pull desired length of data from $Ibuf
        my $tbuf = substr( $Ibuf, 0, $len );
        substr( $Ibuf, 0, $len ) = "";
        substr( $$buf, $off ) = $tbuf;

        return length $tbuf;
    };

    return $self;
};
} # $Mail::IMAPClient::VERSION < 3.43

1;
public-inbox-1.9.0/lib/PublicInbox/IMAPD.pm000066400000000000000000000056501430031475700203020ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# represents an IMAPD, see script/public-inbox-imapd for how it is used
package PublicInbox::IMAPD;
use strict;
use v5.10.1;
use PublicInbox::Config;
use PublicInbox::InboxIdle;
use PublicInbox::IMAP;
use PublicInbox::DummyInbox;
my $dummy = bless { uidvalidity => 0 }, 'PublicInbox::DummyInbox';

sub new {
	my ($class) = @_;
	bless {
		# mailboxes => {},
		err => \*STDERR,
		out => \*STDOUT,
		# ssl_ctx_opt => { SSL_cert_file => ..., SSL_key_file => ... }
		# pi_cfg => PublicInbox::Config
		# idler => PublicInbox::InboxIdle
	}, $class;
}

sub _refresh_ibx { # pi_cfg->each_inbox cb
	my ($ibx, $imapd, $cache, $dummies) = @_;
	my $ngname = $ibx->{newsgroup} // return;

	# We require lower-case since IMAP mailbox names are
	# case-insensitive (but -nntpd matches INN in being
	# case-sensitive)
	if ($ngname =~ m![^a-z0-9/_\.\-\~\@\+\=:]! ||
			# don't confuse with 50K slices
			$ngname =~ /\.[0-9]+\z/) {
		warn "mailbox name invalid: newsgroup=`$ngname'\n";
		return;
	}
	my $ce = $cache->{$ngname};
	%$ibx = (%$ibx, %$ce) if $ce;
	# only valid if msgmap and over works:
	if (defined($ibx->uidvalidity)) {
		# fill ->{mailboxes}:
		PublicInbox::IMAP::ensure_slices_exist($imapd, $ibx);
		# preload to avoid fragmentation:
		$ibx->description;
		# ensure dummies are selectable:
		do {
			$dummies->{$ngname} = $dummy;
		} while ($ngname =~ s/\.[^\.]+\z//);
	}
	delete @$ibx{qw(mm over)};
}

sub refresh_groups {
	my ($self, $sig) = @_;
	my $pi_cfg = PublicInbox::Config->new;
	$self->{mailboxes} = $pi_cfg->{-imap_mailboxes} // do {
		my $mailboxes = $self->{mailboxes} = {};
		my $cache = eval { $pi_cfg->ALL->misc->nntpd_cache_load } // {};
		my $dummies = {};
		$pi_cfg->each_inbox(\&_refresh_ibx, $self, $cache, $dummies);
		%$mailboxes = (%$dummies, %$mailboxes);
		@{$pi_cfg->{-imap_mailboxlist}} = map { $_->[2] }
			sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] }
			map {
				# capitalize "INBOX" for user-familiarity
				my $u = $_;
				$u =~ s/\Ainbox(\.|\z)/INBOX$1/i;
				if ($mailboxes->{$_} == $dummy) {
					[ $u, -1,
					  qq[* LIST (\\HasChildren) "." $u\r\n]]
				} else {
					$u =~ /\A(.+)\.([0-9]+)\z/ or die
"BUG: `$u' has no slice digit(s)";
					[ $1, $2 + 0, '* LIST '.
					  qq[(\\HasNoChildren) "." $u\r\n] ]
				}
			} keys %$mailboxes;
		$pi_cfg->{-imap_mailboxes} = $mailboxes;
	};
	$self->{mailboxlist} = $pi_cfg->{-imap_mailboxlist} //
			die 'BUG: no mailboxlist';
	$self->{pi_cfg} = $pi_cfg;
	if (my $idler = $self->{idler}) {
		$idler->refresh($pi_cfg);
	}
}

sub idler_start {
	$_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_cfg});
}

sub event_step { # called vai requeue for low-priority IMAP clients
	my ($self) = @_;
	my $imap = shift(@{$self->{-authed_q}}) // return;
	PublicInbox::DS::requeue($self) if scalar(@{$self->{-authed_q}});
	$imap->event_step; # PublicInbox::IMAP::event_step
}

1;
public-inbox-1.9.0/lib/PublicInbox/IMAPTracker.pm000066400000000000000000000043561430031475700215140ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 
package PublicInbox::IMAPTracker;
use strict;
use parent qw(PublicInbox::Lock);
use DBI;
use DBD::SQLite;
use PublicInbox::Config;

sub create_tables ($) {
	my ($dbh) = @_;

	$dbh->do(<<'');
CREATE TABLE IF NOT EXISTS imap_last (
	url VARCHAR PRIMARY KEY NOT NULL,
	uid_validity INTEGER NOT NULL,
	uid INTEGER NOT NULL,
	UNIQUE (url)
)

}

sub dbh_new ($) {
	my ($dbname) = @_;
	my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", '', '', {
		AutoCommit => 1,
		RaiseError => 1,
		PrintError => 0,
		sqlite_use_immediate_transaction => 1,
	});
	$dbh->{sqlite_unicode} = 1;

	# TRUNCATE reduces I/O compared to the default (DELETE).
	# Allow and preserve user-overridden WAL, but don't force it.
	my $jm = $dbh->selectrow_array('PRAGMA journal_mode');
	$dbh->do('PRAGMA journal_mode = TRUNCATE') if $jm ne 'wal';

	create_tables($dbh);
	$dbh;
}

sub get_last ($) {
	my ($self) = @_;
	my $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
SELECT uid_validity, uid FROM imap_last WHERE url = ?

	$sth->execute($self->{url});
	$sth->fetchrow_array;
}

sub update_last ($$$) {
	my ($self, $validity, $last_uid) = @_;
	return unless defined $last_uid;
	my $sth = $self->{dbh}->prepare_cached(<<'');
INSERT OR REPLACE INTO imap_last (url, uid_validity, uid)
VALUES (?, ?, ?)

	$self->lock_acquire;
	my $rv = $sth->execute($self->{url}, $validity, $last_uid);
	$self->lock_release;
	$rv;
}

sub new {
	my ($class, $url) = @_;

	# original name for compatibility with old setups:
	my $dbname = PublicInbox::Config->config_dir() . '/imap.sqlite3';

	# use the new XDG-compliant name for new setups:
	if (!-f $dbname) {
		$dbname = ($ENV{XDG_DATA_HOME} //
			(($ENV{HOME} // '/nonexistent').'/.local/share')) .
			'/public-inbox/imap.sqlite3';
	}
	if (!-f $dbname) {
		require File::Path;
		require PublicInbox::Syscall;
		my ($dir) = ($dbname =~ m!(.*?/)[^/]+\z!);
		File::Path::mkpath($dir);
		PublicInbox::Syscall::nodatacow_dir($dir);
		open my $fh, '+>>', $dbname or die "failed to open $dbname: $!";
	}
	my $self = bless { lock_path => "$dbname.lock", url => $url }, $class;
	$self->lock_acquire;
	$self->{dbh} = dbh_new($dbname);
	$self->lock_release;
	$self;
}

1;
public-inbox-1.9.0/lib/PublicInbox/IMAPsearchqp.pm000066400000000000000000000203071430031475700217210ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors 
# License: AGPL-3.0+ 
# IMAP search query parser.  cf RFC 3501

# We currently compile Xapian queries to a string which is fed
# to Xapian's query parser.  However, we may use Xapian-provided
# Query object API to build an optree, instead.
package PublicInbox::IMAPsearchqp;
use strict;
use Parse::RecDescent;
use Time::Local qw(timegm);
use POSIX qw(strftime);
our $q = bless {}, __PACKAGE__; # singleton, reachable in generated P::RD
my @MoY = qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC);
my %MM = map {; $MoY[$_-1] => sprintf('%02u', $_) } (1..12);

# IMAP to Xapian header search key mapping
my %IH2X = (
	SUBJECT => 's:',
	BODY => 'b:',
	# TEXT => undef, # => everything
	FROM => 'f:',
	TO => 't:',
	CC => 'c:',
	# BCC => 'bcc:', # TODO

	# IMAP allows searching arbitrary headers via
	# "HEADER $field_name $string" which gets silly expensive.
	# We only allow the headers we already index.
	'MESSAGE-ID' => 'm:',
	'LIST-ID' => 'l:',
	# KEYWORD # TODO ? dfpre,dfpost,...
);

sub uid_set_xap ($$) {
	my ($self, $seq_set) = @_;
	my @u;
	do {
		my $u = $self->{imap}->range_step(\$seq_set);
		die $u unless ref($u); # break out of the parser on error
		push @u, "uid:$u->[0]..$u->[1]";
	} while ($seq_set);
	push(@{$q->{xap}}, @u > 1 ? '('.join(' OR ', @u).')' : $u[0]);
}

sub xap_only ($;$) {
	my ($self, $query) = @_;
	delete $self->{sql}; # query too complex for over.sqlite3
	push @{$self->{xap}}, $query if defined($query);

	# looks like we can't use SQLite-only, convert SQLite UID
	# ranges to Xapian:
	if (my $uid = delete $self->{uid}) {
		uid_set_xap($self, $_) for @$uid;
	}
	1;
}

sub ih2x {
	my ($self, $field_name, $s) = @_; # $self == $q
	$s =~ /\A"(.*?)"\z/s and $s = $1;

	# AFAIK Xapian can't handle [*"] in probabilistic terms,
	# and it relies on lowercase
	my $xk = defined($field_name) ? ($IH2X{$field_name} // '') : '';
	xap_only($self,
		lc(join(' ', map { qq[$xk"$_"] } split(/[\*"\s]+/, $s))));
	1;
}

sub subq_enter {
	xap_only($q);
	my $old = delete($q->{xap}) // [];
	my $nr = push @{$q->{stack}}, $old;
	die 'BAD deep recursion' if $nr > 10;
	$q->{xap} = [];
}

sub subq_leave {
	my $child = delete $q->{xap};
	my $parent = $q->{xap} = pop @{$q->{stack}};
	push(@$parent, @$child > 1 ? '('.join(' ', @$child).')' : $child->[0]);
	1;
}

sub yyyymmdd ($) {
	my ($item) = @_;
	my ($dd, $mon, $yyyy) = split(/-/, $item->{date}, 3);
	my $mm = $MM{$mon} // die "BAD month: $mon";
	wantarray ? ($yyyy, $mm, sprintf('%02u', $dd))
		: timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
}

sub SENTSINCE {
	my ($self, $item) = @_;
	my ($yyyy, $mm, $dd) = yyyymmdd($item);
	push @{$self->{xap}}, "d:$yyyy$mm$dd..";
	my $sql = $self->{sql} or return 1;
	my $ds = timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
	$$sql .= " AND ds >= $ds";
}

sub SENTON {
	my ($self, $item) = @_;
	my ($yyyy, $mm, $dd) = yyyymmdd($item);
	my $ds = timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
	my $end = $ds + 86399; # no leap day
	my $dt_end = strftime('%Y%m%d%H%M%S', gmtime($end));
	push @{$self->{xap}}, "dt:$yyyy$mm$dd"."000000..$dt_end";
	my $sql = $self->{sql} or return 1;
	$$sql .= " AND ds >= $ds AND ds <= $end";
}

sub SENTBEFORE {
	my ($self, $item) = @_;
	my ($yyyy, $mm, $dd) = yyyymmdd($item);
	push @{$self->{xap}}, "d:..$yyyy$mm$dd";
	my $sql = $self->{sql} or return 1;
	my $ds = timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
	$$sql .= " AND ds <= $ds";
}

sub ON {
	my ($self, $item) = @_;
	my $ts = yyyymmdd($item);
	my $end = $ts + 86399; # no leap day
	push @{$self->{xap}}, "rt:$ts..$end";
	my $sql = $self->{sql} or return 1;
	$$sql .= " AND ts >= $ts AND ts <= $end";
}

sub BEFORE {
	my ($self, $item) = @_;
	my $ts = yyyymmdd($item);
	push @{$self->{xap}}, "rt:..$ts";
	my $sql = $self->{sql} or return 1;
	$$sql .= " AND ts <= $ts";
}

sub SINCE {
	my ($self, $item) = @_;
	my $ts = yyyymmdd($item);
	push @{$self->{xap}}, "rt:$ts..";
	my $sql = $self->{sql} or return 1;
	$$sql .= " AND ts >= $ts";
}

sub uid_set ($$) {
	my ($self, $seq_set) = @_;
	if ($self->{sql}) {
		push @{$q->{uid}}, $seq_set;
	} else { # we've gone Xapian-only
		uid_set_xap($self, $seq_set);
	}
	1;
}

sub msn_set {
	my ($self, $seq_set) = @_;
	PublicInbox::IMAP::msn_to_uid_range(
		$self->{msn2uid} //= $self->{imap}->msn2uid, $seq_set);
	uid_set($self, $seq_set);
}

# things that should not match
sub impossible {
	my ($self) = @_;
	push @{$self->{xap}}, 'z:..0';
	my $sql = $self->{sql} or return 1;
	$$sql .= ' AND num < 0';
}

my $prd = Parse::RecDescent->new(<<'EOG');

{ my $q = $PublicInbox::IMAPsearchqp::q; }
search_key : CHARSET(?) search_key1(s) { $return = $q }

# n.b. we silently ignore most per-message flags right now;
# they're here for now to not dump parser errors.
search_key1 : "ALL" | "ANSWERED" | "RECENT" | "UNSEEN" | "SEEN" | "NEW"
	| "UNANSWERED" | "UNDELETED" | "UNDRAFT" | "UNFLAGGED"
	| DELETED | DRAFT | FLAGGED | OLD
	| OR_search_keys
	| NOT_search_key
	| LARGER_number
	| SMALLER_number
	| SENTSINCE_date
	| SENTON_date
	| SENTBEFORE_date
	| SINCE_date
	| ON_date
	| BEFORE_date
	| FROM_string
	| HEADER_field_name_string
	| TO_string
	| CC_string
	| BCC_string
	| SUBJECT_string
	| BODY_string
	| TEXT_string
	| UID_set
	| MSN_set
	| sub_query
	| 

charset : /\S+/
CHARSET : 'CHARSET' charset
{ $item{charset} =~ /\A(?:UTF-8|US-ASCII)\z/ ? 1 : die('NO [BADCHARSET]'); }

SENTSINCE_date : 'SENTSINCE' date { $q->SENTSINCE(\%item) }
SENTON_date : 'SENTON' date { $q->SENTON(\%item) }
SENTBEFORE_date : 'SENTBEFORE' date { $q->SENTBEFORE(\%item) }

SINCE_date : 'SINCE' date { $q->SINCE(\%item) }
ON_date : 'ON' date { $q->ON(\%item) }
BEFORE_date : 'BEFORE' date { $q->BEFORE(\%item) }

MSN_set : sequence_set { $q->msn_set($item{sequence_set}) }
UID_set : "UID" sequence_set { $q->uid_set($item{sequence_set}) }
LARGER_number : "LARGER" number { $q->xap_only("z:$item{number}..") }
SMALLER_number : "SMALLER" number { $q->xap_only("z:..$item{number}") }

DELETED : "DELETED" { $q->impossible }
OLD : "OLD" { $q->impossible }
FLAGGED : "FLAGGED" { $q->impossible }
DRAFT : "DRAFT" { $q->impossible }

# pass "NOT" through XXX is this right?
OP_NOT : "NOT" { $q->xap_only('NOT') }
NOT_search_key : OP_NOT search_key1
OP_OR : "OR" {
	$q->xap_only('OP_OR');
	my $cur = delete $q->{xap};
	push @{$q->{stack}}, $cur;
	$q->{xap} = [];
}
search_key_a : search_key1
{
	my $ka = delete $q->{xap};
	$q->{xap} = [];
	push @{$q->{stack}}, $ka;
}
OR_search_keys : OP_OR search_key_a search_key1
{
	my $kb = delete $q->{xap};
	my $ka = pop @{$q->{stack}};
	my $xap = $q->{xap} = pop @{$q->{stack}};
	my $op = pop @$xap;
	$op eq 'OP_OR' or die "BAD expected OR: $op";
	$ka = @$ka > 1 ? '('.join(' ', @$ka).')' : $ka->[0];
	$kb = @$kb > 1 ? '('.join(' ', @$kb).')' : $kb->[0];
	push @$xap, "($ka OR $kb)";
}
HEADER_field_name_string : "HEADER" field_name string
{
	$q->ih2x($item{field_name}, $item{string});
}
FROM_string : "FROM" string { $q->ih2x('FROM', $item{string}) }
TO_string : "TO" string { $q->ih2x('TO', $item{string}) }
CC_string : "CC" string { $q->ih2x('CC', $item{string}) }
BCC_string : "BCC" string { $q->ih2x('BCC', $item{string}) }
SUBJECT_string : "SUBJECT" string { $q->ih2x('SUBJECT', $item{string}) }
BODY_string : "BODY" string { $q->ih2x('BODY', $item{string}) }
TEXT_string : "TEXT" string { $q->ih2x(undef, $item{string}) }
op_subq_enter : '(' { $q->subq_enter }
sub_query : op_subq_enter search_key1(s) ')' { $q->subq_leave }

field_name : /[\x21-\x39\x3b-\x7e]+/
string : quoted | literal
literal : /[^"\(\) \t]+/ # bogus, I know
quoted : /"[^"]*"/
number : /[0-9]+/
date : /[0123]?[0-9]-[A-Z]{3}-[0-9]{4,}/
sequence_set : /\A[0-9][0-9,:]*[0-9\*]?\z/
EOG

sub parse {
	my ($imap, $query) = @_;
	my $sql = '';
	%$q = (sql => \$sql, imap => $imap); # imap = PublicInbox::IMAP obj
	# $::RD_TRACE = 1;
	my $res = eval { $prd->search_key(uc($query)) };
	return $@ if $@ && $@ =~ /\A(?:BAD|NO) /;
	return 'BAD unexpected result' if !$res || $res != $q;
	if (exists $q->{sql}) {
		delete $q->{xap};
		if (my $uid = delete $q->{uid}) {
			my @u;
			for my $uid_set (@$uid) {
				my $u = $q->{imap}->range_step(\$uid_set);
				return $u if !ref($u);
				push @u, "num >= $u->[0] AND num <= $u->[1]";
			}
			$sql .= ' AND ('.join(' OR ', @u).')';
		}
	} else {
		$q->{xap} = join(' ', @{$q->{xap}});
	}
	delete @$q{qw(imap msn2uid)};
	$q;
}

1
public-inbox-1.9.0/lib/PublicInbox/IPC.pm000066400000000000000000000340041430031475700200560ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# base class for remote IPC calls and workqueues, requires Storable or Sereal
# - ipc_do and ipc_worker_* is for a single worker/producer and uses pipes
# - wq_io_do and wq_worker* is for a single producer and multiple workers,
#   using SOCK_SEQPACKET for work distribution
# use ipc_do when you need work done on a certain process
# use wq_io_do when your work can be done on any idle worker
package PublicInbox::IPC;
use strict;
use v5.10.1;
use parent qw(Exporter);
use Carp qw(croak);
use PublicInbox::DS qw(dwaitpid);
use PublicInbox::Spawn;
use PublicInbox::OnDestroy;
use PublicInbox::WQWorker;
use Socket qw(AF_UNIX MSG_EOR SOCK_STREAM);
my $MY_MAX_ARG_STRLEN = 4096 * 33; # extra 4K for serialization
my $SEQPACKET = eval { Socket::SOCK_SEQPACKET() }; # portable enough?
our @EXPORT_OK = qw(ipc_freeze ipc_thaw);
my ($enc, $dec);
# ->imports at BEGIN turns sereal_*_with_object into custom ops on 5.14+
# and eliminate method call overhead
BEGIN {
	eval {
		require Sereal::Encoder;
		require Sereal::Decoder;
		Sereal::Encoder->import('sereal_encode_with_object');
		Sereal::Decoder->import('sereal_decode_with_object');
		($enc, $dec) = (Sereal::Encoder->new, Sereal::Decoder->new);
	};
};

if ($enc && $dec) { # should be custom ops
	*ipc_freeze = sub ($) { sereal_encode_with_object $enc, $_[0] };
	*ipc_thaw = sub ($) { sereal_decode_with_object $dec, $_[0], my $ret };
} else {
	require Storable;
	*ipc_freeze = \&Storable::freeze;
	*ipc_thaw = \&Storable::thaw;
}

my $recv_cmd = PublicInbox::Spawn->can('recv_cmd4');
our $send_cmd = PublicInbox::Spawn->can('send_cmd4') // do {
	require PublicInbox::CmdIPC4;
	$recv_cmd //= PublicInbox::CmdIPC4->can('recv_cmd4');
	PublicInbox::CmdIPC4->can('send_cmd4');
} // do {
	require PublicInbox::Syscall;
	$recv_cmd //= PublicInbox::Syscall->can('recv_cmd4');
	PublicInbox::Syscall->can('send_cmd4');
};

sub _get_rec ($) {
	my ($r) = @_;
	defined(my $len = <$r>) or return;
	chop($len) eq "\n" or croak "no LF byte in $len";
	defined(my $n = read($r, my $buf, $len)) or croak "read error: $!";
	$n == $len or croak "short read: $n != $len";
	ipc_thaw($buf);
}

sub _send_rec ($$) {
	my ($w, $ref) = @_;
	my $buf = ipc_freeze($ref);
	print $w length($buf), "\n", $buf or croak "print: $!";
}

sub ipc_return ($$$) {
	my ($w, $ret, $exc) = @_;
	_send_rec($w, $exc ? bless(\$exc, 'PublicInbox::IPC::Die') : $ret);
}

sub ipc_worker_loop ($$$) {
	my ($self, $r_req, $w_res) = @_;
	my ($rec, $wantarray, $sub, @args);
	local $/ = "\n";
	while ($rec = _get_rec($r_req)) {
		($wantarray, $sub, @args) = @$rec;
		# no waiting if client doesn't care,
		# this is the overwhelmingly likely case
		if (!defined($wantarray)) {
			eval { $self->$sub(@args) };
			warn "$$ die: $@ (from nowait $sub)\n" if $@;
		} elsif ($wantarray) {
			my @ret = eval { $self->$sub(@args) };
			ipc_return($w_res, \@ret, $@);
		} else { # '' => wantscalar
			my $ret = eval { $self->$sub(@args) };
			ipc_return($w_res, \$ret, $@);
		}
	}
}

# starts a worker if Sereal or Storable is installed
sub ipc_worker_spawn {
	my ($self, $ident, $oldset, $fields) = @_;
	return if ($self->{-ipc_ppid} // -1) == $$; # idempotent
	delete(@$self{qw(-ipc_req -ipc_res -ipc_ppid -ipc_pid)});
	pipe(my ($r_req, $w_req)) or die "pipe: $!";
	pipe(my ($r_res, $w_res)) or die "pipe: $!";
	my $sigset = $oldset // PublicInbox::DS::block_signals();
	$self->ipc_atfork_prepare;
	my $seed = rand(0xffffffff);
	my $pid = fork // die "fork: $!";
	if ($pid == 0) {
		srand($seed);
		eval { Net::SSLeay::randomize() };
		eval { PublicInbox::DS->Reset };
		delete @$self{qw(-wq_s1 -wq_s2 -wq_workers -wq_ppid)};
		$w_req = $r_res = undef;
		$w_res->autoflush(1);
		$SIG{$_} = 'IGNORE' for (qw(TERM INT QUIT));
		local $0 = $ident;
		# ensure we properly exit even if warn() dies:
		my $end = PublicInbox::OnDestroy->new($$, sub { exit(!!$@) });
		eval {
			$fields //= {};
			local @$self{keys %$fields} = values(%$fields);
			my $on_destroy = $self->ipc_atfork_child;
			local @SIG{keys %SIG} = values %SIG;
			PublicInbox::DS::sig_setmask($sigset);
			ipc_worker_loop($self, $r_req, $w_res);
		};
		warn "worker $ident PID:$$ died: $@\n" if $@;
		undef $end; # trigger exit
	}
	PublicInbox::DS::sig_setmask($sigset) unless $oldset;
	$r_req = $w_res = undef;
	$w_req->autoflush(1);
	$self->{-ipc_req} = $w_req;
	$self->{-ipc_res} = $r_res;
	$self->{-ipc_ppid} = $$;
	$self->{-ipc_pid} = $pid;
}

sub ipc_worker_reap { # dwaitpid callback
	my ($args, $pid) = @_;
	my ($self, @uargs) = @$args;
	delete $self->{-wq_workers}->{$pid};
	return $self->{-reap_do}->($args, $pid) if $self->{-reap_do};
	return if !$?;
	my $s = $? & 127;
	# TERM(15) is our default exit signal, PIPE(13) is likely w/ pager
	warn "$self->{-wq_ident} PID:$pid died \$?=$?\n" if $s != 15 && $s != 13
}

sub wq_wait_async {
	my ($self, $cb, @uargs) = @_;
	local $PublicInbox::DS::in_loop = 1;
	$self->{-reap_async} = 1;
	$self->{-reap_do} = $cb;
	my @pids = keys %{$self->{-wq_workers}};
	dwaitpid($_, \&ipc_worker_reap, [ $self, @uargs ]) for @pids;
}

# for base class, override in sub classes
sub ipc_atfork_prepare {}

sub wq_atexit_child {}

sub ipc_atfork_child {
	my ($self) = @_;
	my $io = delete($self->{-ipc_atfork_child_close}) or return;
	close($_) for @$io;
	undef;
}

# idempotent, can be called regardless of whether worker is active or not
sub ipc_worker_stop {
	my ($self, $args) = @_;
	my ($pid, $ppid) = delete(@$self{qw(-ipc_pid -ipc_ppid)});
	my ($w_req, $r_res) = delete(@$self{qw(-ipc_req -ipc_res)});
	if (!$w_req && !$r_res) {
		die "unexpected PID:$pid without IPC pipes" if $pid;
		return; # idempotent
	}
	die 'no PID with IPC pipes' unless $pid;
	$w_req = $r_res = undef;

	return if $$ != $ppid;
	dwaitpid($pid, \&ipc_worker_reap, [$self, $args]);
}

# use this if we have multiple readers reading curl or "pigz -dc"
# and writing to the same store
sub ipc_lock_init {
	my ($self, $f) = @_;
	$f // die 'BUG: no filename given';
	require PublicInbox::Lock;
	$self->{-ipc_lock} //= bless { lock_path => $f }, 'PublicInbox::Lock'
}

sub _wait_return ($$) {
	my ($r_res, $sub) = @_;
	my $ret = _get_rec($r_res) // die "no response on $sub";
	die $$ret if ref($ret) eq 'PublicInbox::IPC::Die';
	wantarray ? @$ret : $$ret;
}

# call $self->$sub(@args), on a worker if ipc_worker_spawn was used
sub ipc_do {
	my ($self, $sub, @args) = @_;
	if (my $w_req = $self->{-ipc_req}) { # run in worker
		my $ipc_lock = $self->{-ipc_lock};
		my $lock = $ipc_lock ? $ipc_lock->lock_for_scope : undef;
		if (defined(wantarray)) {
			my $r_res = $self->{-ipc_res} or die 'no ipc_res';
			_send_rec($w_req, [ wantarray, $sub, @args ]);
			_wait_return($r_res, $sub);
		} else { # likely, fire-and-forget into pipe
			_send_rec($w_req, [ undef , $sub, @args ]);
		}
	} else { # run locally
		$self->$sub(@args);
	}
}

# needed when there's multiple IPC workers and the parent forking
# causes newer siblings to inherit older siblings sockets
sub ipc_sibling_atfork_child {
	my ($self) = @_;
	my ($pid, undef) = delete(@$self{qw(-ipc_pid -ipc_ppid)});
	delete(@$self{qw(-ipc_req -ipc_res)});
	$pid == $$ and die "BUG: $$ ipc_atfork_child called on itself";
}

sub recv_and_run {
	my ($self, $s2, $len, $full_stream) = @_;
	my @fds = $recv_cmd->($s2, my $buf, $len // $MY_MAX_ARG_STRLEN);
	return if scalar(@fds) && !defined($fds[0]);
	my $n = length($buf) or return 0;
	my $nfd = 0;
	for my $fd (@fds) {
		if (open(my $cmdfh, '+<&=', $fd)) {
			$self->{$nfd++} = $cmdfh;
			$cmdfh->autoflush(1);
		} else {
			die "$$ open(+<&=$fd) (FD:$nfd): $!";
		}
	}
	while ($full_stream && $n < $len) {
		my $r = sysread($s2, $buf, $len - $n, $n) // croak "read: $!";
		croak "read EOF after $n/$len bytes" if $r == 0;
		$n = length($buf);
	}
	# Sereal dies on truncated data, Storable returns undef
	my $args = ipc_thaw($buf) // die "thaw error on buffer of size: $n";
	undef $buf;
	my $sub = shift @$args;
	eval { $self->$sub(@$args) };
	warn "$$ $0 wq_worker: $sub: $@" if $@;
	delete @$self{0..($nfd-1)};
	$n;
}

sub wq_worker_loop ($$) {
	my ($self, $bcast2) = @_;
	my $wqw = PublicInbox::WQWorker->new($self, $self->{-wq_s2});
	PublicInbox::WQWorker->new($self, $bcast2) if $bcast2;
	PublicInbox::DS->SetPostLoopCallback(sub { $wqw->{sock} });
	PublicInbox::DS::event_loop();
	PublicInbox::DS->Reset;
}

sub do_sock_stream { # via wq_io_do, for big requests
	my ($self, $len) = @_;
	recv_and_run($self, my $s2 = delete $self->{0}, $len, 1);
}

sub wq_broadcast {
	my ($self, $sub, @args) = @_;
	if (my $wkr = $self->{-wq_workers}) {
		my $buf = ipc_freeze([$sub, @args]);
		for my $bcast1 (values %$wkr) {
			my $sock = $bcast1 // $self->{-wq_s1} // next;
			send($sock, $buf, MSG_EOR) // croak "send: $!";
			# XXX shouldn't have to deal with EMSGSIZE here...
		}
	} else {
		eval { $self->$sub(@args) };
		warn "wq_broadcast: $@" if $@;
	}
}

sub stream_in_full ($$$) {
	my ($s1, $fds, $buf) = @_;
	socketpair(my $r, my $w, AF_UNIX, SOCK_STREAM, 0) or
		croak "socketpair: $!";
	my $n = $send_cmd->($s1, [ fileno($r) ],
			ipc_freeze(['do_sock_stream', length($buf)]),
			MSG_EOR) // croak "sendmsg: $!";
	undef $r;
	$n = $send_cmd->($w, $fds, $buf, 0) // croak "sendmsg: $!";
	while ($n < length($buf)) {
		my $x = syswrite($w, $buf, length($buf) - $n, $n) //
				croak "syswrite: $!";
		croak "syswrite wrote 0 bytes" if $x == 0;
		$n += $x;
	}
}

sub wq_io_do { # always async
	my ($self, $sub, $ios, @args) = @_;
	if (my $s1 = $self->{-wq_s1}) { # run in worker
		my $fds = [ map { fileno($_) } @$ios ];
		my $buf = ipc_freeze([$sub, @args]);
		if (length($buf) > $MY_MAX_ARG_STRLEN) {
			stream_in_full($s1, $fds, $buf);
		} else {
			my $n = $send_cmd->($s1, $fds, $buf, MSG_EOR);
			return if defined($n); # likely
			$!{ETOOMANYREFS} and
				croak "sendmsg: $! (check RLIMIT_NOFILE)";
			$!{EMSGSIZE} ? stream_in_full($s1, $fds, $buf) :
				croak("sendmsg: $!");
		}
	} else {
		@$self{0..$#$ios} = @$ios;
		eval { $self->$sub(@args) };
		warn "wq_io_do: $@" if $@;
		delete @$self{0..$#$ios}; # don't close
	}
}

sub wq_sync_run {
	my ($self, $wantarray, $sub, @args) = @_;
	if ($wantarray) {
		my @ret = eval { $self->$sub(@args) };
		ipc_return($self->{0}, \@ret, $@);
	} else { # '' => wantscalar
		my $ret = eval { $self->$sub(@args) };
		ipc_return($self->{0}, \$ret, $@);
	}
}

sub wq_do {
	my ($self, $sub, @args) = @_;
	if (defined(wantarray)) {
		pipe(my ($r, $w)) or die "pipe: $!";
		wq_io_do($self, 'wq_sync_run', [ $w ], wantarray, $sub, @args);
		undef $w;
		_wait_return($r, $sub);
	} else {
		wq_io_do($self, $sub, [], @args);
	}
}

sub prepare_nonblock {
	($_[0]->{-wq_s1} // die 'BUG: no {-wq_s1}')->blocking(0);
	$_[0]->{-reap_async} or die 'BUG: {-reap_async} needed for nonblock';
	require PublicInbox::WQBlocked;
}

sub wq_nonblock_do { # always async
	my ($self, $sub, @args) = @_;
	my $buf = ipc_freeze([$sub, @args]);
	if ($self->{wqb}) { # saturated once, assume saturated forever
		$self->{wqb}->flush_send($buf);
	} else {
		$send_cmd->($self->{-wq_s1}, [], $buf, MSG_EOR) //
			($!{EAGAIN} ? PublicInbox::WQBlocked->new($self, $buf)
					: croak("sendmsg: $!"));
	}
}

sub _wq_worker_start ($$$$) {
	my ($self, $oldset, $fields, $one) = @_;
	my ($bcast1, $bcast2);
	$one or socketpair($bcast1, $bcast2, AF_UNIX, $SEQPACKET, 0) or
							die "socketpair: $!";
	my $seed = rand(0xffffffff);
	my $pid = fork // die "fork: $!";
	if ($pid == 0) {
		srand($seed);
		eval { Net::SSLeay::randomize() };
		undef $bcast1;
		eval { PublicInbox::DS->Reset };
		delete @$self{qw(-wq_s1 -wq_ppid)};
		$self->{-wq_worker_nr} =
				keys %{delete($self->{-wq_workers}) // {}};
		$SIG{$_} = 'DEFAULT' for (qw(TTOU TTIN TERM QUIT INT CHLD));
		local $0 = $one ? $self->{-wq_ident} :
			"$self->{-wq_ident} $self->{-wq_worker_nr}";
		# ensure we properly exit even if warn() dies:
		my $end = PublicInbox::OnDestroy->new($$, sub { exit(!!$@) });
		eval {
			$fields //= {};
			local @$self{keys %$fields} = values(%$fields);
			my $on_destroy = $self->ipc_atfork_child;
			local @SIG{keys %SIG} = values %SIG;
			PublicInbox::DS::sig_setmask($oldset);
			wq_worker_loop($self, $bcast2);
		};
		warn "worker $self->{-wq_ident} PID:$$ died: $@" if $@;
		undef $end; # trigger exit
	} else {
		$self->{-wq_workers}->{$pid} = $bcast1;
	}
}

# starts workqueue workers if Sereal or Storable is installed
sub wq_workers_start {
	my ($self, $ident, $nr_workers, $oldset, $fields) = @_;
	($send_cmd && $recv_cmd && defined($SEQPACKET)) or return;
	return if $self->{-wq_s1}; # idempotent
	$self->{-wq_s1} = $self->{-wq_s2} = undef;
	socketpair($self->{-wq_s1}, $self->{-wq_s2}, AF_UNIX, $SEQPACKET, 0) or
		die "socketpair: $!";
	$self->ipc_atfork_prepare;
	$nr_workers //= $self->{-wq_nr_workers}; # was set earlier
	my $sigset = $oldset // PublicInbox::DS::block_signals();
	$self->{-wq_workers} = {};
	$self->{-wq_ident} = $ident;
	my $one = $nr_workers == 1;
	$self->{-wq_nr_workers} = $nr_workers;
	_wq_worker_start($self, $sigset, $fields, $one) for (1..$nr_workers);
	PublicInbox::DS::sig_setmask($sigset) unless $oldset;
	$self->{-wq_ppid} = $$;
}

sub wq_close {
	my ($self) = @_;
	if (my $wqb = delete $self->{wqb}) {
		$self->{-reap_async} or die 'BUG: {-reap_async} unset';
		$wqb->enq_close;
	}
	delete @$self{qw(-wq_s1 -wq_s2)} or return;
	return if $self->{-reap_async};
	my @pids = keys %{$self->{-wq_workers}};
	dwaitpid($_, \&ipc_worker_reap, [ $self ]) for @pids;
}

sub wq_kill {
	my ($self, $sig) = @_;
	kill($sig // 'TERM', keys %{$self->{-wq_workers}});
}

sub DESTROY {
	my ($self) = @_;
	my $ppid = $self->{-wq_ppid};
	wq_kill($self) if $ppid && $ppid == $$;
	wq_close($self);
	ipc_worker_stop($self);
}

sub detect_nproc () {
	# _SC_NPROCESSORS_ONLN = 84 on both Linux glibc and musl
	return POSIX::sysconf(84) if $^O eq 'linux';
	return POSIX::sysconf(58) if $^O eq 'freebsd';
	# TODO: more OSes

	# getconf(1) is POSIX, but *NPROCESSORS* vars are not
	for (qw(_NPROCESSORS_ONLN NPROCESSORS_ONLN)) {
		`getconf $_ 2>/dev/null` =~ /^(\d+)$/ and return $1;
	}
	for my $nproc (qw(nproc gnproc)) { # GNU coreutils nproc
		`$nproc 2>/dev/null` =~ /^(\d+)$/ and return $1;
	}

	# should we bother with `sysctl hw.ncpu`?  Those only give
	# us total processor count, not online processor count.
	undef
}

1;
public-inbox-1.9.0/lib/PublicInbox/IdxStack.pm000066400000000000000000000032061430031475700211550ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors 
# License: AGPL-3.0+ 

# temporary stack for public-inbox-index
package PublicInbox::IdxStack;
use v5.10.1;
use strict;
use Fcntl qw(:seek);
use constant PACK_FMT => eval { pack('Q', 1) } ? 'A1QQH*H*' : 'A1IIH*H*';

# start off in write-only mode
sub new {
	open(my $io, '+>', undef) or die "open: $!";
	# latest_cmt is still useful when the newest revision is a `d'(elete),
	# otherwise we favor $sync->{latest_cmt} for checkpoints and {quit}
	bless { wr => $io, latest_cmt => $_[1] }, __PACKAGE__
}

# file_char = [d|m]
sub push_rec {
	my ($self, $file_char, $at, $ct, $blob_oid, $cmt_oid) = @_;
	my $rec = pack(PACK_FMT, $file_char, $at, $ct, $blob_oid, $cmt_oid);
	$self->{unpack_fmt} // do {
		my $len = length($cmt_oid);
		my $fmt = PACK_FMT;
		$fmt =~ s/H\*/H$len/g;
		$self->{rec_size} = length($rec);
		$self->{unpack_fmt} = $fmt;
	};
	print { $self->{wr} } $rec or die "print: $!";
	$self->{tot_size} += length($rec);
}

sub num_records {
	my ($self) = @_;
	$self->{rec_size} ? $self->{tot_size} / $self->{rec_size} : 0;
}

# switch into read-only mode and returns self
sub read_prepare {
	my ($self) = @_;
	my $io = $self->{rd} = delete($self->{wr});
	$io->flush or die "flush: $!";
	$self;
}

sub pop_rec {
	my ($self) = @_;
	my $sz = $self->{rec_size} or return;
	my $rec_pos = $self->{tot_size} -= $sz;
	return if $rec_pos < 0;
	my $io = $self->{rd};
	seek($io, $rec_pos, SEEK_SET) or die "seek: $!";
	my $r = read($io, my $buf, $sz);
	defined($r) or die "read: $!";
	$r == $sz or die "read($r != $sz)";
	unpack($self->{unpack_fmt}, $buf);
}

1;
public-inbox-1.9.0/lib/PublicInbox/Import.pm000066400000000000000000000502021430031475700207130ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 
#
# git fast-import-based ssoma-mda MDA replacement
# This is only ever run by public-inbox-mda, public-inbox-learn
# and public-inbox-watch. Not the WWW or NNTP code which only
# requires read-only access.
package PublicInbox::Import;
use strict;
use parent qw(PublicInbox::Lock);
use v5.10.1;
use PublicInbox::Spawn qw(run_die popen_rd);
use PublicInbox::MID qw(mids mid2path);
use PublicInbox::Address;
use PublicInbox::Smsg;
use PublicInbox::MsgTime qw(msg_datestamp);
use PublicInbox::ContentHash qw(content_digest);
use PublicInbox::MDA;
use PublicInbox::Eml;
use POSIX qw(strftime);

sub default_branch () {
	state $default_branch = do {
		my $r = popen_rd([qw(git config --global init.defaultBranch)],
				 { GIT_CONFIG => undef });
		chomp(my $h = <$r> // '');
		close $r;
		$h eq '' ? 'refs/heads/master' : "refs/heads/$h";
	}
}

sub new {
	# we can't change arg order, this is documented in POD
	# and external projects may rely on it:
	my ($class, $git, $name, $email, $ibx) = @_;
	my $ref;
	if ($ibx) {
		$ref = $ibx->{ref_head};
		$name //= $ibx->{name};
		$email //= $ibx->{-primary_address};
		$git //= $ibx->git;
	}
	bless {
		git => $git,
		ident => "$name <$email>",
		mark => 1,
		ref => $ref // default_branch,
		ibx => $ibx,
		path_type => '2/38', # or 'v2'
		lock_path => "$git->{git_dir}/ssoma.lock", # v2 changes this
		bytes_added => 0,
	}, $class
}

# idempotent start function
sub gfi_start {
	my ($self) = @_;

	return ($self->{in}, $self->{out}) if $self->{in};

	my ($in_r, $out_r, $out_w);
	pipe($out_r, $out_w) or die "pipe failed: $!";

	$self->lock_acquire;
	eval {
		my ($git, $ref) = @$self{qw(git ref)};
		local $/ = "\n";
		chomp($self->{tip} = $git->qx(qw(rev-parse --revs-only), $ref));
		die "fatal: rev-parse --revs-only $ref: \$?=$?" if $?;
		if ($self->{path_type} ne '2/38' && $self->{tip}) {
			my $t = $git->qx(qw(ls-tree -r -z --name-only), $ref);
			die "fatal: ls-tree -r -z --name-only $ref: \$?=$?" if $?;
			$self->{-tree} = { map { $_ => 1 } split(/\0/, $t) };
		}
		$in_r = $self->{in} = $git->popen(qw(fast-import
					--quiet --done --date-format=raw),
					undef, { 0 => $out_r });
		$out_w->autoflush(1);
		$self->{out} = $out_w;
		$self->{nchg} = 0;
	};
	if ($@) {
		$self->lock_release;
		die $@;
	}
	($in_r, $out_w);
}

sub wfail () { die "write to fast-import failed: $!" }

sub now_raw () { time . ' +0000' }

sub norm_body ($) {
	my ($mime) = @_;
	my $b = $mime->body_raw;
	$b =~ s/(\r?\n)+\z//s;
	$b
}

# only used for v1 (ssoma) inboxes
sub _check_path ($$$$) {
	my ($r, $w, $tip, $path) = @_;
	return if $tip eq '';
	print $w "ls $tip $path\n" or wfail;
	local $/ = "\n";
	my $info = <$r> // die "EOF from fast-import: $!";
	$info =~ /\Amissing / ? undef : $info;
}

sub _cat_blob ($$$) {
	my ($r, $w, $oid) = @_;
	print $w "cat-blob $oid\n" or wfail;
	local $/ = "\n";
	my $info = <$r> // die "EOF from fast-import / cat-blob: $!";
	$info =~ /\A[a-f0-9]{40,} blob ([0-9]+)\n\z/ or return;
	my $left = $1;
	my $offset = 0;
	my $buf = '';
	my $n;
	while ($left > 0) {
		$n = read($r, $buf, $left, $offset) //
			die "read cat-blob failed: $!";
		$n == 0 and die 'fast-export (cat-blob) died';
		$left -= $n;
		$offset += $n;
	}
	$n = read($r, my $lf, 1) //
		die "read final byte of cat-blob failed: $!";
	die "bad read on final byte: <$lf>" if $lf ne "\n";

	# fixup some bugginess in old versions:
	$buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
	\$buf;
}

sub cat_blob {
	my ($self, $oid) = @_;
	my ($r, $w) = $self->gfi_start;
	_cat_blob($r, $w, $oid);
}

sub check_remove_v1 {
	my ($r, $w, $tip, $path, $mime) = @_;

	my $info = _check_path($r, $w, $tip, $path) or return ('MISSING',undef);
	$info =~ m!\A100644 blob ([a-f0-9]{40,})\t!s or die "not blob: $info";
	my $oid = $1;
	my $msg = _cat_blob($r, $w, $oid) or die "BUG: cat-blob $1 failed";
	my $cur = PublicInbox::Eml->new($msg);
	my $cur_s = $cur->header('Subject') // '';
	my $cur_m = $mime->header('Subject') // '';
	if ($cur_s ne $cur_m || norm_body($cur) ne norm_body($mime)) {
		return ('MISMATCH', $cur);
	}
	(undef, $cur);
}

sub checkpoint {
	my ($self) = @_;
	return unless $self->{in};
	print { $self->{out} } "checkpoint\n" or wfail;
	undef;
}

sub progress {
	my ($self, $msg) = @_;
	return unless $self->{in};
	print { $self->{out} } "progress $msg\n" or wfail;
	readline($self->{in}) eq "progress $msg\n" or die
		"progress $msg not received\n";
	undef;
}

sub _update_git_info ($$) {
	my ($self, $do_gc) = @_;
	# for compatibility with existing ssoma installations
	# we can probably remove this entirely by 2020
	my $git_dir = $self->{git}->{git_dir};
	my @cmd = ('git', "--git-dir=$git_dir");
	my $index = "$git_dir/ssoma.index";
	if (-e $index && !$ENV{FAST}) {
		my $env = { GIT_INDEX_FILE => $index };
		run_die([@cmd, qw(read-tree -m -v -i), $self->{ref}], $env);
	}
	eval { run_die([@cmd, 'update-server-info']) };
	my $ibx = $self->{ibx};
	if ($ibx && $ibx->version == 1 && -d "$ibx->{inboxdir}/public-inbox" &&
				eval { require PublicInbox::SearchIdx }) {
		eval {
			my $s = PublicInbox::SearchIdx->new($ibx);
			$s->index_sync({ ref => $self->{ref} });
		};
		warn "$ibx->{inboxdir} index failed: $@\n" if $@;
	}
	eval { run_die([@cmd, qw(gc --auto)]) } if $do_gc;
}

sub barrier {
	my ($self) = @_;

	# For safety, we ensure git checkpoint is complete before because
	# the data in git is still more important than what is in Xapian
	# in v2.  Performance may be gained by delaying the ->progress
	# call but we lose safety
	if ($self->{nchg}) {
		$self->checkpoint;
		$self->progress('checkpoint');
		_update_git_info($self, 0);
		$self->{nchg} = 0;
	}
}

# used for v2
sub get_mark {
	my ($self, $mark) = @_;
	die "not active\n" unless $self->{in};
	my ($r, $w) = $self->gfi_start;
	print $w "get-mark $mark\n" or wfail;
	my $oid = <$r> // die "get-mark failed, need git 2.6.0+\n";
	chomp($oid);
	$oid;
}

# returns undef on non-existent
# ('MISMATCH', PublicInbox::Eml) on mismatch
# (:MARK, PublicInbox::Eml) on success
#
# v2 callers should check with Xapian before calling this as
# it is not idempotent.
sub remove {
	my ($self, $mime, $msg) = @_; # mime = PublicInbox::Eml or Email::MIME

	my $path_type = $self->{path_type};
	my ($path, $err, $cur, $blob);

	my ($r, $w) = $self->gfi_start;
	my $tip = $self->{tip};
	if ($path_type eq '2/38') {
		$path = mid2path(v1_mid0($mime));
		($err, $cur) = check_remove_v1($r, $w, $tip, $path, $mime);
		return ($err, $cur) if $err;
	} else {
		my $sref;
		if (ref($mime) eq 'SCALAR') { # optimization used by V2Writable
			$sref = $mime;
		} else { # XXX should not be necessary:
			my $str = $mime->as_string;
			$sref = \$str;
		}
		my $len = length($$sref);
		$blob = $self->{mark}++;
		print $w "blob\nmark :$blob\ndata $len\n",
			$$sref, "\n" or wfail;
	}

	my $ref = $self->{ref};
	my $commit = $self->{mark}++;
	my $parent = $tip =~ /\A:/ ? $tip : undef;
	unless ($parent) {
		print $w "reset $ref\n" or wfail;
	}
	my $ident = $self->{ident};
	my $now = now_raw();
	$msg //= 'rm';
	my $len = length($msg) + 1;
	print $w "commit $ref\nmark :$commit\n",
		"author $ident $now\n",
		"committer $ident $now\n",
		"data $len\n$msg\n\n",
		'from ', ($parent ? $parent : $tip), "\n" or wfail;
	if (defined $path) {
		print $w "D $path\n\n" or wfail;
	} else {
		clean_tree_v2($self, $w, 'd');
		print $w "M 100644 :$blob d\n\n" or wfail;
	}
	$self->{nchg}++;
	(($self->{tip} = ":$commit"), $cur);
}

sub git_timestamp ($) {
	my ($ts, $zone) = @{$_[0]};
	$ts = 0 if $ts < 0; # git uses unsigned times
	"$ts $zone";
}

sub extract_cmt_info ($;$) {
	my ($mime, $smsg) = @_;
	# $mime is PublicInbox::Eml, but remains Email::MIME-compatible
	$smsg //= bless {}, 'PublicInbox::Smsg';

	$smsg->populate($mime);

	my $sender = '';
	my $from = delete($smsg->{From}) // '';
	my ($email) = PublicInbox::Address::emails($from);
	my ($name) = PublicInbox::Address::names($from);
	if (!defined($name) || !defined($email)) {
		$sender = $mime->header('Sender') // '';
		$name //= (PublicInbox::Address::names($sender))[0];
		$email //= (PublicInbox::Address::emails($sender))[0];
	}
	if (defined $email) {
		# Email::Address::XS may leave quoted '<' in addresses,
		# which git-fast-import doesn't like
		$email =~ tr/<>//d;

		# quiet down wide character warnings with utf8::encode
		utf8::encode($email);
	} else {
		$email = '';
		warn "no email in From: $from or Sender: $sender\n";
	}

	# git gets confused with:
	#  "'A U Thor ' via foo" 
	# ref:
	# 
	if (defined $name) {
		$name =~ tr/<>//d;
		utf8::encode($name);
	} else {
		$name = '';
		warn "no name in From: $from or Sender: $sender\n";
	}

	my $subject = delete($smsg->{Subject}) // '(no subject)';
	utf8::encode($subject);
	my $at = git_timestamp(delete $smsg->{-ds});
	my $ct = git_timestamp(delete $smsg->{-ts});
	("$name <$email>", $at, $ct, $subject);
}

# kill potentially confusing/misleading headers
our @UNWANTED_HEADERS = (qw(Bytes Lines Content-Length),
			qw(Status X-Status));
sub drop_unwanted_headers ($) {
	my ($eml) = @_;
	for (@UNWANTED_HEADERS, @PublicInbox::MDA::BAD_HEADERS) {
		$eml->header_set($_);
	}
}

# used by V2Writable, too
sub append_mid ($$) {
	my ($hdr, $mid0) = @_;
	# @cur is likely empty if we need to call this sub, but it could
	# have random unparseable crap which we'll preserve, too.
	my @cur = $hdr->header_raw('Message-ID');
	$hdr->header_set('Message-ID', @cur, "<$mid0>");
}

sub v1_mid0 ($) {
	my ($eml) = @_;
	my $mids = mids($eml);

	if (!scalar(@$mids)) { # spam often has no Message-ID
		my $mid0 = digest2mid(content_digest($eml), $eml);
		append_mid($eml, $mid0);
		return $mid0;
	}
	$mids->[0];
}
sub clean_tree_v2 ($$$) {
	my ($self, $w, $keep) = @_;
	my $tree = $self->{-tree} or return; #v2 only
	delete $tree->{$keep};
	foreach (keys %$tree) {
		print $w "D $_\n" or wfail;
	}
	%$tree = ($keep => 1);
}

# returns undef on duplicate
# returns the :MARK of the most recent commit
sub add {
	my ($self, $mime, $check_cb, $smsg) = @_;

	my ($author, $at, $ct, $subject) = extract_cmt_info($mime, $smsg);
	my $path_type = $self->{path_type};
	my $path;
	if ($path_type eq '2/38') {
		$path = mid2path(v1_mid0($mime));
	} else { # v2 layout, one file:
		$path = 'm';
	}

	my ($r, $w) = $self->gfi_start;
	my $tip = $self->{tip};
	if ($path_type eq '2/38') {
		_check_path($r, $w, $tip, $path) and return;
	}

	drop_unwanted_headers($mime);

	# spam check:
	if ($check_cb) {
		$mime = $check_cb->($mime, $self->{ibx}) or return;
	}

	my $blob = $self->{mark}++;
	my $raw_email = $mime->{-public_inbox_raw} // $mime->as_string;
	my $n = length($raw_email);
	$self->{bytes_added} += $n;
	print $w "blob\nmark :$blob\ndata ", $n, "\n" or wfail;
	print $w $raw_email, "\n" or wfail;

	# v2: we need this for Xapian
	if ($smsg) {
		$smsg->{blob} = $self->get_mark(":$blob");
		$smsg->set_bytes($raw_email, $n);
		if (my $oidx = delete $smsg->{-oidx}) { # used by LeiStore
			my $eidx_git = delete $smsg->{-eidx_git};

			# we need this sharedkv to dedupe blobs added in the
			# same fast-import transaction
			my $u = $self->{uniq_skv} //= do {
				require PublicInbox::SharedKV;
				my $x = PublicInbox::SharedKV->new;
				$x->dbh;
				$x;
			};
			return if !$u->set_maybe($smsg->oidbin, 1);
			return if (!$oidx->vivify_xvmd($smsg) &&
					$eidx_git->check($smsg->{blob}));
		}
	}
	my $ref = $self->{ref};
	my $commit = $self->{mark}++;
	my $parent = $tip =~ /\A:/ ? $tip : undef;

	unless ($parent) {
		print $w "reset $ref\n" or wfail;
	}

	print $w "commit $ref\nmark :$commit\n",
		"author $author $at\n",
		"committer $self->{ident} $ct\n" or wfail;
	print $w "data ", (length($subject) + 1), "\n",
		$subject, "\n\n" or wfail;
	if ($tip ne '') {
		print $w 'from ', ($parent ? $parent : $tip), "\n" or wfail;
	}
	clean_tree_v2($self, $w, $path);
	print $w "M 100644 :$blob $path\n\n" or wfail;
	$self->{nchg}++;
	$self->{tip} = ":$commit";
}

my @INIT_FILES = ('HEAD' => undef, # filled in at runtime
		'config' => <{git}->{git_dir} if ref($dir);
	require File::Path;
	File::Path::mkpath([ map { "$dir/$_" } qw(objects/info refs/heads) ]);
	$INIT_FILES[1] //= 'ref: '.default_branch."\n";
	my @fn_contents = @INIT_FILES;
	$fn_contents[1] = "ref: refs/heads/$head\n" if defined $head;
	while (my ($fn, $contents) = splice(@fn_contents, 0, 2)) {
		my $f = $dir.'/'.$fn;
		next if -f $f;
		open my $fh, '>', $f or die "open $f: $!";
		print $fh $contents or die "print $f: $!";
		close $fh or die "close $f: $!";
	}
}

# true if locked and active
sub active { !!$_[0]->{out} }

sub done {
	my ($self) = @_;
	my $w = delete $self->{out} or return;
	eval {
		my $r = delete $self->{in} or die 'BUG: missing {in} when done';
		print $w "done\n" or wfail;
		close $r or die "fast-import failed: $?"; # ProcessPipe::CLOSE
	};
	my $wait_err = $@;
	my $nchg = delete $self->{nchg};
	if ($nchg && !$wait_err) {
		eval { _update_git_info($self, 1) };
		warn "E: $self->{git}->{git_dir} update info: $@\n" if $@;
	}
	$self->lock_release(!!$nchg);
	$self->{git}->cleanup;
	die $wait_err if $wait_err;
}

sub atfork_child {
	my ($self) = @_;
	foreach my $f (qw(in out)) {
		next unless defined($self->{$f});
		close $self->{$f} or die "failed to close import[$f]: $!\n";
	}
}

sub digest2mid ($$;$) {
	my ($dig, $hdr, $fallback_time) = @_;
	my $b64 = $dig->clone->b64digest;
	# Make our own URLs nicer:
	# See "Base 64 Encoding with URL and Filename Safe Alphabet" in RFC4648
	$b64 =~ tr!+/=!-_!d;

	# Add a date prefix to prevent a leading '-' in case that trips
	# up some tools (e.g. if a Message-ID were a expected as a
	# command-line arg)
	my $dt = msg_datestamp($hdr, $fallback_time);
	$dt = POSIX::strftime('%Y%m%d%H%M%S', gmtime($dt));
	"$dt.$b64" . '@z';
}

sub rewrite_commit ($$$$) {
	my ($self, $oids, $buf, $mime) = @_;
	my ($author, $at, $ct, $subject);
	if ($mime) {
		($author, $at, $ct, $subject) = extract_cmt_info($mime);
	} else {
		$author = '<>';
		$subject = 'purged '.join(' ', @$oids);
	}
	@$oids = ();
	$subject .= "\n";
	foreach my $i (0..$#$buf) {
		my $l = $buf->[$i];
		if ($l =~ /^author .* ([0-9]+ [\+-]?[0-9]+)$/) {
			$at //= $1;
			$buf->[$i] = "author $author $at\n";
		} elsif ($l =~ /^committer .* ([0-9]+ [\+-]?[0-9]+)$/) {
			$ct //= $1;
			$buf->[$i] = "committer $self->{ident} $ct\n";
		} elsif ($l =~ /^data ([0-9]+)/) {
			$buf->[$i++] = "data " . length($subject) . "\n";
			$buf->[$i] = $subject;
			last;
		}
	}
}

# returns the new commit OID if a replacement was done
# returns undef if nothing was done
sub replace_oids {
	my ($self, $mime, $replace_map) = @_; # oid => raw string
	my $tmp = "refs/heads/replace-".((keys %$replace_map)[0]);
	my $old = $self->{'ref'};
	my $git = $self->{git};
	my @export = (qw(fast-export --no-data --use-done-feature), $old);
	my $rd = $git->popen(@export);
	my ($r, $w) = $self->gfi_start;
	my @buf;
	my $nreplace = 0;
	my @oids;
	my ($done, $mark);
	my $tree = $self->{-tree};
	while (<$rd>) {
		if (/^reset (?:.+)/) {
			push @buf, "reset $tmp\n";
		} elsif (/^commit (?:.+)/) {
			if (@buf) {
				print $w @buf or wfail;
				@buf = ();
			}
			push @buf, "commit $tmp\n";
		} elsif (/^data ([0-9]+)/) {
			# only commit message, so $len is small:
			my $len = $1; # + 1 for trailing "\n"
			push @buf, $_;
			my $n = read($rd, my $buf, $len) or die "read: $!";
			$len == $n or die "short read ($n < $len)";
			push @buf, $buf;
		} elsif (/^M 100644 ([a-f0-9]+) (\w+)/) {
			my ($oid, $path) = ($1, $2);
			$tree->{$path} = 1;
			my $sref = $replace_map->{$oid};
			if (defined $sref) {
				push @oids, $oid;
				my $n = length($$sref);
				push @buf, "M 100644 inline $path\ndata $n\n";
				push @buf, $$sref; # hope CoW works...
				push @buf, "\n";
			} else {
				push @buf, $_;
			}
		} elsif (/^D (\w+)/) {
			my $path = $1;
			push @buf, $_ if $tree->{$path};
		} elsif ($_ eq "\n") {
			if (@oids) {
				if (!$mime) {
					my $out = join('', @buf);
					$out =~ s/^/# /sgm;
					warn "purge rewriting\n", $out, "\n";
				}
				rewrite_commit($self, \@oids, \@buf, $mime);
				$nreplace++;
			}
			print $w @buf, "\n" or wfail;
			@buf = ();
		} elsif ($_ eq "done\n") {
			$done = 1;
		} elsif (/^mark :([0-9]+)$/) {
			push @buf, $_;
			$mark = $1;
		} else {
			push @buf, $_;
		}
	}
	close $rd or die "close fast-export failed: $?";
	if (@buf) {
		print $w @buf or wfail;
	}
	die 'done\n not seen from fast-export' unless $done;
	chomp(my $cmt = $self->get_mark(":$mark")) if $nreplace;
	$self->{nchg} = 0; # prevent _update_git_info until update-ref:
	$self->done;
	my @git = ('git', "--git-dir=$git->{git_dir}");

	run_die([@git, qw(update-ref), $old, $tmp]) if $nreplace;

	run_die([@git, qw(update-ref -d), $tmp]);

	return if $nreplace == 0;

	run_die([@git, qw(-c gc.reflogExpire=now gc --prune=all --quiet)]);

	# check that old OIDs are gone
	my $err = 0;
	foreach my $oid (keys %$replace_map) {
		my @info = $git->check($oid);
		if (@info) {
			warn "$oid not replaced\n";
			$err++;
		}
	}
	_update_git_info($self, 0);
	die "Failed to replace $err object(s)\n" if $err;
	$cmt;
}

1;
__END__
=pod

=head1 NAME

PublicInbox::Import - message importer for public-inbox v1 inboxes

=head1 VERSION

version 1.0

=head1 SYNOPSIS

	use PublicInbox::Eml;
	# PublicInbox::Eml exists as of public-inbox 1.5.0,
	# Email::MIME was used in older versions

	use PublicInbox::Git;
	use PublicInbox::Import;

	chomp(my $git_dir = `git rev-parse --git-dir`);
	$git_dir or die "GIT_DIR= must be specified\n";
	my $git = PublicInbox::Git->new($git_dir);
	my @committer = ('inbox', 'inbox@example.org');
	my $im = PublicInbox::Import->new($git, @committer);

	# to add a message:
	my $message = "From: \n".
		"Subject: test message \n" .
		"Date: Thu, 01 Jan 1970 00:00:00 +0000\n" .
		"Message-ID: \n".
		"\ntest message";
	my $parsed = PublicInbox::Eml->new($message);
	my $ret = $im->add($parsed);
	if (!defined $ret) {
		warn "duplicate: ", $parsed->header_raw('Message-ID'), "\n";
	} else {
		print "imported at mark $ret\n";
	}
	$im->done;

	# to remove a message
	my $junk = PublicInbox::Eml->new($message);
	my ($mark, $orig) = $im->remove($junk);
	if ($mark eq 'MISSING') {
		print "not found\n";
	} elsif ($mark eq 'MISMATCH') {
		print "Message exists but does not match\n\n",
			$orig->as_string, "\n",;
	} else {
		print "removed at mark $mark\n\n",
			$orig->as_string, "\n";
	}
	$im->done;

=head1 DESCRIPTION

An importer and remover for public-inboxes which takes C
or L messages as input and stores them in a git repository as
documented in L,
except it does not allow duplicate Message-IDs.

It requires L and L to be installed.

=head1 METHODS

=cut

=head2 new

	my $im = PublicInbox::Import->new($git, @committer);

Initialize a new PublicInbox::Import object.

=head2 add

	my $parsed = PublicInbox::Eml->new($message);
	$im->add($parsed);

Adds a message to to the git repository.  This will acquire
C<$GIT_DIR/ssoma.lock> and start L if necessary.

Messages added will not be visible to other processes until L
is called, but L may be called on them.

=head2 remove

	my $junk = PublicInbox::Eml->new($message);
	my ($code, $orig) = $im->remove($junk);

Removes a message from the repository.  On success, it returns
a ':'-prefixed numeric code representing the git-fast-import
mark and the original messages as a PublicInbox::Eml
(or Email::MIME) object.
If the message could not be found, the code is "MISSING"
and the original message is undef.  If there is a mismatch where
the "Message-ID" is matched but the subject and body do not match,
the returned code is "MISMATCH" and the conflicting message
is returned as orig.

=head2 done

Finalizes the L and unlocks the repository.
Calling this is required to finalize changes to a repository.

=head1 SEE ALSO

L

=head1 CONTACT

All feedback welcome via plain-text mail to L

The mail archives are hosted at L

=head1 COPYRIGHT

Copyright (C) 2016-2020 all contributors L

License: AGPL-3.0+ L

=cut
public-inbox-1.9.0/lib/PublicInbox/In2Tie.pm000066400000000000000000000012531430031475700205350ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors 
# License: AGPL-3.0+ 

# used to ensure PublicInbox::DS can call fileno() as a function
# on Linux::Inotify2 objects
package PublicInbox::In2Tie;
use strict;
use Symbol qw(gensym);

sub io {
	my $in2 = $_[0];
	$in2->blocking(0);
	if ($in2->can('on_overflow')) {
		# broadcasts everything on overflow
		$in2->on_overflow(undef);
	}
	my $io = gensym;
	tie *$io, __PACKAGE__, $in2;
	$io;
}

sub TIEHANDLE {
	my ($class, $in2) = @_;
	bless \$in2, $class; # a scalar reference to an existing reference
}

# this calls Linux::Inotify2::fileno
sub FILENO { ${$_[0]}->fileno }

1;
public-inbox-1.9.0/lib/PublicInbox/Inbox.pm000066400000000000000000000262271430031475700205320ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 
#
# Represents a public-inbox (which may have multiple mailing addresses)
package PublicInbox::Inbox;
use strict;
use v5.10.1;
use PublicInbox::Git;
use PublicInbox::MID qw(mid2path);
use PublicInbox::Eml;
use List::Util qw(max);
use Carp qw(croak);

# returns true if further checking is required
sub check_inodes ($) {
	for (qw(over mm)) { $_[0]->{$_}->check_inodes if $_[0]->{$_} }
}

sub do_cleanup {
	my ($ibx) = @_;
	my $live;
	if (defined $ibx->{git}) {
		$live = $ibx->isa(__PACKAGE__) ? $ibx->{git}->cleanup(1)
					: $ibx->{git}->cleanup_if_unlinked;
		delete($ibx->{git}) unless $live;
	}
	if ($live) {
		check_inodes($ibx);
	} else {
		delete(@$ibx{qw(over mm description cloneurl
				-imap_url -nntp_url -pop3_url)});
	}
	my $srch = $ibx->{search} // $ibx;
	delete @$srch{qw(xdb qp)};
	for my $git (@{$ibx->{-repo_objs} // []}) {
		$live = 1 if $git->cleanup(1);
	}
	PublicInbox::DS::add_uniq_timer($ibx+0, 5, \&do_cleanup, $ibx) if $live;
}

sub _cleanup_later ($) {
	# no need to require DS, here, if it were enabled another
	# module would've require'd it, already
	eval { PublicInbox::DS::in_loop() } and
		PublicInbox::DS::add_uniq_timer($_[0]+0, 30, \&do_cleanup, @_)
}

sub _set_limiter ($$$) {
	my ($self, $pi_cfg, $pfx) = @_;
	my $lkey = "-${pfx}_limiter";
	$self->{$lkey} //= do {
		# full key is: publicinbox.$NAME.httpbackendmax
		my $mkey = $pfx.'max';
		my $val = $self->{$mkey} or return;
		my $lim;
		if ($val =~ /\A[0-9]+\z/) {
			require PublicInbox::Qspawn;
			$lim = PublicInbox::Qspawn::Limiter->new($val);
		} elsif ($val =~ /\A[a-z][a-z0-9]*\z/) {
			$lim = $pi_cfg->limiter($val);
			warn "$mkey limiter=$val not found\n" if !$lim;
		} else {
			warn "$mkey limiter=$val not understood\n";
		}
		$lim;
	}
}

sub new {
	my ($class, $opts) = @_;
	my $v = $opts->{address} ||= [ 'public-inbox@example.com' ];
	my $p = $opts->{-primary_address} = ref($v) eq 'ARRAY' ? $v->[0] : $v;
	$opts->{domain} = ($p =~ /\@(\S+)\z/) ? $1 : 'localhost';
	my $pi_cfg = delete $opts->{-pi_cfg};
	_set_limiter($opts, $pi_cfg, 'httpbackend');
	my $fmax = $opts->{feedmax};
	if (defined($fmax) && $fmax =~ /\A[0-9]+\z/) {
		$opts->{feedmax} += 0;
	} else {
		delete $opts->{feedmax};
	}
	# allow any combination of multi-line or comma-delimited hide entries
	my $hide = {};
	if (defined(my $h = $opts->{hide})) {
		foreach my $v (@$h) {
			$hide->{$_} = 1 foreach (split(/\s*,\s*/, $v));
		}
		$opts->{-hide} = $hide;
	}
	bless $opts, $class;
}

sub version {
	$_[0]->{version} //= -f "$_[0]->{inboxdir}/inbox.lock" ? 2 : 1
}

sub git_epoch {
	my ($self, $epoch) = @_; # v2-only, callers always supply $epoch
	$self->{"$epoch.git"} //= do {
		my $git_dir = "$self->{inboxdir}/git/$epoch.git";
		return unless -d $git_dir;
		my $g = PublicInbox::Git->new($git_dir);
		my $lim = $self->{-httpbackend_limiter};
		$g->{-httpbackend_limiter} = $lim if $lim;
		# caller must manually cleanup when done
		$g;
	};
}

sub git {
	my ($self) = @_;
	$self->{git} //= do {
		my $git_dir = $self->{inboxdir};
		$git_dir .= '/all.git' if $self->version == 2;
		my $g = PublicInbox::Git->new($git_dir);
		my $lim = $self->{-httpbackend_limiter};
		$g->{-httpbackend_limiter} = $lim if $lim;
		_cleanup_later($self);
		$g;
	};
}

sub max_git_epoch {
	my ($self) = @_;
	return if $self->version < 2;
	my $cur = $self->{-max_git_epoch};
	my $changed;
	if (!defined($cur) || ($changed = git($self)->alternates_changed)) {
		$self->{git}->cleanup if $changed;
		my $gits = "$self->{inboxdir}/git";
		if (opendir my $dh, $gits) {
			my $max = max(map {
				substr($_, 0, -4) + 0; # drop ".git" suffix
			} grep(/\A[0-9]+\.git\z/, readdir($dh))) // return;
			$cur = $self->{-max_git_epoch} = $max;
		}
	}
	$cur;
}

sub mm_file {
	my ($self) = @_;
	my $d = $self->{inboxdir};
	($self->version >= 2 ? $d : "$d/public-inbox").'/msgmap.sqlite3';
}

sub mm {
	my ($self, $req) = @_;
	$self->{mm} //= eval {
		require PublicInbox::Msgmap;
		_cleanup_later($self);
		PublicInbox::Msgmap->new_file($self);
	} // ($req ? croak("E: $@") : undef);
}

sub search {
	my ($self) = @_;
	$self->{search} // eval {
		_cleanup_later($self);
		require PublicInbox::Search;
		my $srch = PublicInbox::Search->new($self);
		(eval { $srch->xdb }) ? ($self->{search} = $srch) : undef;
	};
}

# isrch is preferred for read-only interfaces if available since it
# reduces kernel cache and FD overhead
sub isrch { $_[0]->{isrch} // search($_[0]) }

sub over {
	my ($self, $req) = @_;
	$self->{over} // eval {
		my $srch = $self->{search} // do {
			require PublicInbox::Search;
			PublicInbox::Search->new($self);
		};
		_cleanup_later($self);
		my $over = PublicInbox::Over->new("$srch->{xpfx}/over.sqlite3");
		$over->dbh; # may fail
		$self->{over} = $over;
	} // ($req ? croak("E: $@") : undef);
}

sub try_cat {
	my ($path) = @_;
	open(my $fh, '<', $path) or return '';
	local $/;
	<$fh> // '';
}

sub cat_desc ($) {
	my $desc = try_cat($_[0]);
	local $/ = "\n";
	chomp $desc;
	utf8::decode($desc);
	$desc =~ s/\s+/ /smg;
	$desc eq '' ? undef : $desc;
}

sub description {
	my ($self) = @_;
	($self->{description} //= cat_desc("$self->{inboxdir}/description")) //
		'($INBOX_DIR/description missing)';
}

sub cloneurl {
	my ($self) = @_;
	$self->{cloneurl} // do {
		my $s = try_cat("$self->{inboxdir}/cloneurl");
		my @urls = split(/\s+/s, $s);
		scalar(@urls) ? ($self->{cloneurl} = \@urls) : undef;
	} // [];
}

sub base_url {
	my ($self, $env) = @_; # env - PSGI env
	if ($env && $env->{'psgi.url_scheme'}) {
		my $url = PublicInbox::Git::host_prefix_url($env, '');
		# for mount in Plack::Builder
		$url .= '/' if $url !~ m!/\z!;
		return $url .= $self->{name} . '/';
	}
	# called from a non-PSGI environment (e.g. NNTP/POP3):
	my $url = $self->{url} // return undef;
	$url = $url->[0] // return undef;
	# expand protocol-relative URLs to HTTPS if we're
	# not inside a web server
	substr($url, 0, 0, 'https:') if substr($url, 0, 2) eq '//';
	$url .= '/' if substr($url, -1, 1) ne '/';
	$url;
}

# imapserver, nntpserver configs are used here:
sub _x_url ($$$) {
	my ($self, $x, $ctx) = @_; # $x is "imap" or "nntp"
	# no checking for nntp_usable here, we can point entirely
	# to non-local servers or users run by a different user
	my $ns = $self->{"${x}server"} //
	       $ctx->{www}->{pi_cfg}->get_all("publicinbox.${x}server");
	my $group = $self->{newsgroup};
	my @urls;
	if ($ns && $group) {
		@urls = map {
			my $u = m!\A${x}s?://! ? $_ : "$x://$_";
			$u .= '/' if $u !~ m!/\z!;
			$u.$group;
		} @$ns;
	}
	if (my $mirrors = $self->{"${x}mirror"}) {
		my @m;
		for (@$mirrors) {
			my $u = m!\A${x}s?://! ? $_ : "$x://$_";
			if ($u =~ m!\A${x}s?://[^/]+/?\z!) {
				if ($group) {
					$u .= '/' if $u !~ m!/\z!;
					$u .= $group;
				} else { # n.b. IMAP and POP3 use "newsgroup"
					warn <{name}.${x}mirror=$_ missing newsgroup name
EOM
				}
			}
			# else: allow full URLs like:
			# nntp://news.example.com/alt.example
			push @m, $u;
		}

		# List::Util::uniq requires Perl 5.26+, maybe we
		# can use it by 2030 or so
		my %seen;
		@urls = grep { !$seen{$_}++ } (@urls, @m);
	}
	\@urls;
}

# my ($self, $ctx) = @_;
sub imap_url { $_[0]->{-imap_url} //= _x_url($_[0], 'imap', $_[1]) }
sub nntp_url { $_[0]->{-nntp_url} //= _x_url($_[0], 'nntp', $_[1]) }

sub pop3_url {
	my ($self, $ctx) = @_;
	$self->{-pop3_url} //= do {
		my $ps = $self->{'pop3server'} //
		       $ctx->{www}->{pi_cfg}->get_all('publicinbox.pop3server');
		my $group = $self->{newsgroup};
		my @urls;
		($ps && $group) and
			@urls = map { m!\Apop3?s?://! ? $_ : "pop3://$_" } @$ps;
		if (my $mi = $self->{'pop3mirror'}) {
			my @m = map { m!\Apop3?s?://! ? $_ : "pop3://$_" } @$mi;
			my %seen; # List::Util::uniq requires Perl 5.26+
			@urls = grep { !$seen{$_}++ } (@urls, @m);
		}
		my $n = 0;
		for (@urls) { $n += s!/+\z!! }
		warn <cat_file('HEAD:'.$path);
}

sub msg_by_smsg ($$) {
	my ($self, $smsg) = @_;

	# ghosts may have undef smsg (from SearchThread.node) or
	# no {blob} field
	$smsg // return;
	$self->git->cat_file($smsg->{blob} // return);
}

sub smsg_eml {
	my ($self, $smsg) = @_;
	my $bref = msg_by_smsg($self, $smsg) or return;
	my $eml = PublicInbox::Eml->new($bref);
	$smsg->{num} // $smsg->populate($eml);
	$eml;
}

sub smsg_by_mid ($$) {
	my ($self, $mid) = @_;
	my $over = $self->over or return;
	my $smsg;
	if (my $mm = $self->mm) {
		# favor the Message-ID we used for the NNTP article number:
		my $num = $mm->num_for($mid) // return;
		$smsg = $over->get_art($num);
	} else {
		my ($id, $prev);
		$smsg = $over->next_by_mid($mid, \$id, \$prev);
	}
	$smsg ? PublicInbox::Smsg::psgi_cull($smsg) : undef;
}

sub msg_by_mid ($$) {
	my ($self, $mid) = @_;
	my $smsg = smsg_by_mid($self, $mid);
	$smsg ? msg_by_smsg($self, $smsg) : msg_by_path($self, mid2path($mid));
}

sub recent {
	my ($self, $opts, $after, $before) = @_;
	$self->over->recent($opts, $after, $before);
}

sub modified {
	my ($self) = @_;
	if (my $over = $self->over) {
		my $msgs = $over->recent({limit => 1});
		if (my $smsg = $msgs->[0]) {
			return $smsg->{ts};
		}
		return time;
	}
	git($self)->modified; # v1
}

# returns prefix => pathname mapping
# (pathname is NOT public, but prefix is used for Xapian queries)
sub altid_map ($) {
	my ($self) = @_;
	eval {
		require PublicInbox::AltId;
		my $altid = $self->{altid} or return {};
		my %h = map {;
			my $x = PublicInbox::AltId->new($self, $_);
			"$x->{prefix}" => $x->{filename}
		} @$altid;
		\%h;
	} // {};
}

# $obj must respond to ->on_inbox_unlock, which takes Inbox ($self) as an arg
sub subscribe_unlock {
	my ($self, $ident, $obj) = @_;
	$self->{unlock_subs}->{$ident} = $obj;
}

sub unsubscribe_unlock {
	my ($self, $ident) = @_;
	delete $self->{unlock_subs}->{$ident};
}

# called by inotify
sub on_unlock {
	my ($self) = @_;
	check_inodes($self);
	my $subs = $self->{unlock_subs} or return;
	for my $obj (values %$subs) {
		eval { $obj->on_inbox_unlock($self) };
		warn "E: $@ ($self->{inboxdir})\n" if $@;
	}
}

sub uidvalidity { $_[0]->{uidvalidity} //= eval { $_[0]->mm->created_at } }

sub eidx_key { $_[0]->{newsgroup} // $_[0]->{inboxdir} }

# only used by NNTP, so we need ->mm anyways
sub art_min { $_[0]->{-art_min} //= eval { $_[0]->mm(1)->min } }

# used by IMAP, too, which tries to avoid ->mm (but ->{mm} is likely
# faster since it's smaller iff available)
sub art_max {
	$_[0]->{-art_max} //= eval { $_[0]->{mm}->max } //
				eval { $_[0]->over(1)->max };
}

sub mailboxid { # rfc 8474, 8620, 8621
	my ($self, $imap_slice) = @_;
	my $pfx = defined($imap_slice) ? $self->{newsgroup} : $self->{name};
	utf8::encode($pfx); # to octets
	# RFC 8620, 1.2 recommends not starting with dash or digits
	# "A good solution to these issues is to prefix every id with a single
	#  alphabetical character."
	'M'.join('', map { sprintf('%02x', ord) } split(//, $pfx)) .
		(defined($imap_slice) ? sprintf('-%x', $imap_slice) : '') .
		sprintf('-%x', uidvalidity($self) // 0)
}

sub thing_type { 'public inbox' }

1;
public-inbox-1.9.0/lib/PublicInbox/InboxIdle.pm000066400000000000000000000060641430031475700213250ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors 
# License: AGPL-3.0+ 

# fields:
# inot: Linux::Inotify2-like object
# pathmap => { inboxdir => [ ibx, watch1, watch2, watch3... ] } mapping
package PublicInbox::InboxIdle;
use strict;
use parent qw(PublicInbox::DS);
use PublicInbox::Syscall qw(EPOLLIN);
my $IN_MODIFY = 0x02; # match Linux inotify
my $ino_cls;
if ($^O eq 'linux' && eval { require Linux::Inotify2; 1 }) {
	$IN_MODIFY = Linux::Inotify2::IN_MODIFY();
	$ino_cls = 'Linux::Inotify2';
} elsif (eval { require PublicInbox::KQNotify }) {
	$IN_MODIFY = PublicInbox::KQNotify::NOTE_WRITE();
	$ino_cls = 'PublicInbox::KQNotify';
}
require PublicInbox::In2Tie if $ino_cls;

sub in2_arm ($$) { # PublicInbox::Config::each_inbox callback
	my ($ibx, $self) = @_;
	my $dir = $ibx->{inboxdir};
	my $inot = $self->{inot};
	my $cur = $self->{pathmap}->{$dir} //= [];
	my $lock = "$dir/".($ibx->version >= 2 ? 'inbox.lock' : 'ssoma.lock');

	# transfer old subscriptions to the current inbox, cancel the old watch
	my $old_ibx = $cur->[0];
	$cur->[0] = $ibx;
	if ($old_ibx) {
		my $u = $ibx->{unlock_subs};
		$ibx->{unlock_subs} = $old_ibx->{unlock_subs};
		%{$ibx->{unlock_subs}} = (%$u, %{$ibx->{unlock_subs}}) if $u;

		# Linux::Inotify2::Watch::name matches if watches are the
		# same, no point in replacing a watch of the same name
		if ($cur->[1]->name eq $lock) {
			$self->{on_unlock}->{$lock} = $ibx;
			return;
		}
		# rare, name changed (v1 inbox converted to v2)
		$cur->[1]->cancel; # Linux::Inotify2::Watch::cancel
	}

	if (my $w = $cur->[1] = $inot->watch($lock, $IN_MODIFY)) {
		$self->{on_unlock}->{$w->name} = $ibx;
	} else {
		warn "E: ".ref($inot)."->watch($lock, IN_MODIFY) failed: $!\n";
		if ($!{ENOSPC} && $^O eq 'linux') {
			warn <<"";
I: consider increasing /proc/sys/fs/inotify/max_user_watches

		}
	}

	# TODO: detect deleted packs (and possibly other files)
}

sub refresh {
	my ($self, $pi_cfg) = @_;
	$pi_cfg->each_inbox(\&in2_arm, $self);
}

# internal API for ease-of-use
sub watch_inbox { in2_arm($_[1], $_[0]) };

sub new {
	my ($class, $pi_cfg) = @_;
	my $self = bless {}, $class;
	my $inot;
	if ($ino_cls) {
		$inot = $ino_cls->new or die "E: $ino_cls->new: $!";
		my $io = PublicInbox::In2Tie::io($inot);
		$self->SUPER::new($io, EPOLLIN);
	} else {
		require PublicInbox::FakeInotify;
		$inot = PublicInbox::FakeInotify->new;
	}
	$self->{inot} = $inot;
	$self->{pathmap} = {}; # inboxdir => [ ibx, watch1, watch2, watch3...]
	$self->{on_unlock} = {}; # lock path => ibx
	refresh($self, $pi_cfg) if $pi_cfg;
	PublicInbox::FakeInotify::poll_once($self) if !$ino_cls;
	$self;
}

sub event_step {
	my ($self) = @_;
	eval {
		my @events = $self->{inot}->read; # Linux::Inotify2::read
		my $on_unlock = $self->{on_unlock};
		for my $ev (@events) {
			my $fn = $ev->fullname // next; # cancelled
			if (my $ibx = $on_unlock->{$fn}) {
				$ibx->on_unlock;
			}
		}
	};
	warn "{inot}->read err: $@\n" if $@;
}

# for graceful shutdown in PublicInbox::Daemon,
# just ensure the FD gets closed ASAP and subscribers
sub busy { 0 }

1;
public-inbox-1.9.0/lib/PublicInbox/InboxWritable.pm000066400000000000000000000142411430031475700222150ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors 
# License: AGPL-3.0+ 

# Extends read-only Inbox for writing
package PublicInbox::InboxWritable;
use strict;
use v5.10.1;
use parent qw(PublicInbox::Inbox Exporter);
use PublicInbox::Import;
use PublicInbox::Filter::Base qw(REJECT);
use Errno qw(ENOENT);
our @EXPORT_OK = qw(eml_from_path);
use Fcntl qw(O_RDONLY O_NONBLOCK);

use constant {
	PERM_UMASK => 0,
	OLD_PERM_GROUP => 1,
	OLD_PERM_EVERYBODY => 2,
	PERM_GROUP => 0660,
	PERM_EVERYBODY => 0664,
};

sub new {
	my ($class, $ibx, $creat_opt) = @_;
	return $ibx if ref($ibx) eq $class;
	my $self = bless $ibx, $class;

	# TODO: maybe stop supporting this
	if ($creat_opt) { # for { nproc => $N }
		$self->{-creat_opt} = $creat_opt;
		init_inbox($self) if $self->version == 1;
	}
	$self;
}

sub assert_usable_dir {
	my ($self) = @_;
	my $dir = $self->{inboxdir};
	return $dir if defined($dir) && $dir ne '';
	die "no inboxdir defined for $self->{name}\n";
}

sub _init_v1 {
	my ($self, $skip_artnum) = @_;
	if (defined($self->{indexlevel}) || defined($skip_artnum)) {
		require PublicInbox::SearchIdx;
		require PublicInbox::Msgmap;
		my $sidx = PublicInbox::SearchIdx->new($self, 1); # just create
		$sidx->begin_txn_lazy;
		my $mm = PublicInbox::Msgmap->new_file($self, 1);
		if (defined $skip_artnum) {
			$mm->{dbh}->begin_work;
			$mm->skip_artnum($skip_artnum);
			$mm->{dbh}->commit;
		}
		undef $mm; # ->created_at set
		$sidx->commit_txn_lazy;
	} else {
		open my $fh, '>>', "$self->{inboxdir}/ssoma.lock" or
			die "$self->{inboxdir}/ssoma.lock: $!\n";
	}
}

sub init_inbox {
	my ($self, $shards, $skip_epoch, $skip_artnum) = @_;
	if ($self->version == 1) {
		my $dir = assert_usable_dir($self);
		PublicInbox::Import::init_bare($dir);
		$self->with_umask(\&_init_v1, $self, $skip_artnum);
	} else {
		my $v2w = importer($self);
		$v2w->init_inbox($shards, $skip_epoch, $skip_artnum);
	}
}

sub importer {
	my ($self, $parallel) = @_;
	my $v = $self->version;
	if ($v == 2) {
		eval { require PublicInbox::V2Writable };
		die "v2 not supported: $@\n" if $@;
		my $opt = $self->{-creat_opt};
		my $v2w = PublicInbox::V2Writable->new($self, $opt);
		$v2w->{parallel} = $parallel if defined $parallel;
		$v2w;
	} elsif ($v == 1) {
		my @arg = (undef, undef, undef, $self);
		PublicInbox::Import->new(@arg);
	} else {
		$! = 78; # EX_CONFIG 5.3.5 local configuration error
		die "unsupported inbox version: $v\n";
	}
}

sub filter {
	my ($self, $im) = @_;
	my $f = $self->{filter};
	if ($f && $f =~ /::/) {
		# v2 keeps msgmap open, which causes conflicts for filters
		# such as PublicInbox::Filter::RubyLang which overload msgmap
		# for a predictable serial number.
		if ($im && $self->version >= 2 && $self->{altid}) {
			$im->done;
		}

		my @args = (ibx => $self);
		# basic line splitting, only
		# Perhaps we can have proper quote splitting one day...
		($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;

		eval "require $f";
		if ($@) {
			warn $@;
		} else {
			# e.g: PublicInbox::Filter::Vger->new(@args)
			return $f->new(@args);
		}
	}
	undef;
}

sub eml_from_path ($) {
	my ($path) = @_;
	if (sysopen(my $fh, $path, O_RDONLY|O_NONBLOCK)) {
		return unless -f $fh; # no FIFOs or directories
		my $str = do { local $/; <$fh> } or return;
		PublicInbox::Eml->new(\$str);
	} else { # ENOENT is common with Maildir
		warn "failed to open $path: $!\n" if $! != ENOENT;
		undef;
	}
}

sub _each_maildir_eml {
	my ($fn, $kw, $eml, $im, $self) = @_;
	return if grep(/\Adraft\z/, @$kw);
	if ($self && (my $filter = $self->filter($im))) {
		my $ret = $filter->scrub($eml) or return;
		return if $ret == REJECT();
		$eml = $ret;
	}
	$im->add($eml);
}

# XXX does anybody use this?
sub import_maildir {
	my ($self, $dir) = @_;
	foreach my $sub (qw(cur new tmp)) {
		-d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n";
	}
	my $im = $self->importer(1);
	my @self = $self->filter($im) ? ($self) : ();
	require PublicInbox::MdirReader;
	PublicInbox::MdirReader->new->maildir_each_eml($dir,
					\&_each_maildir_eml, $im, @self);
	$im->done;
}

sub _mbox_eml_cb { # MboxReader->mbox* callback
	my ($eml, $im, $filter) = @_;
	if ($filter) {
		my $ret = $filter->scrub($eml) or return;
		return if $ret == REJECT();
		$eml = $ret;
	}
	$im->add($eml);
}

sub import_mbox {
	my ($self, $fh, $variant) = @_;
	require PublicInbox::MboxReader;
	my $cb = PublicInbox::MboxReader->reads($variant) or
		die "$variant not supported\n";
	my $im = $self->importer(1);
	$cb->(undef, $fh, \&_mbox_eml_cb, $im, $self->filter);
	$im->done;
}

sub _read_git_config_perm {
	my ($self) = @_;
	chomp(my $perm = $self->git->qx('config', 'core.sharedRepository'));
	$perm;
}

sub _git_config_perm {
	my $self = shift;
	my $perm = scalar @_ ? $_[0] : _read_git_config_perm($self);
	return PERM_UMASK if (!defined($perm) || $perm eq '');
	return PERM_UMASK if ($perm eq 'umask');
	return PERM_GROUP if ($perm eq 'group');
	if ($perm =~ /\A(?:all|world|everybody)\z/) {
		return PERM_EVERYBODY;
	}
	return PERM_GROUP if ($perm =~ /\A(?:true|yes|on|1)\z/);
	return PERM_UMASK if ($perm =~ /\A(?:false|no|off|0)\z/);

	my $i = oct($perm);
	return PERM_UMASK if ($i == PERM_UMASK);
	return PERM_GROUP if ($i == OLD_PERM_GROUP);
	return PERM_EVERYBODY if ($i == OLD_PERM_EVERYBODY);

	if (($i & 0600) != 0600) {
		die "core.sharedRepository mode invalid: ".
		    sprintf('%.3o', $i) . "\nOwner must have permissions\n";
	}
	($i & 0666);
}

sub _umask_for {
	my ($perm) = @_; # _git_config_perm return value
	my $rv = $perm;
	return umask if $rv == 0;

	# set +x bit if +r or +w were set
	$rv |= 0100 if ($rv & 0600);
	$rv |= 0010 if ($rv & 0060);
	$rv |= 0001 if ($rv & 0006);
	(~$rv & 0777);
}

sub with_umask {
	my ($self, $cb, @arg) = @_;
	my $old = umask($self->{umask} //= umask_prepare($self));
	my $rv = eval { $cb->(@arg) };
	my $err = $@;
	umask $old;
	die $err if $err;
	$rv;
}

sub umask_prepare {
	my ($self) = @_;
	my $perm = _git_config_perm($self);
	_umask_for($perm);
}

sub cleanup ($) {
	delete @{$_[0]}{qw(over mm git search)};
}

# v2+ only, XXX: maybe we can just rely on ->max_git_epoch and remove
sub git_dir_latest {
	my ($self, $max) = @_;
	defined($$max = $self->max_git_epoch) ?
		"$self->{inboxdir}/git/$$max.git" : undef;
}

1;
public-inbox-1.9.0/lib/PublicInbox/InputPipe.pm000066400000000000000000000017741430031475700213700ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# for reading pipes and sockets off the DS event loop
package PublicInbox::InputPipe;
use strict;
use v5.10.1;
use parent qw(PublicInbox::DS);
use PublicInbox::Syscall qw(EPOLLIN EPOLLET);

sub consume {
	my ($in, $cb, @args) = @_;
	my $self = bless { cb => $cb, args => \@args }, __PACKAGE__;
	eval { $self->SUPER::new($in, EPOLLIN|EPOLLET) };
	return $self->requeue if $@; # regular file
	$in->blocking(0); # pipe or socket
}

sub event_step {
	my ($self) = @_;
	my $r = sysread($self->{sock} // return, my $rbuf, 65536);
	if ($r) {
		$self->{cb}->(@{$self->{args} // []}, $rbuf);
		return $self->requeue; # may be regular file or pipe
	}
	if (defined($r)) { # EOF
		$self->{cb}->(@{$self->{args} // []}, '');
	} elsif ($!{EAGAIN}) {
		return;
	} else { # another error
		$self->{cb}->(@{$self->{args} // []}, undef)
	}
	$self->{sock}->blocking ? delete($self->{sock}) : $self->close
}

1;
public-inbox-1.9.0/lib/PublicInbox/Isearch.pm000066400000000000000000000071261430031475700210260ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors 
# License: AGPL-3.0+ 

# Provides everything the PublicInbox::Search object does;
# but uses global ExtSearch (->ALL) with an eidx_key query to
# emulate per-Inbox search using ->ALL.
package PublicInbox::Isearch;
use strict;
use v5.10.1;
use PublicInbox::ExtSearch;
use PublicInbox::Search;

sub new {
	my (undef, $ibx, $es) = @_;
	bless { es => $es, eidx_key => $ibx->eidx_key }, __PACKAGE__;
}

sub _ibx_id ($) {
	my ($self) = @_;
	my $sth = $self->{es}->over->dbh->prepare_cached(<<'', undef, 1);
SELECT ibx_id FROM inboxes WHERE eidx_key = ? LIMIT 1

	$sth->execute($self->{eidx_key});
	$sth->fetchrow_array //
		die "E: `$self->{eidx_key}' not in $self->{es}->{topdir}\n";
}

sub query_approxidate { $_[0]->{es}->query_approxidate($_[1], $_[2]) }

sub mset {
	my ($self, $str, $opt) = @_;
	my %opt = $opt ? %$opt : ();
	$opt{eidx_key} = $self->{eidx_key};
	if (my $uid_range = $opt{uid_range}) {
		my ($beg, $end) = @$uid_range;
		my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
		my $dbh = $self->{es}->over->dbh;
		my $sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT MIN(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?

		$sth->execute($ibx_id, $beg, $end);
		my @r = ($sth->fetchrow_array);

		$sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT MAX(docid) FROM xref3 WHERE ibx_id = ? AND xnum >= ? AND xnum <= ?

		$sth->execute($ibx_id, $beg, $end);
		$r[1] = $sth->fetchrow_array;
		if (defined($r[1]) && defined($r[0])) {
			$opt{limit} = $r[1] - $r[0] + 1;
		} else {
			$r[1] //= 0xffffffff;
			$r[0] //= 0;
		}
		$opt{uid_range} = \@r;
	}
	$self->{es}->mset($str, \%opt);
}

sub mset_to_artnums {
	my ($self, $mset, $opt) = @_;
	my $docids = PublicInbox::Search::mset_to_artnums($self->{es}, $mset);
	my $ibx_id = $self->{-ibx_id} //= _ibx_id($self);
	my $qmarks = join(',', map { '?' } @$docids);
	if ($opt && ($opt->{relevance} // 0) == -1) { # -1 => ENQ_ASCENDING
		my $range = '';
		my @r;
		if (my $r = $opt->{uid_range}) {
			$range = 'AND xnum >= ? AND xnum <= ?';
			@r = @$r;
		}
		return $self->{es}->over->dbh->
			selectcol_arrayref(<<"", undef, $ibx_id, @$docids, @r);
SELECT xnum FROM xref3 WHERE ibx_id = ? AND docid IN ($qmarks) $range
ORDER BY xnum ASC

	}

	my $rows = $self->{es}->over->dbh->
			selectall_arrayref(<<"", undef, $ibx_id, @$docids);
SELECT docid,xnum FROM xref3 WHERE ibx_id = ? AND docid IN ($qmarks)

	my $i = -1;
	my %order = map { $_ => ++$i } @$docids;
	my @xnums;
	for my $row (@$rows) { # @row = ($docid, $xnum)
		my $idx = delete($order{$row->[0]}) // next;
		$xnums[$idx] = $row->[1];
	}
	if (scalar keys %order) {
		warn "W: $self->{es}->{topdir} #",
			join(', ', sort { $a <=> $b } keys %order),
			" not mapped to `$self->{eidx_key}'\n";
		warn "W: $self->{es}->{topdir} may need to be reindexed\n";
		@xnums = grep { defined } @xnums;
	}
	\@xnums;
}

sub mset_to_smsg {
	my ($self, $ibx, $mset) = @_; # $ibx is a real inbox, not eidx
	my $xnums = mset_to_artnums($self, $mset);
	my $i = -1;
	my %order = map { $_ => ++$i } @$xnums;
	my $unordered = $ibx->over->get_all(@$xnums);
	my @msgs;
	for my $smsg (@$unordered) {
		my $idx = delete($order{$smsg->{num}}) // do {
			warn "W: $ibx->{inboxdir} #$smsg->{num}\n";
			next;
		};
		$msgs[$idx] = $smsg;
	}
	if (scalar keys %order) {
		warn "W: $ibx->{inboxdir} #",
			join(', ', sort { $a <=> $b } keys %order),
			" no longer valid\n";
		warn "W: $self->{es}->{topdir} may need to be reindexed\n";
	}
	wantarray ? ($mset->get_matches_estimated, \@msgs) : \@msgs;
}

sub has_threadid { 1 }

sub help { $_[0]->{es}->help }

1;
public-inbox-1.9.0/lib/PublicInbox/KQNotify.pm000066400000000000000000000057561430031475700211630ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors 
# License: AGPL-3.0+ 

# implements the small subset of Linux::Inotify2 functionality we use
# using IO::KQueue on *BSD systems.
package PublicInbox::KQNotify;
use strict;
use v5.10.1;
use IO::KQueue;
use PublicInbox::DSKQXS; # wraps IO::KQueue for fork-safe DESTROY
use PublicInbox::FakeInotify qw(fill_dirlist on_dir_change);
use Time::HiRes qw(stat);

# NOTE_EXTEND detects rename(2), NOTE_WRITE detects link(2)
sub MOVED_TO_OR_CREATE () { NOTE_EXTEND|NOTE_WRITE }

sub new {
	my ($class) = @_;
	bless { dskq => PublicInbox::DSKQXS->new, watch => {} }, $class;
}

sub watch {
	my ($self, $path, $mask) = @_;
	my ($fh, $watch);
	if (-d $path) {
		opendir($fh, $path) or return;
		my @st = stat($fh);
		$watch = bless [ $fh, $path, $st[10] ],
			'PublicInbox::KQNotify::Watchdir';
	} else {
		open($fh, '<', $path) or return;
		$watch = bless [ $fh, $path ],
			'PublicInbox::KQNotify::Watch';
	}
	my $ident = fileno($fh);
	$self->{dskq}->{kq}->EV_SET($ident, # ident (fd)
		EVFILT_VNODE, # filter
		EV_ADD | EV_CLEAR, # flags
		$mask, # fflags
		0, 0); # data, udata
	if ($mask & (MOVED_TO_OR_CREATE|NOTE_DELETE|NOTE_LINK|NOTE_REVOKE)) {
		$self->{watch}->{$ident} = $watch;
		if ($mask & (NOTE_DELETE|NOTE_LINK|NOTE_REVOKE)) {
			fill_dirlist($self, $path, $fh)
		}
	} else {
		die "TODO Not implemented: $mask";
	}
	$watch;
}

# emulate Linux::Inotify::fileno
sub fileno { ${$_[0]->{dskq}->{kq}} }

# noop for Linux::Inotify2 compatibility.  Unlike inotify,
# kqueue doesn't seem to overflow since it's limited by the number of
# open FDs the process has
sub on_overflow {}

# noop for Linux::Inotify2 compatibility, we use `0' timeout for ->kevent
sub blocking {}

# behave like Linux::Inotify2->read
sub read {
	my ($self) = @_;
	my @kevents = $self->{dskq}->{kq}->kevent(0);
	my $events = [];
	my @gone;
	my $watch = $self->{watch};
	for my $kev (@kevents) {
		my $ident = $kev->[KQ_IDENT];
		my $mask = $kev->[KQ_FFLAGS];
		my ($dh, $path, $old_ctime) = @{$watch->{$ident}};
		if (!defined($old_ctime)) {
			push @$events,
				bless(\$path, 'PublicInbox::FakeInotify::Event')
		} elsif ($mask & (MOVED_TO_OR_CREATE|NOTE_DELETE|NOTE_LINK|
				NOTE_REVOKE|NOTE_RENAME)) {
			my @new_st = stat($path);
			if (!@new_st && $!{ENOENT}) {
				push @$events, bless(\$path,
						'PublicInbox::FakeInotify::'.
						'SelfGoneEvent');
				push @gone, $ident;
				delete $self->{dirlist}->{$path};
				next;
			}
			if (!@new_st) {
				warn "unhandled stat($path) error: $!\n";
				next;
			}
			$watch->{$ident}->[3] = $new_st[10]; # ctime
			rewinddir($dh);
			on_dir_change($events, $dh, $path, $old_ctime,
					$self->{dirlist});
		}
	}
	delete @$watch{@gone};
	@$events;
}

package PublicInbox::KQNotify::Watch;
use strict;

sub name { $_[0]->[1] }

sub cancel { close $_[0]->[0] or die "close: $!" }

package PublicInbox::KQNotify::Watchdir;
use strict;

sub name { $_[0]->[1] }

sub cancel { closedir $_[0]->[0] or die "closedir: $!" }

1;
public-inbox-1.9.0/lib/PublicInbox/LEI.pm000066400000000000000000001440441430031475700200620ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# Backend for `lei' (local email interface).  Unlike the C10K-oriented
# PublicInbox::Daemon, this is designed exclusively to handle trusted
# local clients with read/write access to the FS and use as many
# system resources as the local user has access to.
package PublicInbox::LEI;
use v5.12;
use parent qw(PublicInbox::DS PublicInbox::LeiExternal
	PublicInbox::LeiQuery);
use Getopt::Long ();
use Socket qw(AF_UNIX SOCK_SEQPACKET MSG_EOR pack_sockaddr_un);
use Errno qw(EPIPE EAGAIN ECONNREFUSED ENOENT ECONNRESET);
use Cwd qw(getcwd);
use POSIX qw(strftime);
use IO::Handle ();
use Fcntl qw(SEEK_SET);
use PublicInbox::Config;
use PublicInbox::Syscall qw(EPOLLIN);
use PublicInbox::DS qw(dwaitpid);
use PublicInbox::Spawn qw(spawn popen_rd);
use PublicInbox::Lock;
use PublicInbox::Eml;
use PublicInbox::Import;
use PublicInbox::ContentHash qw(git_sha);
use Time::HiRes qw(stat); # ctime comparisons for config cache
use File::Path qw(mkpath);
use File::Spec;
use Sys::Syslog qw(openlog syslog closelog);
our $quit = \&CORE::exit;
our ($current_lei, $errors_log, $listener, $oldset, $dir_idle,
	$recv_cmd, $send_cmd);
my $GLP = Getopt::Long::Parser->new;
$GLP->configure(qw(gnu_getopt no_ignore_case auto_abbrev));
my $GLP_PASS = Getopt::Long::Parser->new;
$GLP_PASS->configure(qw(gnu_getopt no_ignore_case auto_abbrev pass_through));

our %PATH2CFG; # persistent for socket daemon
our $MDIR2CFGPATH; # /path/to/maildir => { /path/to/config => [ ino watches ] }

# TBD: this is a documentation mechanism to show a subcommand
# (may) pass options through to another command:
sub pass_through { $GLP_PASS }

my $OPT;
sub opt_dash ($$) {
	my ($spec, $re_str) = @_; # 'limit|n=i', '([0-9]+)'
	my ($key) = ($spec =~ m/\A([a-z]+)/g);
	my $cb = sub { # Getopt::Long "<>" catch-all handler
		my ($arg) = @_;
		if ($arg =~ /\A-($re_str)\z/) {
			$OPT->{$key} = $1;
		} elsif ($arg eq '--') { # "--" arg separator, ignore first
			push @{$OPT->{-argv}}, $arg if $OPT->{'--'}++;
		# lone (single) dash is handled elsewhere
		} elsif (substr($arg, 0, 1) eq '-') {
			if ($OPT->{'--'}) {
				push @{$OPT->{-argv}}, $arg;
			} else {
				die "bad argument: $arg\n";
			}
		} else {
			push @{$OPT->{-argv}}, $arg;
		}
	};
	($spec, '<>' => $cb, $GLP_PASS) # for Getopt::Long
}

# rel2abs preserves symlinks in parent, unlike abs_path
sub rel2abs {
	my ($self, $p) = @_;
	if (index($p, '/') == 0) { # already absolute
		$p =~ tr!/!/!s; # squeeze redundant slashes
		chop($p) if substr($p, -1, 1) eq '/';
		return $p;
	}
	my $pwd = $self->{env}->{PWD};
	if (defined $pwd) {
		if (my @st_pwd = stat($pwd)) {
			my @st_cwd = stat($self->{3}) or die "stat({3}): $!";
			"@st_pwd[1,0]" eq "@st_cwd[1,0]" or
				$self->{env}->{PWD} = $pwd = undef;
		} else { # PWD was invalid
			$self->{env}->{PWD} = $pwd = undef;
		}
	}
	$pwd //= $self->{env}->{PWD} = getcwd() // die "getcwd: $!";
	File::Spec->rel2abs($p, $pwd);
}

# abs_path resolves symlinks in parent iff all parents exist
sub abs_path { Cwd::abs_path($_[1]) // rel2abs(@_) }

sub canonpath_harder {
	my $p = $_[-1]; # $_[0] may be self
	$p = File::Spec->canonpath($p);
	$p =~ m!(?:/*|\A)\.\.(?:/*|\z)! && -e $p ? Cwd::abs_path($p) : $p;
}

sub share_path ($) { # $HOME/.local/share/lei/$FOO
	my ($self) = @_;
	rel2abs($self, ($self->{env}->{XDG_DATA_HOME} //
		($self->{env}->{HOME} // '/nonexistent').'/.local/share')
		.'/lei');
}

sub store_path ($) { share_path($_[0]) . '/store' }

sub _config_path ($) {
	my ($self) = @_;
	rel2abs($self, ($self->{env}->{XDG_CONFIG_HOME} //
		($self->{env}->{HOME} // '/nonexistent').'/.config')
		.'/lei/config');
}

sub cache_dir ($) {
	my ($self) = @_;
	rel2abs($self, ($self->{env}->{XDG_CACHE_HOME} //
		($self->{env}->{HOME} // '/nonexistent').'/.cache')
		.'/lei');
}

sub url_folder_cache {
	my ($self) = @_;
	require PublicInbox::SharedKV; # URI => updated_at_sec_
	PublicInbox::SharedKV->new(cache_dir($self).'/uri_folder');
}

sub ale {
	my ($self) = @_;
	$self->{ale} // do {
		require PublicInbox::LeiALE;
		my $cfg = $self->_lei_cfg(1);
		$self->{ale} = $cfg->{ale} //= PublicInbox::LeiALE->new($self);
	};
}

sub index_opt {
	# TODO: drop underscore variants everywhere, they're undocumented
	qw(fsync|sync! jobs|j=i indexlevel|L=s compact
	max_size|max-size=s sequential-shard
	batch_size|batch-size=s skip-docdata)
}

my @c_opt = qw(c=s@ C=s@ quiet|q);
my @net_opt = (qw(no-torsocks torsocks=s), PublicInbox::LeiQuery::curl_opt());
my @lxs_opt = qw(remote! local! external! include|I=s@ exclude=s@ only|O=s@
	import-remote!);

# we don't support -C as an alias for --find-copies since it's already
# used for chdir
our @diff_opt = qw(unified|U=i output-indicator-new=s output-indicator-old=s
	output-indicator-context=s indent-heuristic!
	minimal patience histogram anchored=s@ diff-algorithm=s
	color-moved:s color-moved-ws=s no-color-moved no-color-moved-ws
	word-diff:s word-diff-regex=s color-words:s no-renames
	rename-empty! check ws-error-highlight=s full-index binary
	abbrev:i break-rewrites|B:s find-renames|M:s find-copies:s
	find-copies-harder irreversible-delete|D l=i diff-filter=s
	S=s G=s find-object=s pickaxe-all pickaxe-regex O=s R
	relative:s text|a ignore-cr-at-eol ignore-space-at-eol
	ignore-space-change|b ignore-all-space|w ignore-blank-lines
	inter-hunk-context=i function-context|W exit-code ext-diff
	no-ext-diff textconv! src-prefix=s dst-prefix=s no-prefix
	line-prefix=s);

# we generate shell completion + help using %CMD and %OPTDESC,
# see lei__complete() and PublicInbox::LeiHelp
# command => [ positional_args, 1-line description, Getopt::Long option spec ]
our %CMD = ( # sorted in order of importance/use:
'q' => [ '--stdin|SEARCH_TERMS...', 'search for messages matching terms',
	'stdin|', # /|\z/ must be first for lone dash
	@lxs_opt, @net_opt,
	qw(save! output|mfolder|o=s format|f=s dedupe|d=s threads|t+
	sort|s=s reverse|r offset=i pretty jobs|j=s globoff|g augment|a
	import-before! lock=s@ rsyncable alert=s@ mua=s verbose|v+
	shared color! mail-sync!), @c_opt, opt_dash('limit|n=i', '[0-9]+') ],

'up' => [ 'OUTPUT...|--all', 'update saved search',
	qw(jobs|j=s lock=s@ alert=s@ mua=s verbose|v+ exclude=s@
	remote-fudge-time=s all:s remote! local! external!), @net_opt, @c_opt ],

'lcat' => [ '--stdin|MSGID_OR_URL...', 'display local copy of message(s)',
	'stdin|', # /|\z/ must be first for lone dash
	# some of these options are ridiculous for lcat
	@lxs_opt, @net_opt,
	qw(output|mfolder|o=s format|f=s dedupe|d=s threads|t+
	sort|s=s reverse|r offset=i jobs|j=s globoff|g augment|a
	import-before! lock=s@ rsyncable alert=s@ mua=s verbose|v+
	color!), @c_opt, opt_dash('limit|n=i', '[0-9]+') ],

'blob' => [ 'OID', 'show a git blob, reconstructing from mail if necessary',
	qw(git-dir=s@ cwd! verbose|v+ mail! oid-a|A=s path-a|a=s path-b|b=s),
	@lxs_opt, @net_opt, @c_opt ],

'rediff' => [ '--stdin|LOCATION...',
		'regenerate a diff with different options',
	'stdin|', # /|\z/ must be first for lone dash
	qw(git-dir=s@ cwd! verbose|v+ color:s no-color drq:1 dequote-only:1),
	@diff_opt, @lxs_opt, @net_opt, @c_opt ],

'mail-diff' => [ '--stdin|LOCATION...', 'diff the contents of emails',
	'stdin|', # /|\z/ must be first for lone dash
	qw(verbose|v+ in-format|F=s color:s no-color raw-header),
	@diff_opt, @net_opt, @c_opt ],

'add-external' => [ 'LOCATION',
	'add/set priority of a publicinbox|extindex for extra matches',
	qw(boost=i mirror=s inbox-version=i epoch=s verbose|v+),
	@c_opt, index_opt(), @net_opt ],
'ls-external' => [ '[FILTER]', 'list publicinbox|extindex locations',
	qw(format|f=s z|0 globoff|g invert-match|v local remote), @c_opt ],
'ls-label' => [ '', 'list labels', qw(z|0 stats:s), @c_opt ],
'ls-mail-sync' => [ '[FILTER]', 'list mail sync folders',
		qw(z|0 globoff|g invert-match|v local remote), @c_opt ],
'ls-mail-source' => [ 'URL', 'list IMAP or NNTP mail source folders',
		qw(z|0 ascii l pretty url), @net_opt, @c_opt ],
'forget-external' => [ 'LOCATION...|--prune',
	'exclude further results from a publicinbox|extindex',
	qw(prune), @c_opt ],

'ls-search' => [ '[PREFIX]', 'list saved search queries',
		qw(format|f=s pretty l ascii z|0), @c_opt ],
'forget-search' => [ 'OUTPUT...|--prune', 'forget a saved search',
		qw(verbose|v+ prune:s), @c_opt ],
'edit-search' => [ 'OUTPUT', "edit saved search via `git config --edit'",
			@c_opt ],
'rm' => [ '--stdin|LOCATION...',
	'remove a message from the index and prevent reindexing',
	'stdin|', # /|\z/ must be first for lone dash
	qw(in-format|F=s lock=s@), @net_opt, @c_opt ],
'plonk' => [ '--threads|--from=IDENT',
	'exclude mail matching From: or threads from non-Message-ID searches',
	qw(stdin| threads|t from|f=s mid=s oid=s), @c_opt ],
'tag' => [ 'KEYWORDS...',
	'set/unset keywords and/or labels on message(s)',
	qw(stdin| in-format|F=s input|i=s@ oid=s@ mid=s@),
	@net_opt, @c_opt, pass_through('-kw:foo for delete') ],

'purge-mailsource' => [ 'LOCATION|--all',
	'remove imported messages from IMAP, Maildirs, and MH',
	qw(exact! all jobs:i indexed), @c_opt ],

'add-watch' => [ 'LOCATION...', 'watch for new messages and flag changes',
	qw(poll-interval=s state=s recursive|r), @c_opt ],
'rm-watch' => [ 'LOCATION...', 'remove specified watch(es)',
	qw(recursive|r), @c_opt ],
'ls-watch' => [ '[FILTER...]', 'list active watches with numbers and status',
		qw(l z|0), @c_opt ],
'pause-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote), @c_opt ],
'resume-watch' => [ '[WATCH_NUMBER_OR_FILTER]', qw(all local remote), @c_opt ],
'forget-watch' => [ '{WATCH_NUMBER|--prune}', 'stop and forget a watch',
	qw(prune), @c_opt ],

'reindex' => [ '', 'reindex all locally-indexed messages', @c_opt ],

'index' => [ 'LOCATION...', 'one-time index from URL or filesystem',
	qw(in-format|F=s kw! offset=i recursive|r exclude=s include|I=s
	verbose|v+ incremental!), @net_opt, # mainly for --proxy=
	 @c_opt ],
'import' => [ 'LOCATION...|--stdin',
	'one-time import/update from URL or filesystem',
	qw(stdin| offset=i recursive|r exclude=s include|I=s new-only
	lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!),
	@net_opt, @c_opt ],
'forget-mail-sync' => [ 'LOCATION...',
	'forget sync information for a mail folder', @c_opt ],
'refresh-mail-sync' => [ 'LOCATION...|--all',
	'prune dangling sync data for a mail folder', 'all:s',
		@net_opt, @c_opt ],
'export-kw' => [ 'LOCATION...|--all',
	'one-time export of keywords of sync sources',
	qw(all:s mode=s), @net_opt, @c_opt ],
'convert' => [ 'LOCATION...|--stdin',
	'one-time conversion from URL or filesystem to another format',
	qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s lock=s@ kw!),
	@net_opt, @c_opt ],
'p2q' => [ 'LOCATION_OR_COMMIT...|--stdin',
	"use a patch to generate a query for `lei q --stdin'",
	qw(stdin| in-format|F=s want|w=s@ uri debug), @net_opt, @c_opt ],
'config' => [ '[...]', sub {
		'git-config(1) wrapper for '._config_path($_[0]);
	}, qw(config-file|system|global|file|f=s), # for conflict detection
	 qw(edit|e c=s@ C=s@), pass_through('git config') ],
'inspect' => [ 'ITEMS...|--stdin', 'inspect lei/store and/or local external',
	qw(stdin| pretty ascii dir|d=s), @c_opt ],

'init' => [ '[DIRNAME]', sub {
	"initialize storage, default: ".store_path($_[0]);
	}, @c_opt ],
'daemon-kill' => [ '[-SIGNAL]', 'signal the lei-daemon',
	# "-C DIR" conflicts with -CHLD, here, and chdir makes no sense, here
	opt_dash('signal|s=s', '[0-9]+|(?:[A-Z][A-Z0-9]+)') ],
'daemon-pid' => [ '', 'show the PID of the lei-daemon' ],
'help' => [ '[SUBCOMMAND]', 'show help' ],

# TODO
#'reorder-local-store-and-break-history' => [ '[REFNAME]',
#	'rewrite git history in an attempt to improve compression',
#	qw(gc!), @c_opt ],
#'fuse-mount' => [ 'PATHNAME', 'expose lei/store as Maildir(s)', @c_opt ],
#
# internal commands are prefixed with '_'
'_complete' => [ '[...]', 'internal shell completion helper',
		pass_through('everything') ],
); # @CMD

# switch descriptions, try to keep consistent across commands
# $spec: Getopt::Long option specification
# $spec => [@ALLOWED_VALUES (default is first), $description],
# $spec => $description
# "$SUB_COMMAND TAB $spec" => as above
my $stdin_formats = [ 'MAIL_FORMAT|eml|mboxrd|mboxcl2|mboxcl|mboxo',
			'specify message input format' ];
my $ls_format = [ 'OUT|plain|json|null', 'listing output format' ];

# we use \x{a0} (non-breaking SP) to avoid wrapping in PublicInbox::LeiHelp
my %OPTDESC = (
'help|h' => 'show this built-in help',
'c=s@' => [ 'NAME=VALUE', 'set config option' ],
'C=s@' => [ 'DIR', 'chdir to specify to directory' ],
'quiet|q' => 'be quiet',
'lock=s@' => [ 'METHOD|dotlock|fcntl|flock|none',
	'mbox(5) locking method(s) to use (default: fcntl,dotlock)' ],

'incremental!	import' => 'import already seen IMAP and NNTP articles',
'globoff|g' => "do not match locations using '*?' wildcards ".
		"and\xa0'[]'\x{a0}ranges",
'invert-match|v' => 'select non-matching lines',
'color!' => 'disable color (for --format=text)',
'verbose|v+' => 'be more verbose',
'external!' => 'do not use externals',
'mail!' => 'do not look in mail storage for OID',
'cwd!' => 'do not look in git repo of current working directory',
'oid-a|A=s' => 'pre-image OID',
'path-a|a=s' => 'pre-image pathname associated with OID',
'path-b|b=s' => 'post-image pathname associated with OID',
'git-dir=s@' => 'additional git repository to scan',
'dir|d=s	inspect' =>
	'specify a inboxdir, extindex topdir or Xapian shard',
'proxy=s' => [ 'PROTO://HOST[:PORT]', # shared with curl(1)
	"proxy for (e.g. `socks5h://0:9050')" ],
'torsocks=s' => ['VAL|auto|no|yes',
		'whether or not to wrap git and curl commands with torsocks'],
'no-torsocks' => 'alias for --torsocks=no',
'save!' =>  "do not save a search for `lei up'",
'import-remote!' => 'do not memoize remote messages into local store',

'type=s' => [ 'any|mid|git', 'disambiguate type' ],

'dedupe|d=s' => ['STRATEGY|content|oid|mid|none',
		'deduplication strategy'],
'threads|t+' =>
	'return all messages in the same threads as the actual match(es)',

'want|w=s@' => [ 'PREFIX|dfpost|dfn', # common ones in help...
		'search prefixes to extract (default: dfpost7)' ],
'uri	p2q' => [ 'URI escape output' ],

'alert=s@' => ['CMD,:WINCH,:bell,',
	'run command(s) or perform ops when done writing to output ' .
	'(default: ":WINCH,:bell" with --mua and Maildir/IMAP output, ' .
	'nothing otherwise)' ],

'augment|a' => 'augment --output destination instead of clobbering',

'output|mfolder|o=s' => [ 'MFOLDER',
	"destination (e.g.\xa0`/path/to/Maildir', ".
	"or\xa0`-'\x{a0}for\x{a0}stdout)" ],
'mua=s' => [ 'CMD',
	"MUA to run on --output Maildir or mbox (e.g.\xa0`mutt\xa0-f\xa0%f')" ],
'new-only	import' => 'only import new messages from IMAP source',

'inbox-version=i' => [ 'NUM|1|2',
		'force a public-inbox version with --mirror'],
'mirror=s' => [ 'URL', 'mirror a public-inbox'],

# public-inbox-index options
'fsync!' => 'speed up indexing after --mirror, risk index corruption',
'compact' => 'run compact index after mirroring',
'indexlevel|L=s' => [ 'LEVEL|full|medium|basic',
	"indexlevel with --mirror (default: full)" ],
'max_size|max-size=s' => [ 'SIZE',
	'do not index messages larger than SIZE (default: infinity)' ],
'batch_size|batch-size=s' => [ 'SIZE',
	'flush changes to OS after given number of bytes (default: 1m)' ],
'sequential-shard' =>
	'index Xapian shards sequentially for slow storage',
'skip-docdata' =>
	'drop compatibility w/ public-inbox <1.6 to save ~1.5% space',

'format|f=s	q' => [
	'OUT|maildir|mboxrd|mboxcl2|mboxcl|mboxo|html|json|jsonl|concatjson',
		'specify output format, default depends on --output'],
'exclude=s@	q' => [ 'LOCATION',
		'exclude specified external(s) from search' ],
'include|I=s@	q' => [ 'LOCATION',
		'include specified external(s) in search' ],
'only|O=s@	q' => [ 'LOCATION',
		'only use specified external(s) for search' ],
'jobs=s	q' => [ '[SEARCH_JOBS][,WRITER_JOBS]',
		'control number of search and writer jobs' ],
'jobs|j=i	add-external' => 'set parallelism when indexing after --mirror',

'in-format|F=s' => $stdin_formats,
'format|f=s	ls-search' => ['OUT|json|jsonl|concatjson',
			'listing output format' ],
'l	ls-search' => 'long listing format',
'l	ls-watch' => 'long listing format',
'l	ls-mail-source' => 'long listing format',
'url	ls-mail-source' => 'show full URL of newsgroup or IMAP folder',
'format|f=s	ls-external' => $ls_format,

'prune:s	forget-search' =>
	['TYPE|local|remote', 'prune all, remote or local folders' ],

'limit|n=i@' => ['NUM', 'limit on number of matches (default: 10000)' ],
'offset=i' => ['OFF', 'search result offset (default: 0)'],

'sort|s=s' => [ 'VAL|received|relevance|docid',
		"order of results is `--output'-dependent"],
'reverse|r' => 'reverse search results', # like sort(1)

'boost=i' => 'increase/decrease priority of results (default: 0)',

'local' => 'limit operations to the local filesystem',
'local!' => 'exclude results from the local filesystem',
'remote' => 'limit operations to those requiring network access',
'remote!' => 'prevent operations requiring network access',

# up, refresh-mail-sync, export-kw
'all:s' => ['TYPE|local|remote', 'all remote or local folders' ],

'remote-fudge-time=s' => [ 'INTERVAL',
	'look for mail INTERVAL older than the last successful query' ],

'mid=s' => 'specify the Message-ID of a message',
'oid=s' => 'specify the git object ID of a message',

'recursive|r' => 'scan directories/mailboxes/newsgroups recursively',
'exclude=s' => 'exclude mailboxes/newsgroups based on pattern',
'include=s' => 'include mailboxes/newsgroups based on pattern',

'exact' => 'operate on exact header matches only',
'exact!' => 'rely on content match instead of exact header matches',

'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ],

'kw!' => 'disable/enable importing keywords (aka "flags")',

# xargs, env, use "-0", git(1) uses "-z".  We support z|0 everywhere
'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines',

'signal|s=s' => [ 'SIG', 'signal to send lei-daemon (default: TERM)' ],
); # %OPTDESC

my %CONFIG_KEYS = (
	'leistore.dir' => 'top-level storage location',
);

my @WQ_KEYS = qw(lxs l2m ikw pmd wq1 lne v2w); # internal workers

sub _drop_wq {
	my ($self) = @_;
	for my $wq (grep(defined, delete(@$self{@WQ_KEYS}))) {
		$wq->wq_kill('-TERM');
		$wq->DESTROY;
	}
}

# pronounced "exit": x_it(1 << 8) => exit(1); x_it(13) => SIGPIPE
sub x_it ($$) {
	my ($self, $code) = @_;
	local $current_lei = $self;
	# make sure client sees stdout before exit
	$self->{1}->autoflush(1) if $self->{1};
	stop_pager($self);
	if ($self->{pkt_op_p}) { # worker => lei-daemon
		$self->{pkt_op_p}->pkt_do('x_it', $code);
	} elsif ($self->{sock}) { # lei->daemon => lei(1) client
		send($self->{sock}, "x_it $code", MSG_EOR);
	} elsif ($quit == \&CORE::exit) { # an admin (one-shot) command
		exit($code >> 8);
	} # else ignore if client disconnected
}

sub err ($;@) {
	my $self = shift;
	my $err = $self->{2} // ($self->{pgr} // [])->[2] // *STDERR{GLOB};
	my @eor = (substr($_[-1]//'', -1, 1) eq "\n" ? () : ("\n"));
	print $err @_, @eor and return;
	my $old_err = delete $self->{2};
	close($old_err) if $! == EPIPE && $old_err;
	$err = $self->{2} = ($self->{pgr} // [])->[2] // *STDERR{GLOB};
	print $err @_, @eor or print STDERR @_, @eor;
}

sub qerr ($;@) { $_[0]->{opt}->{quiet} or err(shift, @_) }

sub qfin { # show message on finalization (LeiFinmsg)
	my ($lei, $msg) = @_;
	return if $lei->{opt}->{quiet};
	$lei->{fmsg} ? push(@{$lei->{fmsg}}, "$msg\n") : qerr($lei, $msg);
}

sub fail_handler ($;$$) {
	my ($lei, $code, $io) = @_;
	local $current_lei = $lei;
	close($io) if $io; # needed to avoid warnings on SIGPIPE
	_drop_wq($lei);
	x_it($lei, $code // (1 << 8));
}

sub sigpipe_handler { # handles SIGPIPE from @WQ_KEYS workers
	fail_handler($_[0], 13, delete $_[0]->{1});
}

sub fail ($$;$) {
	my ($self, $msg, $exit_code) = @_;
	local $current_lei = $self;
	$self->{failed}++;
	warn(substr($msg, -1, 1) eq "\n" ? $msg : "$msg\n") if defined $msg;
	$self->{pkt_op_p}->pkt_do('fail_handler') if $self->{pkt_op_p};
	x_it($self, ($exit_code // 1) << 8);
	undef;
}

sub out ($;@) {
	my $self = shift;
	return if print { $self->{1} // return } @_; # likely
	return note_sigpipe($self, 1) if $! == EPIPE;
	my $err = "error writing to output: $!";
	delete $self->{1};
	fail($self, $err);
}

sub puts ($;@) { out(shift, map { "$_\n" } @_) }

sub child_error { # passes non-fatal curl exit codes to user
	my ($self, $child_error, $msg) = @_; # child_error is $?
	local $current_lei = $self;
	$child_error ||= 1 << 8;
	warn(substr($msg, -1, 1) eq "\n" ? $msg : "$msg\n") if defined $msg;
	if ($self->{pkt_op_p}) { # to top lei-daemon
		$self->{pkt_op_p}->pkt_do('child_error', $child_error);
	} elsif ($self->{sock}) { # to lei(1) client
		send($self->{sock}, "child_error $child_error", MSG_EOR);
	} else { # non-lei admin command
		$self->{child_error} ||= $child_error;
	} # else noop if client disconnected
}

sub note_sigpipe { # triggers sigpipe_handler
	my ($self, $fd) = @_;
	close(delete($self->{$fd})); # explicit close silences Perl warning
	$self->{pkt_op_p}->pkt_do('sigpipe_handler') if $self->{pkt_op_p};
	x_it($self, 13);
}

sub _lei_atfork_child {
	my ($self, $persist) = @_;
	# we need to explicitly close things which are on stack
	if ($persist) {
		open $self->{3}, '<', '/' or die "open(/) $!";
		fchdir($self);
		close($_) for (grep(defined, delete @$self{qw(0 1 2 sock)}));
		if (my $cfg = $self->{cfg}) {
			delete @$cfg{qw(-lei_store -watches -lei_note_event)};
		}
	} else { # worker, Net::NNTP (Net::Cmd) uses STDERR directly
		open STDERR, '+>&='.fileno($self->{2}) or warn "open $!";
		STDERR->autoflush(1);
		POSIX::setpgid(0, $$) // die "setpgid(0, $$): $!";
	}
	close($_) for (grep(defined, delete @$self{qw(old_1 au_done)}));
	delete $self->{-socks};
	if (my $op_c = delete $self->{pkt_op_c}) {
		close(delete $op_c->{sock});
	}
	if (my $pgr = delete $self->{pgr}) {
		close($_) for (@$pgr[1,2]);
	}
	close $listener if $listener;
	undef $listener;
	$dir_idle->force_close if $dir_idle;
	undef $dir_idle;
	%PATH2CFG = ();
	$MDIR2CFGPATH = {};
	eval 'no warnings; undef $PublicInbox::LeiNoteEvent::to_flush';
	undef $errors_log;
	$quit = \&CORE::exit;
	if (!$self->{-eml_noisy}) { # only "lei import" sets this atm
		my $cb = $SIG{__WARN__} // \&CORE::warn;
		$SIG{__WARN__} = sub {
			$cb->(@_) unless PublicInbox::Eml::warn_ignore(@_)
		};
	}
	$SIG{TERM} = sub { exit(128 + 15) };
	$current_lei = $persist ? undef : $self; # for SIG{__WARN__}
}

sub _delete_pkt_op { # OnDestroy callback to prevent leaks on die
	my ($self) = @_;
	if (my $op = delete $self->{pkt_op_c}) { # in case of die
		$op->close; # PublicInbox::PktOp::close
	}
	my $pkt_op_p = delete($self->{pkt_op_p}) or return;
	close $pkt_op_p->{op_p};
}

sub pkt_op_pair {
	my ($self) = @_;
	require PublicInbox::OnDestroy;
	require PublicInbox::PktOp;
	my $end = PublicInbox::OnDestroy->new($$, \&_delete_pkt_op, $self);
	@$self{qw(pkt_op_c pkt_op_p)} = PublicInbox::PktOp->pair;
	$end;
}

sub incr {
	my ($self, $field, $nr) = @_;
	$self->{counters}->{$field} += $nr;
}

sub pkt_ops {
	my ($lei, $ops) = @_;
	$ops->{fail_handler} = [ $lei ];
	$ops->{sigpipe_handler} = [ $lei ];
	$ops->{x_it} = [ $lei ];
	$ops->{child_error} = [ $lei ];
	$ops->{incr} = [ $lei ];
	$ops;
}

sub workers_start {
	my ($lei, $wq, $jobs, $ops, $flds) = @_;
	$ops //= {};
	($wq->can('net_merge_all_done') && $lei->{auth}) and
		$lei->{auth}->op_merge($ops, $wq, $lei);
	pkt_ops($lei, $ops);
	$ops->{''} //= [ $wq->can('_lei_wq_eof') || \&wq_eof, $lei ];
	my $end = $lei->pkt_op_pair;
	my $ident = $wq->{-wq_ident} // "lei-$lei->{cmd} worker";
	$flds->{lei} = $lei;
	$wq->wq_workers_start($ident, $jobs, $lei->oldset, $flds);
	delete $lei->{pkt_op_p};
	my $op_c = delete $lei->{pkt_op_c};
	@$end = ();
	$lei->event_step_init;
	$wq->wq_wait_async($wq->can('_wq_done_wait') // \&wq_done_wait, $lei);
	($op_c, $ops);
}

# call this when we're ready to wait on events and yield to other clients
sub wait_wq_events {
	my ($lei, $op_c, $ops) = @_;
	my $wq1 = $lei->{wq1};
	($wq1 && $wq1->can('net_merge_all_done') && !$lei->{auth}) and
		$wq1->net_merge_all_done;
	for my $wq (grep(defined, @$lei{qw(ikw pmd)})) { # auxiliary WQs
		$wq->wq_close;
	}
	$wq1->{lei_sock} = $lei->{sock} if $wq1;
	$op_c->{ops} = $ops;
}

sub wq1_start {
	my ($lei, $wq, $jobs) = @_;
	my ($op_c, $ops) = workers_start($lei, $wq, $jobs // 1);
	$lei->{wq1} = $wq;
	wait_wq_events($lei, $op_c, $ops); # net_merge_all_done if !{auth}
}

sub _help {
	require PublicInbox::LeiHelp;
	PublicInbox::LeiHelp::call($_[0], $_[1], \%CMD, \%OPTDESC);
}

sub optparse ($$$) {
	my ($self, $cmd, $argv) = @_;
	# allow _complete --help to complete, not show help
	return 1 if substr($cmd, 0, 1) eq '_';
	$self->{cmd} = $cmd;
	$OPT = $self->{opt} //= {};
	my $info = $CMD{$cmd} // [ '[...]' ];
	my ($proto, undef, @spec) = @$info;
	my $glp = ref($spec[-1]) eq ref($GLP) ? pop(@spec) : $GLP;
	push @spec, qw(help|h);
	my $lone_dash;
	if ($spec[0] =~ s/\|\z//s) { # "stdin|" or "clear|" allows "-" alias
		$lone_dash = $spec[0];
		$OPT->{$spec[0]} = \(my $var);
		push @spec, '' => \$var;
	}
	$glp->getoptionsfromarray($argv, $OPT, @spec) or
		return _help($self, "bad arguments or options for $cmd");
	return _help($self) if $OPT->{help};

	push @$argv, @{$OPT->{-argv}} if defined($OPT->{-argv});

	# "-" aliases "stdin" or "clear"
	$OPT->{$lone_dash} = ${$OPT->{$lone_dash}} if defined $lone_dash;

	my $i = 0;
	my $POS_ARG = '[A-Z][A-Z0-9_]+';
	my ($err, $inf);
	my @args = split(/ /, $proto);
	for my $var (@args) {
		if ($var =~ /\A$POS_ARG\.\.\.\z/o) { # >= 1 args;
			$inf = defined($argv->[$i]) and last;
			$var =~ s/\.\.\.\z//;
			$err = "$var not supplied";
		} elsif ($var =~ /\A$POS_ARG\z/o) { # required arg at $i
			$argv->[$i++] // ($err = "$var not supplied");
		} elsif ($var =~ /\.\.\.\]\z/) { # optional args start
			$inf = 1;
			last;
		} elsif ($var =~ /\A\[-?$POS_ARG\]\z/) { # one optional arg
			$i++;
		} elsif ($var =~ /\A.+?\|/) { # required FOO|--stdin
			$inf = 1 if index($var, '...') > 0;
			my @or = split(/\|/, $var);
			my $ok;
			for my $o (@or) {
				if ($o =~ /\A--([a-z0-9\-]+)/) {
					my $sw = $1;
					# assume pipe/regular file on stdin
					# w/o args means stdin
					if ($sw eq 'stdin' && !@$argv &&
							(-p $self->{0} ||
							 -f _) && -r _) {
						$OPT->{stdin} //= 1;
					}
					$ok = defined($OPT->{$sw});
					last if $ok;
				} elsif (defined($argv->[$i])) {
					$ok = 1;
					$i++;
					last;
				} # else continue looping
			}
			last if $ok;
			my $last = pop @or;
			$err = join(', ', @or) . " or $last must be set";
		} else {
			warn "BUG: can't parse `$var' in $proto";
		}
		last if $err;
	}
	if (!$inf && scalar(@$argv) > scalar(@args)) {
		$err //= 'too many arguments';
	}
	$err ? fail($self, "usage: lei $cmd $proto\nE: $err") : 1;
}

sub _tmp_cfg { # for lei -c = ...
	my ($self) = @_;
	my $cfg = _lei_cfg($self, 1);
	require File::Temp;
	my $ft = File::Temp->new(TEMPLATE => 'lei_cfg-XXXX', TMPDIR => 1);
	my $tmp = { '-f' => $ft->filename, -tmp => $ft };
	$ft->autoflush(1);
	print $ft <{-f}: $!");
[include]
	path = $cfg->{-f}
EOM
	$tmp = $self->{cfg} = bless { %$cfg, %$tmp }, ref($cfg);
	for (@{$self->{opt}->{c}}) {
		/\A([^=\.]+\.[^=]+)(?:=(.*))?\z/ or return fail($self, <='
EOM
		my $name = $1;
		my $value = $2 // 1;
		_config($self, '--add', $name, $value);
		if (defined(my $v = $tmp->{$name})) {
			if (ref($v) eq 'ARRAY') {
				push @$v, $value;
			} else {
				$tmp->{$name} = [ $v, $value ];
			}
		} else {
			$tmp->{$name} = $value;
		}
	}
}

sub lazy_cb ($$$) {
	my ($self, $cmd, $pfx) = @_;
	my $ucmd = $cmd;
	$ucmd =~ tr/-/_/;
	my $cb;
	$cb = $self->can($pfx.$ucmd) and return $cb;
	my $base = $ucmd;
	$base =~ s/_([a-z])/\u$1/g;
	my $pkg = "PublicInbox::Lei\u$base";
	($INC{"PublicInbox/Lei\u$base.pm"} // eval("require $pkg")) ?
		$pkg->can($pfx.$ucmd) : undef;
}

sub dispatch {
	my ($self, $cmd, @argv) = @_;
	fchdir($self);
	local %ENV = %{$self->{env}};
	local $current_lei = $self; # for __WARN__
	$self->{2}->autoflush(1); # keep stdout buffered until x_it|DESTROY
	return _help($self, 'no command given') unless defined($cmd);
	# do not support Getopt bundling for this
	while ($cmd eq '-C' || $cmd eq '-c') {
		my $v = shift(@argv) // return fail($self, $cmd eq '-C' ?
					'-C DIRECTORY' : '-c =');
		push @{$self->{opt}->{substr($cmd, 1, 1)}}, $v;
		$cmd = shift(@argv) // return _help($self, 'no command given');
	}
	if (my $cb = lazy_cb(__PACKAGE__, $cmd, 'lei_')) {
		optparse($self, $cmd, \@argv) or return;
		$self->{opt}->{c} and (_tmp_cfg($self) // return);
		if (my $chdir = $self->{opt}->{C}) {
			for my $d (@$chdir) {
				next if $d eq ''; # same as git(1)
				chdir $d or return fail($self, "cd $d: $!");
			}
			open $self->{3}, '<', '.' or
				return fail($self, "open . $!");
		}
		$cb->($self, @argv);
	} elsif (grep(/\A-/, $cmd, @argv)) { # --help or -h only
		$GLP->getoptionsfromarray([$cmd, @argv], {}, qw(help|h C=s@))
			or return _help($self, 'bad arguments or options');
		_help($self);
	} else {
		fail($self, "`$cmd' is not an lei command");
	}
}

sub _lei_cfg ($;$) {
	my ($self, $creat) = @_;
	return $self->{cfg} if $self->{cfg};
	my $f = _config_path($self);
	my @st = stat($f);
	my $cur_st = @st ? pack('dd', $st[10], $st[7]) : ''; # 10:ctime, 7:size
	my ($sto, $sto_dir, $watches, $lne);
	if (my $cfg = $PATH2CFG{$f}) { # reuse existing object in common case
		return ($self->{cfg} = $cfg) if $cur_st eq $cfg->{-st};
		($sto, $sto_dir, $watches, $lne) =
				@$cfg{qw(-lei_store leistore.dir -watches
					-lei_note_event)};
	}
	if (!@st) {
		unless ($creat) {
			delete $self->{cfg};
			return bless {}, 'PublicInbox::Config';
		}
		my ($cfg_dir) = ($f =~ m!(.*?/)[^/]+\z!);
		-d $cfg_dir or mkpath($cfg_dir) or die "mkpath($cfg_dir): $!\n";
		open my $fh, '>>', $f or die "open($f): $!\n";
		@st = stat($fh) or die "fstat($f): $!\n";
		$cur_st = pack('dd', $st[10], $st[7]);
		qerr($self, "# $f created") if $self->{cmd} ne 'config';
	}
	my $cfg = PublicInbox::Config->git_config_dump($f, $self->{2});
	$cfg->{-st} = $cur_st;
	$cfg->{'-f'} = $f;
	if ($sto && canonpath_harder($sto_dir // store_path($self))
			eq canonpath_harder($cfg->{'leistore.dir'} //
						store_path($self))) {
		$cfg->{-lei_store} = $sto;
		$cfg->{-lei_note_event} = $lne;
		$cfg->{-watches} = $watches if $watches;
	}
	if (scalar(keys %PATH2CFG) > 5) {
		# FIXME: use inotify/EVFILT_VNODE to detect unlinked configs
		delete(@PATH2CFG{grep(!-f, keys %PATH2CFG)});
	}
	$self->{cfg} = $PATH2CFG{$f} = $cfg;
	refresh_watches($self);
	$cfg;
}

sub _lei_store ($;$) {
	my ($self, $creat) = @_;
	my $cfg = _lei_cfg($self, $creat) // return;
	$cfg->{-lei_store} //= do {
		require PublicInbox::LeiStore;
		my $dir = $cfg->{'leistore.dir'} // store_path($self);
		return unless $creat || -d $dir;
		PublicInbox::LeiStore->new($dir, { creat => $creat });
	};
}

sub _config {
	my ($self, @argv) = @_;
	my %env = (%{$self->{env}}, GIT_CONFIG => undef);
	my $cfg = _lei_cfg($self, 1);
	my $cmd = [ qw(git config -f), $cfg->{'-f'}, @argv ];
	my %rdr = map { $_ => $self->{$_} } (0..2);
	waitpid(spawn($cmd, \%env, \%rdr), 0);
}

sub lei_daemon_pid { puts shift, $$ }

sub lei_daemon_kill {
	my ($self) = @_;
	my $sig = $self->{opt}->{signal} // 'TERM';
	kill($sig, $$) or fail($self, "kill($sig, $$): $!");
}

# Shell completion helper.  Used by lei-completion.bash and hopefully
# other shells.  Try to do as much here as possible to avoid redundancy
# and improve maintainability.
sub lei__complete {
	my ($self, @argv) = @_; # argv = qw(lei and any other args...)
	shift @argv; # ignore "lei", the entire command is sent
	@argv or return puts $self, grep(!/^_/, keys %CMD), qw(--help -h -C);
	my $cmd = shift @argv;
	my $info = $CMD{$cmd} // do { # filter matching commands
		@argv or puts $self, grep(/\A\Q$cmd\E/, keys %CMD);
		return;
	};
	my ($proto, undef, @spec) = @$info;
	my $cur = pop @argv;
	my $re = defined($cur) ? qr/\A\Q$cur\E/ : qr/./;
	if (substr(my $_cur = $cur // '-', 0, 1) eq '-') { # --switches
		# gross special case since the only git-config options
		# Consider moving to a table if we need more special cases
		# we use Getopt::Long for are the ones we reject, so these
		# are the ones we don't reject:
		if ($cmd eq 'config') {
			puts $self, grep(/$re/, keys %CONFIG_KEYS);
			@spec = qw(add z|null get get-all unset unset-all
				replace-all get-urlmatch
				remove-section rename-section
				name-only list|l edit|e
				get-color-name get-colorbool);
			# fall-through
		}
		# generate short/long names from Getopt::Long specs
		puts $self, grep(/$re/, qw(--help -h -C), map {
			if (s/[:=].+\z//) { # req/optional args, e.g output|o=i
			} elsif (s/\+\z//) { # verbose|v+
			} elsif (s/!\z//) {
				# negation: mail! => no-mail|mail
				s/([\w\-]+)/$1|no-$1/g
			}
			map {
				my $x = length > 1 ? "--$_" : "-$_";
				$x eq $_cur ? () : $x;
			} grep(!/_/, split(/\|/, $_, -1)) # help|h
		} grep { $OPTDESC{"$_\t$cmd"} || $OPTDESC{$_} } @spec);
	} elsif ($cmd eq 'config' && !@argv && !$CONFIG_KEYS{$cur}) {
		puts $self, grep(/$re/, keys %CONFIG_KEYS);
	}

	# switch args (e.g. lei q -f mbox)
	if (($argv[-1] // $cur // '') =~ /\A--?([\w\-]+)\z/) {
		my $opt = quotemeta $1;
		puts $self, map {
			my $v = $OPTDESC{$_};
			my @v = ref($v) ? split(/\|/, $v->[0]) : ();
			# get rid of ALL CAPS placeholder (e.g "OUT")
			# (TODO: completion for external paths)
			shift(@v) if scalar(@v) && uc($v[0]) eq $v[0];
			@v;
		} grep(/\A(?:[\w-]+\|)*$opt\b.*?(?:\t$cmd)?\z/, keys %OPTDESC);
	}
	if (my $cb = lazy_cb($self, $cmd, '_complete_')) {
		puts $self, $cb->($self, @argv, $cur ? ($cur) : ());
	}
	# TODO: URLs, pathnames, OIDs, MIDs, etc...  See optparse() for
	# proto parsing.
}

sub exec_buf ($$) {
	my ($argv, $env) = @_;
	my $argc = scalar @$argv;
	my $buf = 'exec '.join("\0", scalar(@$argv), @$argv);
	while (my ($k, $v) = each %$env) { $buf .= "\0$k=$v" };
	$buf;
}

sub start_mua {
	my ($self) = @_;
	if ($self->{ovv}->{fmt} =~ /\A(?:maildir)\z/) { # TODO: IMAP
		refresh_watches($self);
	}
	my $mua = $self->{opt}->{mua} // return;
	my $mfolder = $self->{ovv}->{dst};
	my (@cmd, $replaced);
	if ($mua =~ /\A(?:mutt|mailx|mail|neomutt)\z/) {
		@cmd = ($mua, '-f');
	# TODO: help wanted: other common FOSS MUAs
	} else {
		require Text::ParseWords;
		@cmd = Text::ParseWords::shellwords($mua);
		# mutt uses '%f' for open-hook with compressed mbox, we follow
		@cmd = map { $_ eq '%f' ? ($replaced = $mfolder) : $_ } @cmd;
	}
	push @cmd, $mfolder unless defined($replaced);
	if ($self->{sock}) { # lei(1) client process runs it
		# restore terminal: echo $query | lei q --stdin --mua=...
		my $io = [];
		$io->[0] = $self->{1} if $self->{opt}->{stdin} && -t $self->{1};
		send_exec_cmd($self, $io, \@cmd, {});
	}
	if ($self->{lxs} && $self->{au_done}) { # kick wait_startq
		syswrite($self->{au_done}, 'q' x ($self->{lxs}->{jobs} // 0));
	}
	return unless -t $self->{2}; # XXX how to determine non-TUI MUAs?
	$self->{opt}->{quiet} = 1;
	delete $self->{-progress};
	delete $self->{opt}->{verbose};
}

sub send_exec_cmd { # tell script/lei to execute a command
	my ($self, $io, $cmd, $env) = @_;
	my $sock = $self->{sock} // die 'lei client gone';
	my $fds = [ map { fileno($_) } @$io ];
	$send_cmd->($sock, $fds, exec_buf($cmd, $env), MSG_EOR);
}

sub poke_mua { # forces terminal MUAs to wake up and hopefully notice new mail
	my ($self) = @_;
	my $alerts = $self->{opt}->{alert} // return;
	my $sock = $self->{sock};
	while (my $op = shift(@$alerts)) {
		if ($op eq ':WINCH') {
			# hit the process group that started the MUA
			send($sock, '-WINCH', MSG_EOR) if $sock;
		} elsif ($op eq ':bell') {
			out($self, "\a");
		} elsif ($op =~ /(? 0, '/dev/stdout' => 1, '/dev/stderr' => 2);
$path_to_fd{"/dev/fd/$_"} = $_ for (0..2);

# this also normalizes the path
sub path_to_fd {
	my ($self, $path) = @_;
	$path = rel2abs($self, $path);
	$path =~ tr!/!/!s;
	$path_to_fd{$path} // (
		($path =~ m!\A/(?:dev|proc/self)/fd/[0-9]+\z!) ?
			fail($self, "cannot open $path from daemon") : -1
	);
}

# caller needs to "-t $self->{1}" to check if tty
sub start_pager {
	my ($self, $new_env) = @_;
	my $fh = popen_rd([qw(git var GIT_PAGER)]);
	chomp(my $pager = <$fh> // '');
	close($fh) or warn "`git var PAGER' error: \$?=$?";
	return if $pager eq 'cat' || $pager eq '';
	$new_env //= {};
	$new_env->{LESS} //= 'FRX';
	$new_env->{LV} //= '-c';
	$new_env->{MORE} = $new_env->{LESS} if $^O eq 'freebsd';
	pipe(my ($r, $wpager)) or return warn "pipe: $!";
	my $rdr = { 0 => $r, 1 => $self->{1}, 2 => $self->{2} };
	my $pgr = [ undef, @$rdr{1, 2} ];
	my $env = $self->{env};
	if ($self->{sock}) { # lei(1) process runs it
		delete @$new_env{keys %$env}; # only set iff unset
		send_exec_cmd($self, [ @$rdr{0..2} ], [$pager], $new_env);
	} else {
		die 'BUG: start_pager w/o socket';
	}
	$self->{1} = $wpager;
	$self->{2} = $wpager if -t $self->{2};
	$env->{GIT_PAGER_IN_USE} = 'true'; # we may spawn git
	$self->{pgr} = $pgr;
}

# display a message for user before spawning full-screen $VISUAL
sub pgr_err {
	my ($self, @msg) = @_;
	return warn(@msg) unless $self->{sock} && -t $self->{2};
	start_pager($self, { LESS => 'RX' }); # no 'F' so we prompt
	print { $self->{2} } @msg;
	$self->{2}->autoflush(1);
	stop_pager($self);
	send($self->{sock}, 'wait', MSG_EOR); # wait for user to quit pager
}

sub stop_pager {
	my ($self) = @_;
	my $pgr = delete($self->{pgr}) or return;
	$self->{2} = $pgr->[2];
	close(delete($self->{1})) if $self->{1};
	$self->{1} = $pgr->[1];
}

sub accept_dispatch { # Listener {post_accept} callback
	my ($sock) = @_; # ignore other
	$sock->autoflush(1);
	my $self = bless { sock => $sock }, __PACKAGE__;
	vec(my $rvec = '', fileno($sock), 1) = 1;
	select($rvec, undef, undef, 60) or
		return send($sock, 'timed out waiting to recv FDs', MSG_EOR);
	# (4096 * 33) >MAX_ARG_STRLEN
	my @fds = $recv_cmd->($sock, my $buf, 4096 * 33) or return; # EOF
	if (!defined($fds[0])) {
		warn(my $msg = "recv_cmd failed: $!");
		return send($sock, $msg, MSG_EOR);
	} else {
		my $i = 0;
		for my $fd (@fds) {
			open($self->{$i++}, '+<&=', $fd) and next;
			send($sock, "open(+<&=$fd) (FD=$i): $!", MSG_EOR);
		}
		$i == 4 or return send($sock, 'not enough FDs='.($i-1), MSG_EOR)
	}
	# $ENV_STR = join('', map { "\0$_=$ENV{$_}" } keys %ENV);
	# $buf = "$argc\0".join("\0", @ARGV).$ENV_STR."\0\0";
	substr($buf, -2, 2, '') eq "\0\0" or  # s/\0\0\z//
		return send($sock, 'request command truncated', MSG_EOR);
	my ($argc, @argv) = split(/\0/, $buf, -1);
	undef $buf;
	my %env = map { split(/=/, $_, 2) } splice(@argv, $argc);
	$self->{env} = \%env;
	eval { dispatch($self, @argv) };
	$self->fail($@) if $@;
}

sub dclose {
	my ($self) = @_;
	local $current_lei = $self;
	delete $self->{-progress};
	_drop_wq($self) if $self->{failed};
	$self->close if $self->{-event_init_done}; # PublicInbox::DS::close
}

# for long-running results
sub event_step {
	my ($self) = @_;
	local %ENV = %{$self->{env}};
	local $current_lei = $self;
	eval {
		my @fds = $recv_cmd->($self->{sock} // return, my $buf, 4096);
		if (scalar(@fds) == 1 && !defined($fds[0])) {
			return if $! == EAGAIN;
			die "recvmsg: $!" if $! != ECONNRESET;
			$buf = '';
			@fds = (); # for open loop below:
		}
		for (@fds) { open my $rfh, '+<&=', $_ }
		if ($buf eq '') {
			_drop_wq($self); # EOF, client disconnected
			dclose($self);
			$buf = 'TERM';
		}
		if ($buf =~ /\A(?:STOP|CONT|TERM)\z/) {
			my $sig = "-$buf";
			for my $wq (grep(defined, @$self{@WQ_KEYS})) {
				$wq->wq_kill($sig);
			}
		} else {
			die "unrecognized client signal: $buf";
		}
		my $s = $self->{-socks} // []; # lei up --all
		@$s = grep { send($_, $buf, MSG_EOR) } @$s;
	};
	if (my $err = $@) {
		eval { $self->fail($err) };
		dclose($self);
	}
}

sub event_step_init {
	my ($self) = @_;
	my $sock = $self->{sock} or return;
	$self->{-event_init_done} // do { # persist til $ops done
		$sock->blocking(0);
		$self->SUPER::new($sock, EPOLLIN);
		$self->{-event_init_done} = $sock;
	};
}

sub noop {}

sub oldset { $oldset }

sub dump_and_clear_log {
	if (defined($errors_log) && -s STDIN && seek(STDIN, 0, SEEK_SET)) {
		openlog('lei-daemon', 'pid,nowait,nofatal,ndelay', 'user');
		chomp(my @lines = );
		truncate(STDIN, 0) or
			syslog('warning', "ftruncate (%s): %m", $errors_log);
		for my $l (@lines) { syslog('warning', '%s', $l) }
		closelog(); # don't share across fork
	}
}

sub cfg2lei ($) {
	my ($cfg) = @_;
	my $lei = bless { env => { %{$cfg->{-env}} } }, __PACKAGE__;
	open($lei->{0}, '<&', \*STDIN) or die "dup 0: $!";
	open($lei->{1}, '>>&', \*STDOUT) or die "dup 1: $!";
	open($lei->{2}, '>>&', \*STDERR) or die "dup 2: $!";
	open($lei->{3}, '<', '/') or die "open /: $!";
	my ($x, $y);
	socketpair($x, $y, AF_UNIX, SOCK_SEQPACKET, 0) or die "socketpair: $!";
	$lei->{sock} = $x;
	require PublicInbox::LeiSelfSocket;
	PublicInbox::LeiSelfSocket->new($y); # adds to event loop
	$lei;
}

sub dir_idle_handler ($) { # PublicInbox::DirIdle callback
	my ($ev) = @_; # Linux::Inotify2::Event or duck type
	my $fn = $ev->fullname;
	if ($fn =~ m!\A(.+)/(new|cur)/([^/]+)\z!) { # Maildir file
		my ($mdir, $nc, $bn) = ($1, $2, $3);
		$nc = '' if $ev->IN_DELETE || $ev->IN_MOVED_FROM;
		for my $f (keys %{$MDIR2CFGPATH->{$mdir} // {}}) {
			my $cfg = $PATH2CFG{$f} // next;
			eval {
				my $lei = cfg2lei($cfg);
				$lei->dispatch('note-event',
						"maildir:$mdir", $nc, $bn, $fn);
			};
			warn "E: note-event $f: $@\n" if $@;
		}
	}
	if ($ev->can('cancel') && ($ev->IN_IGNORE || $ev->IN_UNMOUNT)) {
		$ev->cancel;
	}
	if ($fn =~ m!\A(.+)/(?:new|cur)\z! && !-e $fn) {
		delete $MDIR2CFGPATH->{$1};
	}
	if (!-e $fn) { # config file or Maildir gone
		for my $cfgpaths (values %$MDIR2CFGPATH) {
			delete $cfgpaths->{$fn};
		}
		delete $PATH2CFG{$fn};
	}
}

# lei(1) calls this when it can't connect
sub lazy_start {
	my ($path, $errno, $narg) = @_;
	local ($errors_log, $listener);
	my ($sock_dir) = ($path =~ m!\A(.+?)/[^/]+\z!);
	$errors_log = "$sock_dir/errors.log";
	my $addr = pack_sockaddr_un($path);
	my $lk = bless { lock_path => $errors_log }, 'PublicInbox::Lock';
	umask(077) // die("umask(077): $!");
	$lk->lock_acquire;
	socket($listener, AF_UNIX, SOCK_SEQPACKET, 0) or die "socket: $!";
	if ($errno == ECONNREFUSED || $errno == ENOENT) {
		return if connect($listener, $addr); # another process won
		if ($errno == ECONNREFUSED && -S $path) {
			unlink($path) or die "unlink($path): $!";
		}
	} else {
		$! = $errno; # allow interpolation to stringify in die
		die "connect($path): $!";
	}
	bind($listener, $addr) or die "bind($path): $!";
	$lk->lock_release;
	undef $lk;
	my @st = stat($path) or die "stat($path): $!";
	my $dev_ino_expect = pack('dd', $st[0], $st[1]); # dev+ino
	local $oldset = PublicInbox::DS::block_signals();
	if ($narg == 5) {
		$send_cmd = PublicInbox::Spawn->can('send_cmd4');
		$recv_cmd = PublicInbox::Spawn->can('recv_cmd4') // do {
			require PublicInbox::CmdIPC4;
			$send_cmd = PublicInbox::CmdIPC4->can('send_cmd4');
			PublicInbox::CmdIPC4->can('recv_cmd4');
		} // do {
			$send_cmd = PublicInbox::Syscall->can('send_cmd4');
			PublicInbox::Syscall->can('recv_cmd4');
		};
	}
	$recv_cmd or die <<"";
(Socket::MsgHdr || Inline::C) missing/unconfigured (narg=$narg);

	require PublicInbox::Listener;
	require PublicInbox::PktOp;
	(-p STDOUT) or die "E: stdout must be a pipe\n";
	open(STDIN, '+>>', $errors_log) or die "open($errors_log): $!";
	STDIN->autoflush(1);
	dump_and_clear_log();
	POSIX::setsid() > 0 or die "setsid: $!";
	my $pid = fork // die "fork: $!";
	return if $pid;
	$0 = "lei-daemon $path";
	local %PATH2CFG;
	local $MDIR2CFGPATH;
	$listener->blocking(0);
	my $exit_code;
	my $pil = PublicInbox::Listener->new($listener, \&accept_dispatch);
	local $quit = do {
		my (undef, $eof_p) = PublicInbox::PktOp->pair;
		sub {
			$exit_code //= shift;
			eval 'PublicInbox::LeiNoteEvent::flush_task()';
			my $lis = $pil or exit($exit_code);
			# closing eof_p triggers \&noop wakeup
			$listener = $eof_p = $pil = $path = undef;
			$lis->close; # DS::close
			PublicInbox::DS->SetLoopTimeout(1000);
		};
	};
	my $sig = {
		CHLD => \&PublicInbox::DS::enqueue_reap,
		QUIT => $quit,
		INT => $quit,
		TERM => $quit,
		HUP => \&noop,
		USR1 => \&noop,
		USR2 => \&noop,
	};
	require PublicInbox::DirIdle;
	local $dir_idle = PublicInbox::DirIdle->new(sub {
		# just rely on wakeup to hit PostLoopCallback set below
		dir_idle_handler($_[0]) if $_[0]->fullname ne $path;
	});
	$dir_idle->add_watches([$sock_dir]);
	PublicInbox::DS->SetPostLoopCallback(sub {
		my ($dmap, undef) = @_;
		if (@st = defined($path) ? stat($path) : ()) {
			if ($dev_ino_expect ne pack('dd', $st[0], $st[1])) {
				warn "$path dev/ino changed, quitting\n";
				$path = undef;
			}
		} elsif (defined($path)) { # ENOENT is common
			warn "stat($path): $!, quitting ...\n" if $! != ENOENT;
			undef $path;
			$quit->();
		}
		return 1 if defined($path);
		my $n = 0;
		for my $s (values %$dmap) {
			$s->can('busy') or next;
			if ($s->busy) {
				++$n;
			} else {
				$s->close;
			}
		}
		$n; # true: continue, false: stop
	});

	# STDIN was redirected to /dev/null above, closing STDERR and
	# STDOUT will cause the calling `lei' client process to finish
	# reading the <$daemon> pipe.
	local $SIG{__WARN__} = sub {
		$current_lei ? err($current_lei, @_) : warn(
		  strftime('%Y-%m-%dT%H:%M:%SZ', gmtime(time))," $$ ", @_);
	};
	open STDERR, '>&STDIN' or die "redirect stderr failed: $!";
	open STDOUT, '>&STDIN' or die "redirect stdout failed: $!";
	# $daemon pipe to `lei' closed, main loop begins:
	eval { PublicInbox::DS::event_loop($sig, $oldset) };
	warn "event loop error: $@\n" if $@;
	# exit() may trigger waitpid via various DESTROY, ensure interruptible
	PublicInbox::DS::sig_setmask($oldset);
	dump_and_clear_log();
	exit($exit_code // 0);
}

sub busy { 1 } # prevent daemon-shutdown if client is connected

# ensures stdout hits the FS before sock disconnects so a client
# can immediately reread it
sub DESTROY {
	my ($self) = @_;
	if (my $counters = delete $self->{counters}) {
		for my $k (sort keys %$counters) {
			my $nr = $counters->{$k};
			$self->child_error(0, "$nr $k messages");
		}
	}
	$self->{1}->autoflush(1) if $self->{1};
	stop_pager($self);
	dump_and_clear_log();
	# preserve $? for ->fail or ->x_it code
}

sub wq_done_wait { # dwaitpid callback
	my ($arg, $pid) = @_;
	my ($wq, $lei) = @$arg;
	local $current_lei = $lei;
	my $err_type = $lei->{-err_type};
	$? and $lei->child_error($?,
		$err_type ? "$err_type errors during $lei->{cmd} \$?=$?" : ());
	$lei->dclose;
}

sub fchdir {
	my ($lei) = @_;
	my $dh = $lei->{3} // die 'BUG: lei->{3} (CWD) gone';
	chdir($dh) || die "fchdir: $!";
}

sub wq_eof { # EOF callback for main daemon
	my ($lei, $wq_fld) = @_;
	local $current_lei = $lei;
	my $wq = delete $lei->{$wq_fld // 'wq1'};
	$lei->sto_done_request($wq);
	$wq // $lei->fail; # already failed
}

sub watch_state_ok ($) {
	my ($state) = $_[-1]; # $_[0] may be $self
	$state =~ /\Apause|(?:import|index|tag)-(?:ro|rw)\z/;
}

sub cancel_maildir_watch ($$) {
	my ($d, $cfg_f) = @_;
	my $w = delete $MDIR2CFGPATH->{$d}->{$cfg_f};
	scalar(keys %{$MDIR2CFGPATH->{$d}}) or
		delete $MDIR2CFGPATH->{$d};
	for my $x (@{$w // []}) { $x->cancel }
}

sub add_maildir_watch ($$) {
	my ($d, $cfg_f) = @_;
	if (!exists($MDIR2CFGPATH->{$d}->{$cfg_f})) {
		my @w = $dir_idle->add_watches(["$d/cur", "$d/new"], 1);
		push @{$MDIR2CFGPATH->{$d}->{$cfg_f}}, @w if @w;
	}
}

sub refresh_watches {
	my ($lei) = @_;
	$dir_idle or return;
	my $cfg = _lei_cfg($lei) or return;
	my $old = $cfg->{-watches};
	my $watches = $cfg->{-watches} //= {};
	my %seen;
	my $cfg_f = $cfg->{'-f'};
	for my $w (grep(/\Awatch\..+\.state\z/, keys %$cfg)) {
		my $url = substr($w, length('watch.'), -length('.state'));
		require PublicInbox::LeiWatch;
		$watches->{$url} //= PublicInbox::LeiWatch->new($url);
		$seen{$url} = undef;
		my $state = $cfg->get_1("watch.$url.state");
		if (!watch_state_ok($state)) {
			warn("watch.$url.state=$state not supported\n");
			next;
		}
		if ($url =~ /\Amaildir:(.+)/i) {
			my $d = canonpath_harder($1);
			if ($state eq 'pause') {
				cancel_maildir_watch($d, $cfg_f);
			} else {
				add_maildir_watch($d, $cfg_f);
			}
		} else { # TODO: imap/nntp/jmap
			$lei->child_error(0, "E: watch $url not supported, yet")
		}
	}

	# add all known Maildir folders as implicit watches
	my $lms = $lei->lms;
	if ($lms) {
		$lms->lms_write_prepare;
		for my $d ($lms->folders('maildir:')) {
			substr($d, 0, length('maildir:')) = '';

			# fixup old bugs while we're iterating:
			my $cd = canonpath_harder($d);
			my $f = "maildir:$cd";
			$lms->rename_folder("maildir:$d", $f) if $d ne $cd;
			next if $watches->{$f}; # may be set to pause
			require PublicInbox::LeiWatch;
			$watches->{$f} = PublicInbox::LeiWatch->new($f);
			$seen{$f} = undef;
			add_maildir_watch($cd, $cfg_f);
		}
	}
	if ($old) { # cull old non-existent entries
		for my $url (keys %$old) {
			next if exists $seen{$url};
			delete $old->{$url};
			if ($url =~ /\Amaildir:(.+)/i) {
				my $d = canonpath_harder($1);
				cancel_maildir_watch($d, $cfg_f);
			} else { # TODO: imap/nntp/jmap
				$lei->child_error(0, "E: watch $url TODO");
			}
		}
	}
	if (scalar keys %$watches) {
		$cfg->{-env} //= { %{$lei->{env}}, PWD => '/' }; # for cfg2lei
	} else {
		delete $cfg->{-watches};
	}
}

# TODO: support SHA-256
sub git_oid {
	my $eml = $_[-1];
	$eml->header_set($_) for @PublicInbox::Import::UNWANTED_HEADERS;
	git_sha(1, $eml);
}

sub lms {
	my ($lei, $creat) = @_;
	my $sto = $lei->{sto} // _lei_store($lei) // return;
	require PublicInbox::LeiMailSync;
	my $f = "$sto->{priv_eidx}->{topdir}/mail_sync.sqlite3";
	(-f $f || $creat) ? PublicInbox::LeiMailSync->new($f) : undef;
}

sub sto_done_request {
	my ($lei, $wq) = @_;
	return unless $lei->{sto};
	local $current_lei = $lei;
	my $sock = $wq ? $wq->{lei_sock} : undef;
	$sock //= $lei->{sock};
	my @io;
	push(@io, $sock) if $sock; # async wait iff possible
	eval { $lei->{sto}->wq_io_do('done', \@io) };
	warn($@) if $@;
}

sub cfg_dump ($$) {
	my ($lei, $f) = @_;
	my $ret = eval { PublicInbox::Config->git_config_dump($f, $lei->{2}) };
	return $ret if !$@;
	warn($@);
	undef;
}

sub request_umask {
	my ($lei) = @_;
	my $s = $lei->{sock} // return;
	send($s, 'umask', MSG_EOR) // die "send: $!";
	vec(my $rvec = '', fileno($s), 1) = 1;
	select($rvec, undef, undef, 2) or die 'timeout waiting for umask';
	recv($s, my $v, 5, 0) // die "recv: $!";
	(my $u, $lei->{client_umask}) = unpack('AV', $v);
	$u eq 'u' or warn "E: recv $v has no umask";
}

1;
public-inbox-1.9.0/lib/PublicInbox/LI2Wrap.pm000066400000000000000000000010071430031475700206600ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# Wrapper for Linux::Inotify2 < 2.3 which lacked ->fh and auto-close
# Remove this when supported LTS/enterprise distros are all
# Linux::Inotify2 >= 2.3
package PublicInbox::LI2Wrap;
use v5.10.1;
our @ISA = qw(Linux::Inotify2);

sub wrapclose {
	my ($inot) = @_;
	my $fd = $inot->fileno;
	open my $fh, '<&=', $fd or die "open <&= $fd $!";
	bless $inot, __PACKAGE__;
}

sub DESTROY {} # no-op

1
public-inbox-1.9.0/lib/PublicInbox/LeiALE.pm000066400000000000000000000060561430031475700205040ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# All Locals Ever: track lei/store + externals ever used as
# long as they're on an accessible FS.  Includes "lei q" --include
# and --only targets that haven't been through "lei add-external".
# Typically: ~/.cache/lei/all_locals_ever.git
package PublicInbox::LeiALE;
use strict;
use v5.10.1;
use parent qw(PublicInbox::LeiSearch PublicInbox::Lock);
use PublicInbox::Git;
use PublicInbox::Import;
use PublicInbox::LeiXSearch;
use Fcntl qw(SEEK_SET);

sub _new {
	my ($d) = @_;
	PublicInbox::Import::init_bare($d, 'ale');
	bless {
		git => PublicInbox::Git->new($d),
		lock_path => "$d/lei_ale.state", # dual-duty lock + state
		ibxish => [], # Inbox and ExtSearch (and LeiSearch) objects
	}, __PACKAGE__
}

sub new {
	my ($self, $lei) = @_;
	ref($self) or $self = _new($lei->cache_dir . '/all_locals_ever.git');
	my $lxs = PublicInbox::LeiXSearch->new;
	my $sto = $lei->_lei_store;
	$lxs->prepare_external($sto->search) if $sto;
	for my $loc ($lei->externals_each) { # locals only
		$lxs->prepare_external($loc) if -d $loc;
	}
	$self->refresh_externals($lxs, $lei);
	$self;
}

sub over {} # undef for xoids_for

sub overs_all { # for xoids_for (called only in lei workers?)
	my ($self) = @_;
	my $pid = $$;
	if (($self->{owner_pid} // $pid) != $pid) {
		delete($_->{over}) for @{$self->{ibxish}};
	}
	$self->{owner_pid} = $pid;
	grep(defined, map { $_->over } @{$self->{ibxish}});
}

sub refresh_externals {
	my ($self, $lxs, $lei) = @_;
	$self->git->cleanup;
	my $lk = $self->lock_for_scope;
	my $cur_lxs = ref($lxs)->new;
	my $orig = do {
		local $/;
		readline($self->{lockfh}) //
				die "readline($self->{lock_path}): $!";
	};
	my $new = '';
	my $old = '';
	my $gone = 0;
	my %seen_ibxish; # $dir => any-defined value
	for my $dir (split(/\n/, $orig)) {
		if (-d $dir && -r _ && $cur_lxs->prepare_external($dir)) {
			$seen_ibxish{$dir} //= length($old .= "$dir\n");
		} else {
			++$gone;
		}
	}
	my @ibxish = $cur_lxs->locals;
	for my $x ($lxs->locals) {
		my $d = $lei->canonpath_harder($x->{inboxdir} // $x->{topdir});
		$seen_ibxish{$d} //= do {
			$new .= "$d\n";
			push @ibxish, $x;
		};
	}
	if ($new ne '' || $gone) {
		$self->{lockfh}->autoflush(1);
		if ($gone) {
			seek($self->{lockfh}, 0, SEEK_SET) or die "seek: $!";
			truncate($self->{lockfh}, 0) or die "truncate: $!";
		} else {
			$old = '';
		}
		print { $self->{lockfh} } $old, $new or die "print: $!";
	}
	$new = $old = '';
	my $f = $self->git->{git_dir}.'/objects/info/alternates';
	if (open my $fh, '<', $f) {
		local $/;
		$old = <$fh> // die "readline($f): $!";
	}
	for my $x (@ibxish) {
		$new .= $lei->canonpath_harder($x->git->{git_dir})."/objects\n";
	}
	$self->{ibxish} = \@ibxish;
	return if $old eq $new;

	# this needs to be atomic since child processes may start
	# git-cat-file at any time
	my $tmp = "$f.$$.tmp";
	open my $fh, '>', $tmp or die "open($tmp): $!";
	print $fh $new or die "print($tmp): $!";
	close $fh or die "close($tmp): $!";
	rename($tmp, $f) or die "rename($tmp, $f): $!";
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiAddExternal.pm000066400000000000000000000042731430031475700222750ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# "lei add-external" command
package PublicInbox::LeiAddExternal;
use strict;
use v5.10.1;

sub _finish_add_external {
	my ($lei, $location) = @_;
	my $new_boost = $lei->{opt}->{boost} // 0;
	my $key = "external.$location.boost";
	my $cur_boost = $lei->_lei_cfg(1)->{$key};
	return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent
	$lei->_config($key, $new_boost);
}

sub lei_add_external {
	my ($lei, $location) = @_;
	my $mirror = $lei->{opt}->{mirror} // do {
		my @fail;
		for my $sw ($lei->index_opt, $lei->curl_opt,
				qw(no-torsocks torsocks inbox-version)) {
			my ($f) = (split(/|/, $sw, 2))[0];
			next unless defined $lei->{opt}->{$f};
			$f = length($f) == 1 ? "-$f" : "--$f";
			push @fail, $f;
		}
		if (scalar(@fail) == 1) {
			return $lei->("@fail requires --mirror");
		} elsif (@fail) {
			my $last = pop @fail;
			my $fail = join(', ', @fail);
			return $lei->("@fail and $last require --mirror");
		}
		undef;
	};
	$location = $lei->ext_canonicalize($location);
	if (defined($mirror) && -d $location) {
		$lei->fail(<<""); # TODO: did you mean "update-external?"
--mirror destination `$location' already exists

	} elsif (-d $location) {
		index($location, "\n") >= 0 and
			return $lei->fail("`\\n' not allowed in `$location'");
	}
	if ($location !~ m!\Ahttps?://! && !-d $location) {
		$mirror // return $lei->fail("$location not a directory");
		index($location, "\n") >= 0 and
			return $lei->fail("`\\n' not allowed in `$location'");
		$mirror = $lei->ext_canonicalize($mirror);
		require PublicInbox::LeiMirror;
		PublicInbox::LeiMirror->start($lei, $mirror => $location);
	} else {
		_finish_add_external($lei, $location);
	}
}

sub _complete_add_external { # for bash, this relies on "compopt -o nospace"
	my ($lei, @argv) = @_;
	my $cfg = $lei->_lei_cfg or return ();
	my $match_cb = $lei->complete_url_prepare(\@argv);
	require URI;
	map {
		my $u = URI->new(substr($_, length('external.')));
		my ($base) = ($u->path =~ m!((?:/?.*)?/)[^/]+/?\z!);
		$u->path($base);
		$match_cb->($u->as_string);
	} grep(m!\Aexternal\.https?://!, @{$cfg->{-section_order}});
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiAddWatch.pm000066400000000000000000000024231430031475700215540ustar00rootroot00000000000000# Copyright all contributors 
# License: AGPL-3.0+ 

# "lei add-watch" command
package PublicInbox::LeiAddWatch;
use strict;
use v5.10.1;
use parent qw(PublicInbox::LeiInput);

sub lei_add_watch {
	my ($lei, @argv) = @_;
	my $cfg = $lei->_lei_cfg(1);
	my $self = bless {}, __PACKAGE__;
	$lei->{opt}->{'mail-sync'} = 1; # for prepare_inputs
	my $state = $lei->{opt}->{'state'} // 'import-rw';
	$lei->watch_state_ok($state) or
		return $lei->fail("invalid state: $state");
	my $vmd_mod = $self->vmd_mod_extract(\@argv);
	return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err};
	$self->prepare_inputs($lei, \@argv) or return;
	my @vmd;
	while (my ($type, $vals) = each %$vmd_mod) {
		push @vmd, "$type:$_" for @$vals;
	}
	my $vmd0 = shift @vmd;
	for my $w (@{$self->{inputs}}) {
		# clobber existing, allow multiple
		if (defined($vmd0)) {
			$lei->_config("watch.$w.vmd", '--replace-all', $vmd0);
			for my $v (@vmd) {
				$lei->_config("watch.$w.vmd", $v);
			}
		}
		next if defined $cfg->{"watch.$w.state"};
		$lei->_config("watch.$w.state", $state);
	}
	$lei->_lei_store(1); # create
	$lei->lms(1)->lms_write_prepare->add_folders(@{$self->{inputs}});
	delete $lei->{cfg}; # force reload
	$lei->refresh_watches;
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiAuth.pm000066400000000000000000000052521430031475700210010ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# Authentication worker for anything that needs auth for read/write IMAP
# (eventually for read-only NNTP access)
#
# timelines
# lei-daemon              |  LeiAuth worker #0      | other WQ workers
# ----------------------------------------------------------
# spawns all workers ---->[ workers all start and run ipc_atfork_child ]
#                         | do_auth_atfork          | wq_worker_loop sleep
#                         | # reads .netrc          |
#                         | # queries git-credential|
#                         | send net_merge_continue |
#                         |         |               |
#                         |         v               |
# recv net_merge_continue <---------/               |
#            |            |                         |
#            v            |                         |
# broadcast net_merge_all [ all workers (including LeiAuth worker #0) ]
#                         [ LeiAuth worker #0 becomes just another WQ worker ]
#                         |
# call net_merge_all_done ->-> do per-WQ-class defined actions
package PublicInbox::LeiAuth;
use strict;
use v5.10.1;

sub do_auth_atfork { # used by IPC WQ workers
	my ($self, $wq) = @_;
	return if $wq->{-wq_worker_nr} != 0; # only first worker calls this
	my $lei = $wq->{lei};
	my $net = $lei->{net};
	if ($net->{-auth_done}) { # from previous worker... (ugly)
		$lei->{pkt_op_p}->pkt_do('net_merge_continue', $net) or
				$lei->fail("pkt_do net_merge_continue: $!");
		return;
	}
	eval { # fill auth info (may prompt user or read netrc)
		my $mics = $net->imap_common_init($lei);
		my $nn = $net->nntp_common_init($lei);
		# broadcast successful auth info to lei-daemon:
		$net->{-auth_done} = 1;
		$lei->{pkt_op_p}->pkt_do('net_merge_continue', $net) or
				die "pkt_do net_merge_continue: $!";
		$net->{mics_cached} = $mics if $mics;
		$net->{nn_cached} = $nn if $nn;
	};
	$lei->fail($@) if $@;
}

sub net_merge_all { # called in wq worker via wq_broadcast
	my ($wq, $net_new) = @_;
	my $net = $wq->{lei}->{net};
	%$net = (%$net, %$net_new);
}

# called by top-level lei-daemon when first worker is done with auth
# passes updated net auth info to current workers
sub net_merge_continue {
	my ($wq, $lei, $net_new) = @_;
	$wq->{-net_new} = $net_new; # for "lei up"
	$wq->wq_broadcast('PublicInbox::LeiAuth::net_merge_all', $net_new);
	$wq->net_merge_all_done($lei); # defined per-WQ
}

sub op_merge { # prepares PktOp->pair ops
	my ($self, $ops, $wq, $lei) = @_;
	$ops->{net_merge_continue} = [ \&net_merge_continue, $wq, $lei ];
}

sub new { bless \(my $x), __PACKAGE__ }

1;
public-inbox-1.9.0/lib/PublicInbox/LeiBlob.pm000066400000000000000000000124431430031475700207560ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# "lei blob $OID" command
# TODO: this doesn't scan submodules, but maybe it should
package PublicInbox::LeiBlob;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC);
use PublicInbox::Spawn qw(spawn popen_rd which);
use PublicInbox::DS;

sub get_git_dir ($$) {
	my ($lei, $d) = @_;
	return $d if -d "$d/objects" && -d "$d/refs" && -e "$d/HEAD";

	my $cmd = [ qw(git rev-parse --git-dir) ];
	my $opt = { '-C' => $d };
	if (defined($lei->{opt}->{cwd})) { # --cwd used, report errors
		$opt->{2} = $lei->{2};
	} else { # implicit --cwd, quiet errors
		open $opt->{2}, '>', '/dev/null' or die "open /dev/null: $!";
	}
	my ($r, $pid) = popen_rd($cmd, {GIT_DIR => undef}, $opt);
	chomp(my $gd = do { local $/; <$r> });
	waitpid($pid, 0) == $pid or die "BUG: waitpid @$cmd ($!)";
	$? == 0 ? $gd : undef;
}

sub solver_user_cb { # called by solver when done
	my ($res, $self) = @_;
	my $lei = $self->{lei};
	my $log_buf = delete $lei->{'log_buf'};
	$$log_buf =~ s/^/# /sgm;
	ref($res) eq 'ARRAY' or return $lei->child_error(0, $$log_buf);
	$lei->qerr($$log_buf);
	my ($git, $oid, $type, $size, $di) = @$res;
	my $gd = $git->{git_dir};

	# don't try to support all the git-show(1) options for non-blob,
	# this is just a convenience:
	$type ne 'blob' and
		warn "# $oid is a $type of $size bytes in:\n#\t$gd\n";

	my $cmd = [ 'git', "--git-dir=$gd", 'show', $oid ];
	my $rdr = { 1 => $lei->{1}, 2 => $lei->{2} };
	waitpid(spawn($cmd, $lei->{env}, $rdr), 0);
	$lei->child_error($?) if $?;
}

sub do_solve_blob { # via wq_do
	my ($self) = @_;
	my $lei = $self->{lei};
	my $git_dirs = $lei->{opt}->{'git-dir'};
	my $hints = {};
	for my $x (qw(oid-a path-a path-b)) {
		my $v = $lei->{opt}->{$x} // next;
		$x =~ tr/-/_/;
		$hints->{$x} = $v;
	}
	open my $log, '+>', \(my $log_buf = '') or die "PerlIO::scalar: $!";
	$lei->{log_buf} = \$log_buf;
	my $git = $lei->{ale}->git;
	my @rmt = map {
		PublicInbox::LeiRemote->new($lei, $_)
	} $self->{lxs}->remotes;
	my $solver = bless {
		gits => [ map {
				PublicInbox::Git->new($lei->rel2abs($_))
			} @$git_dirs ],
		user_cb => \&solver_user_cb,
		uarg => $self,
		# -cur_di, -qsp, -msg => temporary fields for Qspawn callbacks
		inboxes => [ $self->{lxs}->locals, @rmt ],
	}, 'PublicInbox::SolverGit';
	local $PublicInbox::DS::in_loop = 0; # waitpid synchronously
	$solver->solve($lei->{env}, $log, $self->{oid_b}, $hints);
}

sub cat_attach_i { # Eml->each_part callback
	my ($part, $depth, $idx) = @{$_[0]};
	my $lei = $_[1];
	my $want = $lei->{-attach_idx} // return;
	return if $idx ne $want; # [0-9]+(?:\.[0-9]+)+
	delete $lei->{-attach_idx};
	$lei->out($part->body);
}

sub extract_attach ($$$) {
	my ($lei, $blob, $bref) = @_;
	my $eml = PublicInbox::Eml->new($bref);
	$eml->each_part(\&cat_attach_i, $lei, 1);
	my $idx = delete $lei->{-attach_idx};
	defined($idx) and return $lei->fail(<start_pager if -t $lei->{1};
	my $opt = $lei->{opt};
	my $has_hints = grep(defined, @$opt{qw(oid-a path-a path-b)});
	my $lxs;
	if ($blob =~ s/:([0-9\.]+)\z//) {
		$lei->{-attach_idx} = $1;
		$opt->{mail} = 1;
	}

	# first, see if it's a blob returned by "lei q" JSON output:k
	if ($opt->{mail} // ($has_hints ? 0 : 1)) {
		if (grep(defined, @$opt{qw(include only)})) {
			$lxs = $lei->lxs_prepare;
			$lei->ale->refresh_externals($lxs, $lei);
		}
		my $rdr = {};
		if ($opt->{mail}) {
			open $rdr->{2}, '+>', undef or die "open: $!";
		} else {
			open $rdr->{2}, '>', '/dev/null' or die "open: $!";
		}
		my $cmd = [ 'git', '--git-dir='.$lei->ale->git->{git_dir},
				'cat-file', 'blob', $blob ];
		if (defined $lei->{-attach_idx}) {
			my $fh = popen_rd($cmd, $lei->{env}, $rdr);
			require PublicInbox::Eml;
			my $buf = do { local $/; <$fh> };
			return extract_attach($lei, $blob, \$buf) if close($fh);
		} else {
			$rdr->{1} = $lei->{1};
			waitpid(spawn($cmd, $lei->{env}, $rdr), 0);
		}
		my $ce = $?;
		return if $ce == 0;
		my $lms = $lei->lms;
		if (my $bref = $lms ? $lms->local_blob($blob, 1) : undef) {
			defined($lei->{-attach_idx}) and
				return extract_attach($lei, $blob, $bref);
			return $lei->out($$bref);
		} elsif ($opt->{mail}) {
			my $eh = $rdr->{2};
			seek($eh, 0, 0);
			return $lei->child_error($ce, do { local $/; <$eh> });
		} # else: fall through to solver below
	}

	# maybe it's a non-email (code) blob from a coderepo
	my $git_dirs = $opt->{'git-dir'} //= [];
	if ($opt->{'cwd'} // 1) {
		my $cgd = get_git_dir($lei, '.');
		unshift(@$git_dirs, $cgd) if defined $cgd;
	}
	return $lei->fail('no --git-dir to try') unless @$git_dirs;
	unless ($lxs) {
		$lxs = $lei->lxs_prepare or return;
		$lei->ale->refresh_externals($lxs, $lei);
	}
	if ($lxs->remotes) {
		require PublicInbox::LeiRemote;
		$lei->{curl} //= which('curl') or return
			$lei->fail('curl needed for', $lxs->remotes);
		$lei->_lei_store(1)->write_prepare($lei);
	}
	require PublicInbox::SolverGit;
	my $self = bless { lxs => $lxs, oid_b => $blob }, __PACKAGE__;
	my ($op_c, $ops) = $lei->workers_start($self, 1);
	$lei->{wq1} = $self;
	$self->wq_io_do('do_solve_blob', []);
	$self->wq_close;
	$lei->wait_wq_events($op_c, $ops);
}

sub ipc_atfork_child {
	my ($self) = @_;
	$self->{lei}->_lei_atfork_child;
	$self->SUPER::ipc_atfork_child;
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiConfig.pm000066400000000000000000000024311430031475700213010ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 
package PublicInbox::LeiConfig;
use strict;
use v5.10.1;
use PublicInbox::PktOp;

sub cfg_do_edit ($;$) {
	my ($self, $reason) = @_;
	my $lei = $self->{lei};
	$lei->pgr_err($reason) if defined $reason;
	my $cmd = [ qw(git config --edit -f), $self->{-f} ];
	my $env = { GIT_CONFIG => $self->{-f} };
	$self->cfg_edit_begin if $self->can('cfg_edit_begin');
	# run in script/lei foreground
	my ($op_c, $op_p) = PublicInbox::PktOp->pair;
	# $op_p will EOF when $EDITOR is done
	$op_c->{ops} = { '' => [\&cfg_edit_done, $self] };
	$lei->send_exec_cmd([ @$lei{qw(0 1 2)}, $op_p->{op_p} ], $cmd, $env);
}

sub cfg_edit_done { # PktOp
	my ($self) = @_;
	eval {
		my $cfg = $self->{lei}->cfg_dump($self->{-f}, $self->{lei}->{2})
			// return cfg_do_edit($self, "\n");
		$self->cfg_verify($cfg) if $self->can('cfg_verify');
	};
	$self->{lei}->fail($@) if $@;
}

sub lei_config {
	my ($lei, @argv) = @_;
	$lei->{opt}->{'config-file'} and return $lei->fail(
		"config file switches not supported by `lei config'");
	return $lei->_config(@argv) unless $lei->{opt}->{edit};
	my $f = $lei->_lei_cfg(1)->{-f};
	my $self = bless { lei => $lei, -f => $f }, __PACKAGE__;
	cfg_do_edit($self);
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiConvert.pm000066400000000000000000000045231430031475700215200ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# front-end for the "lei convert" sub-command
package PublicInbox::LeiConvert;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
use PublicInbox::LeiOverview;
use PublicInbox::DS;

# /^input_/ subs are used by PublicInbox::LeiInput

sub input_mbox_cb { # MboxReader callback
	my ($eml, $self) = @_;
	my $kw = PublicInbox::MboxReader::mbox_keywords($eml);
	$eml->header_set($_) for qw(Status X-Status);
	$self->{wcb}->(undef, { kw => $kw }, $eml);
}

sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
	my ($self, $eml) = @_;
	$self->{wcb}->(undef, {}, $eml);
}

sub input_maildir_cb {
	my (undef, $kw, $eml, $self) = @_; # $_[0] $filename ignored
	$self->{wcb}->(undef, { kw => $kw }, $eml);
}

sub process_inputs { # via wq_do
	my ($self) = @_;
	local $PublicInbox::DS::in_loop = 0; # force synchronous dwaitpid
	$self->SUPER::process_inputs;
	my $lei = $self->{lei};
	delete $lei->{1};
	delete $self->{wcb}; # commit
	my $nr_w = delete($lei->{-nr_write}) // 0;
	my $d = (delete($lei->{-nr_seen}) // 0) - $nr_w;
	$d = $d ? " ($d duplicates)" : '';
	$lei->qerr("# converted $nr_w messages$d");
}

sub lei_convert { # the main "lei convert" method
	my ($lei, @inputs) = @_;
	$lei->{opt}->{kw} //= 1;
	$lei->{opt}->{dedupe} //= 'none';
	my $self = bless {}, __PACKAGE__;
	my $ovv = PublicInbox::LeiOverview->new($lei, 'out-format');
	$lei->{l2m} or return
		$lei->fail('--output unspecified or is not a mail destination');
	my $devfd = $lei->path_to_fd($ovv->{dst}) // return;
	$lei->{opt}->{augment} = 1 if $devfd < 0;
	$self->prepare_inputs($lei, \@inputs) or return;
	# n.b. {net} {auth} is handled by l2m worker
	my ($op_c, $ops) = $lei->workers_start($self, 1);
	$lei->{wq1} = $self;
	$self->wq_io_do('process_inputs', []);
	$self->wq_close;
	$lei->wait_wq_events($op_c, $ops);
}

sub ipc_atfork_child {
	my ($self) = @_;
	my $lei = $self->{lei};
	$lei->_lei_atfork_child;
	my $l2m = delete $lei->{l2m};
	if (my $net = $lei->{net}) { # may prompt user once
		$net->{mics_cached} = $net->imap_common_init($lei);
		$net->{nn_cached} = $net->nntp_common_init($lei);
	}
	$l2m->pre_augment($lei);
	$l2m->do_augment($lei);
	$l2m->post_augment($lei);
	$self->{wcb} = $l2m->write_cb($lei);
	$self->SUPER::ipc_atfork_child;
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiCurl.pm000066400000000000000000000054511430031475700210060ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# common option and torsocks(1) wrapping for curl(1)
# Eventually, we may support using libcurl via Inline::C and/or
# WWW::Curl; but curl(1) is most prevalent and widely-installed.
# n.b. curl may support a daemon/client model like lei someday:
#   https://github.com/curl/curl/wiki/curl-tool-master-client
package PublicInbox::LeiCurl;
use strict;
use v5.10.1;
use PublicInbox::Spawn qw(which);
use PublicInbox::Config;

# Ensures empty strings are quoted, we don't need more
# sophisticated quoting than for empty strings: curl -d ''
use overload '""' => sub {
	join(' ', map { $_ eq '' ?  "''" : $_ } @{$_[0]});
};

my %lei2curl = (
	'curl-config=s@' => 'config|K=s@',
);

# prepares a common command for curl(1) based on $lei command
sub new {
	my ($cls, $lei, $curl) = @_;
	$curl //= which('curl') // return $lei->fail('curl not found');
	my $opt = $lei->{opt};
	my @cmd = ($curl, qw(-Sf));
	$cmd[-1] .= 's' if $opt->{quiet}; # already the default for "lei q"
	$cmd[-1] .= 'v' if $opt->{verbose}; # we use ourselves, too
	for my $o ($lei->curl_opt) {
		if (my $lei_spec = $lei2curl{$o}) {
			$o = $lei_spec;
		}
		$o =~ s/\|[a-z0-9]\b//i; # remove single char short option
		if ($o =~ s/=[is]@\z//) {
			my $ary = $opt->{$o} or next;
			push @cmd, map { ("--$o", $_) } @$ary;
		} elsif ($o =~ s/=[is]\z//) {
			my $val = $opt->{$o} // next;
			push @cmd, "--$o", $val;
		} elsif ($opt->{$o}) {
			push @cmd, "--$o";
		}
	}
	push @cmd, '-v' if $opt->{verbose}; # lei uses this itself
	bless \@cmd, $cls;
}

sub torsocks { # useful for "git clone" and "git fetch", too
	my ($self, $lei, $uri)= @_;
	my $opt = $lei->{opt};
	$opt->{torsocks} = 'false' if $opt->{'no-torsocks'};
	my $torsocks = $opt->{torsocks} //= 'auto';
	if ($torsocks eq 'auto' && substr($uri->host, -6) eq '.onion' &&
		($PublicInbox::Config::LD_PRELOAD//'') !~ m!/libtorsocks\b!) {
		# "auto" continues anyways if torsocks is missing;
		# a proxy may be specified via CLI, curlrc,
		# environment variable, or even firewall rule
		[ ($lei->{torsocks} //= which('torsocks')) // () ]
	} elsif (PublicInbox::Config::git_bool($torsocks)) {
		my $x = $lei->{torsocks} //= which('torsocks');
		$x or return $lei->fail(<scheme =~ /\Ahttps?\z/i) {
		my $cfg = $lei->_lei_cfg;
		my $p = $cfg ? $cfg->urlmatch('http.Proxy', $$uri) : undef;
		push(@opt, "--proxy=$p") if defined($p);
	}
	bless [ @$pfx, @$self, @opt, $uri->as_string ], ref($self);
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiDedupe.pm000066400000000000000000000065411430031475700213100ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors 
# License: AGPL-3.0+ 
package PublicInbox::LeiDedupe;
use strict;
use v5.10.1;
use PublicInbox::ContentHash qw(content_hash git_sha);
use Digest::SHA ();

# n.b. mutt sets most of these headers not sure about Bytes
our @OID_IGNORE = qw(Status X-Status Content-Length Lines Bytes);

# best-effort regeneration of OID when augmenting existing results
sub _regen_oid ($) {
	my ($eml) = @_;
	my @stash; # stash away headers we shouldn't have in git
	for my $k (@OID_IGNORE) {
		my @v = $eml->header_raw($k) or next;
		push @stash, [ $k, \@v ];
		$eml->header_set($k); # restore below
	}
	my $dig = git_sha(1, $eml);
	for my $kv (@stash) { # restore stashed headers
		my ($k, @v) = @$kv;
		$eml->header_set($k, @v);
	}
	$dig->digest;
}

sub _oidbin ($) { defined($_[0]) ? pack('H*', $_[0]) : undef }

sub smsg_hash ($) {
	my ($smsg) = @_;
	my $dig = Digest::SHA->new(256);
	my $x = join("\0", @$smsg{qw(from to cc ds subject references mid)});
	utf8::encode($x);
	$dig->add($x);
	$dig->digest;
}

# the paranoid option
sub dedupe_oid ($) {
	my ($skv) = @_;
	(sub { # may be called in a child process
		my ($eml, $oidhex) = @_;
		$skv->set_maybe(_oidbin($oidhex) // _regen_oid($eml), '');
	}, sub {
		my ($smsg) = @_;
		$skv->set_maybe(_oidbin($smsg->{blob}), '');
	});
}

# dangerous if there's duplicate messages with different Message-IDs
sub dedupe_mid ($) {
	my ($skv) = @_;
	(sub { # may be called in a child process
		my ($eml, $oidhex) = @_;
		# lei supports non-public drafts w/o Message-ID
		my $mid = $eml->header_raw('Message-ID') // _oidbin($oidhex) //
			content_hash($eml);
		$skv->set_maybe($mid, '');
	}, sub {
		my ($smsg) = @_;
		my $mid = $smsg->{mid};
		$mid = undef if $mid eq '';
		$mid //= smsg_hash($smsg) // _oidbin($smsg->{blob});
		$skv->set_maybe($mid, '');
	});
}

# our default deduplication strategy (used by v2, also)
sub dedupe_content ($) {
	my ($skv) = @_;
	(sub { # may be called in a child process
		my ($eml) = @_; # $oidhex = $_[1], ignored
		$skv->set_maybe(content_hash($eml), '');
	}, sub {
		my ($smsg) = @_;
		$skv->set_maybe(smsg_hash($smsg), '');
	});
}

# no deduplication at all
sub true { 1 }
sub dedupe_none ($) { (\&true, \&true) }

sub new {
	my ($cls, $lei) = @_;
	my $dd = $lei->{opt}->{dedupe} // 'content';
	my $dst = $lei->{ovv}->{dst};

	# allow "none" to bypass Eml->new if writing to directory:
	return if ($dd eq 'none' && substr($dst // '', -1) eq '/');
	my $m = "dedupe_$dd";
	$cls->can($m) or die "unsupported dedupe strategy: $dd\n";
	my $skv;
	if ($dd ne 'none') {
		require PublicInbox::SharedKV;
		$skv = PublicInbox::SharedKV->new;
	}
	# [ $skv, $eml_cb, $smsg_cb, "dedupe_$dd" ]
	bless [ $skv, undef, undef, $m ], $cls;
}

# returns true on seen messages according to the deduplication strategy,
# returns false if unseen
sub is_dup {
	my ($self, $eml, $smsg) = @_;
	!$self->[1]->($eml, $smsg ? $smsg->{blob} : undef);
}

sub is_smsg_dup {
	my ($self, $smsg) = @_;
	!$self->[2]->($smsg);
}

sub prepare_dedupe {
	my ($self) = @_;
	my $skv = $self->[0];
	$self->[1] or @$self[1,2] = $self->can($self->[3])->($skv);
	$skv ? $skv->dbh : undef;
}

sub pause_dedupe {
	my ($self) = @_;
	my $skv = $self->[0] or return;
	$skv->dbh_release;
	delete($skv->{dbh}) if $skv;
}

sub has_entries {
	my $skv = $_[0]->[0] or return undef;
	$skv->has_entries;
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiEditSearch.pm000066400000000000000000000046311430031475700221130ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# "lei edit-search" edit a saved search following "lei q --save"
package PublicInbox::LeiEditSearch;
use strict;
use v5.10.1;
use PublicInbox::LeiSavedSearch;
use PublicInbox::LeiUp;
use parent qw(PublicInbox::LeiConfig);

sub cfg_edit_begin {
	my ($self) = @_;
	if (ref($self->{lss}->{-cfg}->{'lei.q.output'})) {
		delete $self->{lss}->{-cfg}->{'lei.q.output'}; # invalid
		$self->{lei}->pgr_err(<{lss}->{-f} has multiple values of lei.q.output
please remove redundant ones
EOM
	}
}

sub cfg_verify {
	my ($self, $cfg) = @_;
	my $new_out = $cfg->{'lei.q.output'} // '';
	return $self->cfg_do_edit(<{-f} has multiple values of lei.q.output
EOM
	return $self->cfg_do_edit(<{-f} needs lei.q.output
EOM
	my $lss = $self->{lss};
	my $old_out = $lss->{-cfg}->{'lei.q.output'} // return;
	return if $old_out eq $new_out;
	my $lei = $self->{lei};
	my $old_path = $old_out;
	my $new_path = $new_out;
	s!$PublicInbox::LeiSavedSearch::LOCAL_PFX!! for ($old_path, $new_path);
	my $dir_old = $lss->can('lss_dir_for')->($lei, \$old_path, 1);
	my $dir_new = $lss->can('lss_dir_for')->($lei, \$new_path);
	return if $dir_new eq $dir_old;

	($old_out =~ m!\Av2:!i || $new_out =~ m!\Av2:!) and
		return $self->cfg_do_edit(<cfg_do_edit(<puts("lei.q.output changed from $old_sq to $new_sq");
	$lei->qerr("# lei convert $old_sq -o $new_sq");
	my $v = !$lei->{opt}->{quiet};
	$lei->{opt} = { output => $new_out, verbose => $v };
	require PublicInbox::LeiConvert;
	PublicInbox::LeiConvert::lei_convert($lei, $old_out);

	$lei->fail(<up($lei, $out) or return;
	my $f = $lss->{-f};
	my $self = bless { lei => $lei, lss => $lss, -f => $f }, __PACKAGE__;
	$self->cfg_do_edit;
}

*_complete_edit_search = \&PublicInbox::LeiUp::_complete_up;

1;
public-inbox-1.9.0/lib/PublicInbox/LeiExportKw.pm000066400000000000000000000111041430031475700216540ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# front-end for the "lei export-kw" sub-command
package PublicInbox::LeiExportKw;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
use Errno qw(EEXIST ENOENT);
use PublicInbox::Syscall qw(rename_noreplace);

sub export_kw_md { # LeiMailSync->each_src callback
	my ($oidbin, $id, $self, $mdir) = @_;
	my $sto_kw = $self->{lse}->oidbin_keywords($oidbin) or return;
	my $bn = $$id;
	my ($md_kw, $unknown, @try);
	if ($bn =~ s/:2,([a-zA-Z]*)\z//) {
		($md_kw, $unknown) = PublicInbox::MdirReader::flags2kw($1);
		@try = qw(cur new);
	} else {
		$unknown = [];
		@try = qw(new cur);
	}
	if ($self->{-merge_kw} && $md_kw) { # merging keywords is the default
		@$sto_kw{keys %$md_kw} = values(%$md_kw);
	}
	$bn .= ':2,'.
		PublicInbox::LeiToMail::kw2suffix([keys %$sto_kw], @$unknown);
	return if $bn eq $$id;
	my $dst = "$mdir/cur/$bn";
	my $lei = $self->{lei};
	for my $d (@try) {
		my $src = "$mdir/$d/$$id";
		if (rename_noreplace($src, $dst)) { # success
			$self->{lms}->mv_src("maildir:$mdir",
						$oidbin, $id, $bn);
			return; # success
		} elsif ($! == EEXIST) { # lost race with lei/store?
			return;
		} elsif ($! != ENOENT) {
			$lei->child_error(1,
				"E: rename_noreplace($src -> $dst): $!");
		} # else loop @try
	}
	my $e = $!;
	# both tries failed
	my $oidhex = unpack('H*', $oidbin);
	my $src = "$mdir/{".join(',', @try)."}/$$id";
	$lei->child_error(1, "rename_noreplace($src -> $dst) ($oidhex): $e");
	for (@try) { return if -e "$mdir/$_/$$id" }
	$self->{lms}->clear_src("maildir:$mdir", $id);
}

sub export_kw_imap { # LeiMailSync->each_src callback
	my ($oidbin, $id, $self, $mic) = @_;
	my $sto_kw = $self->{lse}->oidbin_keywords($oidbin) or return;
	$self->{imap_mod_kw}->($self->{nwr}, $mic, $id, [ keys %$sto_kw ]);
}

# overrides PublicInbox::LeiInput::input_path_url
sub input_path_url {
	my ($self, $input, @args) = @_;
	$self->{lms}->lms_write_prepare;
	if ($input =~ /\Amaildir:(.+)/i) {
		my $mdir = $1;
		require PublicInbox::LeiToMail; # kw2suffix
		$self->{lms}->each_src($input, \&export_kw_md, $self, $mdir);
	} elsif ($input =~ m!\Aimaps?://!i) {
		my $uri = PublicInbox::URIimap->new($input);
		my $mic = $self->{nwr}->mic_for_folder($uri);
		if ($mic && !$self->{nwr}->can_store_flags($mic)) {
			my $m = "$input does not support PERMANENTFLAGS";
			if (defined $self->{lei}->{opt}->{all}) {
				$self->{lei}->qerr("# $m");
			} else { # set error code if user explicitly requested
				$self->{lei}->child_error(0, "E: $m");
			}
			return;
		}
		if ($mic) {
			$self->{lms}->each_src($$uri, \&export_kw_imap,
						$self, $mic);
			$mic->expunge;
		} else {
			$self->{lei}->child_error(0, "$input unavailable: $@");
		}
	} else { die "BUG: $input not supported" }
}

sub lei_export_kw {
	my ($lei, @folders) = @_;
	my $sto = $lei->_lei_store or return $lei->fail(<lms or return $lei->fail(<{opt}->{all})) { # --all=
		$lms->group2folders($lei, $all, \@folders) or return;
		@folders = grep(/\A(?:maildir|imaps?):/i, @folders);
	} else {
		$lms->arg2folder($lei, \@folders); # may die
	}
	$lms->lms_pause;
	my $self = bless { lse => $sto->search, lms => $lms }, __PACKAGE__;
	$lei->{opt}->{'mail-sync'} = 1; # for prepare_inputs
	$self->prepare_inputs($lei, \@folders) or return;
	if (my @ro = grep(!/\A(?:maildir|imaps?):/i, @folders)) {
		return $lei->fail("cannot export to read-only folders: @ro");
	}
	my $m = $lei->{opt}->{mode} // 'merge';
	if ($m eq 'merge') { # default
		$self->{-merge_kw} = 1;
	} elsif ($m eq 'set') {
	} else {
		return $lei->fail(<{net}) {
		require PublicInbox::NetWriter;
		$self->{nwr} = bless $net, 'PublicInbox::NetWriter';
		$self->{imap_mod_kw} = $net->can($self->{-merge_kw} ?
					'imap_add_kw' : 'imap_set_kw');
		$self->{nwr}->{-skip_creat} = 1;
	}
	$lei->{-err_type} = 'non-fatal';
	$lei->wq1_start($self);
}

sub _complete_export_kw {
	my ($lei, @argv) = @_;
	my $lms = $lei->lms or return ();
	my $match_cb = $lei->complete_url_prepare(\@argv);
	# filter-out read-only sources:
	my @k = grep(m!(?:maildir|imaps?):!,
			$lms->folders($argv[-1] // undef, 1));
	my @m = map { $match_cb->($_) } @k;
	@m ? @m : @k;
}

no warnings 'once';

*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;

1;
public-inbox-1.9.0/lib/PublicInbox/LeiExternal.pm000066400000000000000000000106501430031475700216600ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors 
# License: AGPL-3.0+ 

# *-external commands of lei
package PublicInbox::LeiExternal;
use strict;
use v5.10.1;
use PublicInbox::Config;

sub externals_each {
	my ($self, $cb, @arg) = @_;
	my $cfg = $self->_lei_cfg;
	my %boost;
	for my $sec (grep(/\Aexternal\./, @{$cfg->{-section_order}})) {
		my $loc = substr($sec, length('external.'));
		$boost{$loc} = $cfg->{"$sec.boost"};
	}
	return \%boost if !wantarray && !$cb;

	# highest boost first, but stable for alphabetic tie break
	use sort 'stable';
	my @order = sort { $boost{$b} <=> $boost{$a} } sort keys %boost;
	if (ref($cb) eq 'CODE') {
		for my $loc (@order) {
			$cb->(@arg, $loc, $boost{$loc});
		}
	} elsif (ref($cb) eq 'HASH') {
		%$cb = %boost;
	}
	@order; # scalar or array
}

sub ext_canonicalize {
	my $location = $_[-1]; # $_[0] may be $lei
	if ($location !~ m!\Ahttps?://!) {
		PublicInbox::Config::rel2abs_collapsed($location);
	} else {
		require URI;
		my $uri = URI->new($location)->canonical;
		my $path = $uri->path . '/';
		$path =~ tr!/!/!s; # squeeze redundant '/'
		$uri->path($path);
		$uri->as_string;
	}
}

# TODO: we will probably extract glob2re into a separate module for
# PublicInbox::Filter::Base and maybe other places
my %re_map = ( '*' => '[^/]*?', '?' => '[^/]',
		'[' => '[', ']' => ']', ',' => ',' );

sub glob2re {
	my $re = $_[-1]; # $_[0] may be $lei
	my $p = '';
	my $in_bracket = 0;
	my $qm = 0;
	my $schema_host_port = '';

	# don't glob URL-looking things that look like IPv6
	if ($re =~ s!\A([a-z0-9\+]+://\[[a-f0-9\:]+\](?::[0-9]+)?/)!!i) {
		$schema_host_port = quotemeta $1; # "http://[::1]:1234"
	}
	my $changes = ($re =~ s!(.)!
		$re_map{$p eq '\\' ? '' : do {
			if ($1 eq '[') { ++$in_bracket }
			elsif ($1 eq ']') { --$in_bracket }
			elsif ($1 eq ',') { ++$qm } # no change
			$p = $1;
		}} // do {
			$p = $1;
			($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p")
		}!sge);
	# bashism (also supported by curl): {a,b,c} => (a|b|c)
	$changes += ($re =~ s/([^\\]*)\\\{([^,]*,[^\\]*)\\\}/
			(my $in_braces = $2) =~ tr!,!|!;
			$1."($in_braces)";
			/sge);
	($changes - $qm) ? $schema_host_port.$re : undef;
}

# get canonicalized externals list matching $loc
# $is_exclude denotes it's for --exclude
# otherwise it's for --only/--include is assumed
sub get_externals {
	my ($self, $loc, $is_exclude) = @_;
	return (ext_canonicalize($loc)) if -e $loc;
	my @m;
	my @cur = externals_each($self);
	my $do_glob = !$self->{opt}->{globoff}; # glob by default
	if ($do_glob && (my $re = glob2re($loc))) {
		@m = grep(m!$re!, @cur);
		return @m if scalar(@m);
	} elsif (index($loc, '/') < 0) { # exact basename match:
		@m = grep(m!/\Q$loc\E/?\z!, @cur);
		return @m if scalar(@m) == 1;
	} elsif ($is_exclude) { # URL, maybe:
		my $canon = ext_canonicalize($loc);
		@m = grep(m!\A\Q$canon\E\z!, @cur);
		return @m if scalar(@m) == 1;
	} else { # URL:
		return (ext_canonicalize($loc));
	}
	if (scalar(@m) == 0) {
		die "`$loc' is unknown\n";
	} else {
		die("`$loc' is ambiguous:\n", map { "\t$_\n" } @m, "\n");
	}
}

sub canonicalize_excludes {
	my ($lei, $excludes) = @_;
	my %x;
	for my $loc (@$excludes) {
		my @l = get_externals($lei, $loc, 1);
		$x{$_} = 1 for @l;
	}
	\%x;
}

# returns an anonymous sub which returns an array of potential results
sub complete_url_prepare {
	my $argv = $_[-1]; # $_[0] may be $lei
	# Workaround bash word-splitting URLs to ['https', ':', '//' ...]
	# Maybe there's a better way to go about this in
	# contrib/completion/lei-completion.bash
	my $re = '';
	my $cur = pop(@$argv) // '';
	if (@$argv) {
		my @x = @$argv;
		if ($cur eq ':' && @x) {
			push @x, $cur;
			$cur = '';
		}
		while (@x > 2 && $x[0] !~ /\A(?:http|nntp|imap)s?\z/i &&
				$x[1] ne ':') {
			shift @x;
		}
		if (@x >= 2) { # qw(https : hostname : 443) or qw(http :)
			$re = join('', @x);
		} else { # just filter out the flags and hope for the best
			$re = join('', grep(!/^-/, @$argv));
		}
		$re = quotemeta($re);
	}
	my $match_cb = sub {
		# the "//;" here (for AUTH=ANONYMOUS) interacts badly with
		# bash tab completion, strip it out for now since our commands
		# work w/o it.  Not sure if there's a better solution...
		$_[0] =~ s!//;AUTH=ANONYMOUS\@!//!i;
		$_[0] =~ s!;!\\;!g;
		# only return the part specified on the CLI
		# don't duplicate if already 100% completed
		$_[0] =~ /\A$re(\Q$cur\E.*)/ ? ($cur eq $1 ? () : $1) : ()
	};
	wantarray ? ($re, $cur, $match_cb) : $match_cb;
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiFinmsg.pm000066400000000000000000000010431430031475700213150ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# Finalization messages, used to queue up a bunch of messages which
# only get written out on ->DESTROY
package PublicInbox::LeiFinmsg;
use strict;
use v5.10.1;

sub new {
	my ($cls, $lei) = @_;
	bless [ @$lei{qw(2 sock)}, $$ ], $cls;
}

sub DESTROY {
	my ($self) = @_;
	my ($stderr, $sock, $pid) = splice(@$self, 0, 3);
	print $stderr @$self if $pid == $$;
	# script/lei disconnects when $sock SvREFCNT drops to zero
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiForgetExternal.pm000066400000000000000000000024121430031475700230240ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# "lei forget-external" command
package PublicInbox::LeiForgetExternal;
use strict;
use v5.10.1;

sub lei_forget_external {
	my ($lei, @locations) = @_;
	my $cfg = $lei->_lei_cfg or
		return $lei->fail('no externals configured');
	my %seen;
	for my $loc (@locations) {
		for my $l ($loc, $lei->ext_canonicalize($loc)) {
			next if $seen{$l}++;
			my $key = "external.$l.boost";
			delete($cfg->{$key});
			$lei->_config('--unset', $key);
			if ($? == 0) {
				$lei->qerr("# $l forgotten ");
			} elsif (($? >> 8) == 5) {
				warn("# $l not found\n");
			} else {
				$lei->child_error($?, "# --unset $key error");
			}
		}
	}
}

# shell completion helper called by lei__complete
sub _complete_forget_external {
	my ($lei, @argv) = @_;
	my $cfg = $lei->_lei_cfg or return ();
	my ($cur, $re, $match_cb) = $lei->complete_url_prepare(\@argv);
	# FIXME: bash completion off "http:" or "https:" when the last
	# character is a colon doesn't work properly even if we're
	# returning "//$HTTP_HOST/$PATH_INFO/", not sure why, could
	# be a bash issue.
	map {
		$match_cb->(substr($_, length('external.')));
	} grep(/\Aexternal\.$re\Q$cur/, @{$cfg->{-section_order}});
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiForgetMailSync.pm000066400000000000000000000014071430031475700227640ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# "lei forget-mail-sync" drop synchronization information
# TODO: figure out what to do about "lei index" users having
# dangling references.  Perhaps just documenting "lei index"
# use being incompatible with "forget-mail-sync" use is
# sufficient.

package PublicInbox::LeiForgetMailSync;
use strict;
use v5.10.1;
use PublicInbox::LeiRefreshMailSync;

sub lei_forget_mail_sync {
	my ($lei, @folders) = @_;
	my $lms = $lei->lms or return;
	$lms->lms_write_prepare;
	$lms->arg2folder($lei, \@folders); # may die
	$lms->forget_folders(@folders);
}

*_complete_forget_mail_sync =
	\&PublicInbox::LeiRefreshMailSync::_complete_refresh_mail_sync;

1;
public-inbox-1.9.0/lib/PublicInbox/LeiForgetSearch.pm000066400000000000000000000042321430031475700224510ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# "lei forget-search" forget/remove a saved search "lei q --save"
package PublicInbox::LeiForgetSearch;
use strict;
use v5.10.1;
use parent qw(PublicInbox::LeiUp);
use PublicInbox::LeiSavedSearch;
use File::Path ();
use SelectSaver;

sub do_forget_search {
	my ($lei, @outs) = @_;
	my @dirs; # paths in ~/.local/share/lei/saved-search/
	my $cwd;
	for my $o (@outs) {
		my $d = PublicInbox::LeiSavedSearch::lss_dir_for($lei, \$o, 1);
		if (-e $d) {
			push @dirs, $d
		} else { # keep going, like rm(1):
			$cwd //= $lei->rel2abs('.');
			warn "--save was not used with $o cwd=$cwd\n";
		}
	}
	my $save;
	my $opt = { safe => 1 };
	if ($lei->{opt}->{verbose}) {
		$opt->{verbose} = 1;
		$save = SelectSaver->new($lei->{2});
	}
	File::Path::remove_tree(@dirs, $opt);
	$lei->child_error(0) if defined $cwd;
}

sub lei_forget_search {
	my ($lei, @outs) = @_;
	my $prune = $lei->{opt}->{prune};
	$prune // return do_forget_search($lei, @outs);
	return $lei->fail("--prune and @outs incompatible") if @outs;
	my @tmp = PublicInbox::LeiSavedSearch::list($lei);
	my $self = bless { -mail_sync => 1 }, __PACKAGE__;
	$self->filter_lss($lei, $prune) // return
			$lei->fail("only --prune=$prune not understood");
	if ($self->{o_remote}) { # setup lei->{auth}
		$self->prepare_inputs($lei, $self->{o_remote}) or return;
	}
	$lei->wq1_start($self);
}

sub do_prune {
	my ($self) = @_;
	my $lei = $self->{lei};
	for my $o (@{$self->{o_local} // []}) {
		next if -e $o;
		$lei->qerr("# pruning $o");
		eval { do_forget_search($lei, $o) };
		$lei->child_error(0, "E: $@") if $@;
	}
	for my $o (@{$self->{o_remote} // []}) {
		my $uri = PublicInbox::URIimap->new($o);
		next if $lei->{net}->mic_for_folder($uri);
		$lei->qerr("# pruning $uri");
		eval { do_forget_search($lei, $o) };
		$lei->child_error(0, "E: $@") if $@;
	}
}

# called in top-level lei-daemon when LeiAuth is done
sub net_merge_all_done {
	my ($self) = @_;
	$self->wq_do('do_prune');
	$self->wq_close;
}

*_wq_done_wait = \&PublicInbox::LEI::wq_done_wait;
*_complete_forget_search = \&PublicInbox::LeiUp::_complete_up;

1;
public-inbox-1.9.0/lib/PublicInbox/LeiHelp.pm000066400000000000000000000061641430031475700207730ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors 
# License: AGPL-3.0+ 

# -h/--help support for lei
package PublicInbox::LeiHelp;
use strict;
use v5.10.1;
use Text::Wrap qw(wrap);

my %NOHELP = map { $_ => 1 } qw(mfolder);

sub call {
	my ($self, $errmsg, $CMD, $OPTDESC) = @_;
	my $cmd = $self->{cmd} // 'COMMAND';
	my @info = @{$CMD->{$cmd} // [ '...', '...' ]};
	my @top = ($cmd, shift(@info) // ());
	my $cmd_desc = shift(@info);
	$cmd_desc = $cmd_desc->($self) if ref($cmd_desc) eq 'CODE';
	$cmd_desc =~ s/default: /default:\xa0/;
	my @opt_desc;
	my $lpad = 2;
	for my $sw (grep { !ref } @info) { # ("prio=s", "z", $GLP_PASS)
		my $desc = $OPTDESC->{"$sw\t$cmd"} // $OPTDESC->{$sw} // next;
		my $arg_vals = '';
		($arg_vals, $desc) = @$desc if ref($desc) eq 'ARRAY';

		# lower-case is a keyword (e.g. `content', `oid'),
		# ALL_CAPS is a string description (e.g. `PATH')
		if ($desc !~ /default/ && $arg_vals =~ /\b([a-z]+)[,\|]/) {
			$desc .= " (default:\xa0`$1')";
		} else {
			$desc =~ s/default: /default:\xa0/;
		}
		my (@vals, @s, @l);
		my $x = $sw;
		if ($x =~ s/!\z//) { # solve! => --no-solve
			$x =~ s/(\A|\|)/$1no-/g
		} elsif ($x =~ s/\+\z//) { # verbose|v+
		} elsif ($x =~ s/:.+//) { # optional args: $x = "mid:s"
			@vals = (' [', undef, ']');
		} elsif ($x =~ s/=.+//) { # required arg: $x = "type=s"
			@vals = (' ', undef);
		} # else: no args $x = 'threads|t'

		# we support underscore options from public-inbox-* commands;
		# but they've never been documented and will likely go away.
		# $x = help|h
		for (grep { !/_/ && !$NOHELP{$_} } split(/\|/, $x)) {
			length($_) > 1 ? push(@l, "--$_") : push(@s, "-$_");
		}
		if (!scalar(@vals)) { # no args 'threads|t'
		} elsif ($arg_vals =~ s/\A([A-Z_=]+)\b//) { # "NAME"
			$vals[1] = $1;
		} else {
			$vals[1] = uc(substr($l[0], 2)); # "--type" => "TYPE"
		}
		if ($arg_vals =~ /([,\|])/) {
			my $sep = $1;
			my @allow = split(/\Q$sep\E/, $arg_vals);
			my $must = $sep eq '|' ? 'Must' : 'Can';
			@allow = map { length $_ ? "`$_'" : () } @allow;
			my $last = pop @allow;
			$desc .= "\n$must be one of: " .
				join(', ', @allow) . " or $last";
		}
		my $lhs = join(', ', @s, @l) . join('', @vals);
		if ($x =~ /\|\z/) { # "stdin|" or "clear|"
			$lhs =~ s/\A--/- , --/;
		} else {
			$lhs =~ s/\A--/    --/; # pad if no short options
		}
		$lpad = length($lhs) if length($lhs) > $lpad;
		push @opt_desc, $lhs, $desc;
	}
	my $msg = $errmsg ? "E: $errmsg\n" : '';
	$msg .= <start_pager if -t $self->{$fd};
	$msg =~ s/\xa0/ /gs; # convert NBSP to SP
	print { $self->{$fd} } $msg;
	$self->x_it($errmsg ? (1 << 8) : 0); # stderr => failure
	undef;
}

# the "lei help" command
sub lei_help { $_[0]->_help }

1;
public-inbox-1.9.0/lib/PublicInbox/LeiImport.pm000066400000000000000000000107431430031475700213530ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# front-end for the "lei import" sub-command
package PublicInbox::LeiImport;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
use PublicInbox::InboxWritable qw(eml_from_path);

# /^input_/ subs are used by (or override) PublicInbox::LeiInput superclass

sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
	my ($self, $eml, $vmd) = @_;
	my $xoids = $self->{lei}->{ale}->xoids_for($eml);
	if (my $all_vmd = $self->{all_vmd}) {
		@$vmd{keys %$all_vmd} = values %$all_vmd;
	}
	$self->{lei}->{sto}->wq_do('set_eml', $eml, $vmd, $xoids);
}

sub input_mbox_cb { # MboxReader callback
	my ($eml, $self) = @_;
	my $vmd;
	if ($self->{-import_kw}) {
		my $kw = PublicInbox::MboxReader::mbox_keywords($eml);
		$vmd = { kw => $kw } if scalar(@$kw);
	}
	input_eml_cb($self, $eml, $vmd);
}

sub pmdir_cb { # called via wq_io_do from LeiPmdir->each_mdir_fn
	my ($self, $f, $fl) = @_;
	my ($folder, $bn) = ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) or
		die "BUG: $f was not from a Maildir?\n";
	my $kw = PublicInbox::MdirReader::flags2kw($fl);
	substr($folder, 0, 0) = 'maildir:'; # add prefix
	my $lse = $self->{lse} //= $self->{lei}->{sto}->search;
	my $lms = $self->{-lms_rw} //= $self->{lei}->lms; # may be 0 or undef
	my @oidbin = $lms ? $lms->name_oidbin($folder, $bn) : ();
	@oidbin > 1 and warn("W: $folder/*/$$bn not unique:\n",
				map { "\t".unpack('H*', $_)."\n" } @oidbin);
	my %seen;
	my @docids = sort { $a <=> $b } grep { !$seen{$_}++ }
			map { $lse->over->oidbin_exists($_) } @oidbin;
	my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
	if (scalar @docids) {
		$lse->kw_changed(undef, $kw, \@docids) or return;
	}
	if (my $eml = eml_from_path($f)) {
		$vmd->{sync_info} = [ $folder, \$bn ] if $self->{-mail_sync};
		$self->input_eml_cb($eml, $vmd);
	}
}

sub input_net_cb { # imap_each / nntp_each
	my ($uri, $uid, $kw, $eml, $self) = @_;
	if (defined $eml) {
		my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
		$vmd->{sync_info} = [ $$uri, $uid ] if $self->{-mail_sync};
		$self->input_eml_cb($eml, $vmd);
	} elsif (my $ikw = $self->{lei}->{ikw}) { # old message, kw only
		# we send $uri as a bare SCALAR and not a URIimap ref to
		# reduce socket traffic:
		$ikw->wq_io_do('ck_update_kw', [], $$uri, $uid, $kw);
	}
}

sub do_import_index ($$@) {
	my ($self, $lei, @inputs) = @_;
	my $sto = $lei->_lei_store(1);
	$sto->write_prepare($lei);
	$self->{-import_kw} = $lei->{opt}->{kw} // 1;
	my $vmd_mod = $self->vmd_mod_extract(\@inputs);
	return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err};
	$self->{all_vmd} = $vmd_mod if scalar keys %$vmd_mod;
	$lei->ale; # initialize for workers to read (before LeiPmdir->new)
	$self->{-mail_sync} = $lei->{opt}->{'mail-sync'} // 1;
	$self->prepare_inputs($lei, \@inputs) or return;

	my $j = $lei->{opt}->{jobs} // 0;
	$j =~ /\A([0-9]+),[0-9]+\z/ and $j = $1 + 0;
	$j ||= scalar(@{$self->{inputs}}) || 1;
	my $ikw;
	my $net = $lei->{net};
	if ($net) {
		# $j = $net->net_concurrency($j); TODO
		if ($lei->{opt}->{incremental} // 1) {
			$net->{incremental} = 1;
			$net->{-lms_rw} = $lei->lms // 0;
			if ($self->{-import_kw} && $net->{-lms_rw} &&
					!$lei->{opt}->{'new-only'} &&
					$net->{imap_order}) {
				require PublicInbox::LeiImportKw;
				$ikw = PublicInbox::LeiImportKw->new($lei);
				$net->{each_old} = 1;
			}
		}
	} else {
		my $nproc = $self->detect_nproc;
		$j = $nproc if $j > $nproc;
	}
	($lei->{opt}->{'new-only'} && (!$net || !$net->{imap_order})) and
		warn "# --new-only is only for IMAP\n";
	$lei->{-eml_noisy} = 1;
	$lei->{-err_type} = 'non-fatal';
	$lei->wq1_start($self, $j);
}

sub lei_import { # the main "lei import" method
	my ($lei, @inputs) = @_;
	my $self = bless {}, __PACKAGE__;
	do_import_index($self, $lei, @inputs);
}

sub _complete_import {
	my ($lei, @argv) = @_;
	my ($re, $cur, $match_cb) = $lei->complete_url_prepare(\@argv);
	my @k = $lei->url_folder_cache->keys($argv[-1] // undef, 1);
	my @L = eval { $lei->_lei_store->search->all_terms('L') };
	push(@k, map { "+L:$_" } @L);
	my @m = map { $match_cb->($_) } @k;
	my %f = map { $_ => 1 } (@m ? @m : @k);
	if (my $lms = $lei->lms) {
		@k = $lms->folders($argv[-1] // undef, 1);
		@m = map { $match_cb->($_) } @k;
		if (@m) { @f{@m} = @m } else { @f{@k} = @k }
	}
	keys %f;
}

no warnings 'once';
*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;

1;
public-inbox-1.9.0/lib/PublicInbox/LeiImportKw.pm000066400000000000000000000032571430031475700216570ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# WQ worker for dealing with LeiImport IMAP flags on already-imported messages
# WQ key: {ikw}
package PublicInbox::LeiImportKw;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC);

sub new {
	my ($cls, $lei) = @_;
	my $self = bless { -wq_ident => 'lei import_kw worker' }, $cls;
	my $j = $self->detect_nproc // 4;
	$j = 4 if $j > 4;
	my ($op_c, $ops) = $lei->workers_start($self, $j);
	$op_c->{ops} = $ops; # for PktOp->event_step
	$self->{lei_sock} = $lei->{sock};
	$lei->{ikw} = $self;
}

sub ipc_atfork_child {
	my ($self) = @_;
	my $lei = $self->{lei};
	$lei->_lei_atfork_child;
	my $net = delete $lei->{net} // die 'BUG: no lei->{net}';
	$self->{sto} = $lei->{sto} // die 'BUG: no lei->{sto}';
	$self->{verbose} = $lei->{opt}->{verbose};
	$self->{lse} = $self->{sto}->search;
	$self->{over} = $self->{lse}->over;
	$self->{-lms_rw} = $net->{-lms_rw} || die 'BUG: net->{-lms_rw} FALSE';
	$self->SUPER::ipc_atfork_child;
}

sub ck_update_kw { # via wq_io_do
	my ($self, $url, $uid, $kw) = @_;
	my @oidbin = $self->{-lms_rw}->num_oidbin($url, $uid);
	my $uid_url = "$url/;UID=$uid";
	@oidbin > 1 and warn("W: $uid_url not unique:\n",
				map { "\t".unpack('H*', $_)."\n" } @oidbin);
	my %seen;
	my @docids = sort { $a <=> $b } grep { !$seen{$_}++ }
		map { $self->{over}->oidbin_exists($_) } @oidbin;
	$self->{lse}->kw_changed(undef, $kw, \@docids) or return;
	$self->{verbose} and $self->{lei}->qerr("# $uid_url => @$kw\n");
	$self->{sto}->wq_do('set_eml_vmd', undef, { kw => $kw }, \@docids);
}

sub _lei_wq_eof { # EOF callback for main lei daemon
	$_[0]->wq_eof('ikw');
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiIndex.pm000066400000000000000000000027211430031475700211450ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# front-end for the "lei index" sub-command, this is similar to
# "lei import" but doesn't put a git blob into ~/.local/share/lei/store
package PublicInbox::LeiIndex;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
use PublicInbox::LeiImport;

# /^input_/ subs are used by (or override) PublicInbox::LeiInput superclass
sub input_eml_cb { # used by input_maildir_cb and input_net_cb
	my ($self, $eml, $vmd) = @_;
	my $xoids = $self->{lei}->{ale}->xoids_for($eml);
	if (my $all_vmd = $self->{all_vmd}) {
		@$vmd{keys %$all_vmd} = values %$all_vmd;
	}
	$self->{lei}->{sto}->wq_do('index_eml_only', $eml, $vmd, $xoids);
}

sub input_fh { # overrides PublicInbox::LeiInput::input_fh
	my ($self, $ifmt, $fh, $input, @args) = @_;
	$self->{lei}->child_error(0, <{opt}->{'mail-sync'} = 1;
	my $self = bless {}, __PACKAGE__;
	PublicInbox::LeiImport::do_import_index($self, $lei, @argv);
}

no warnings 'once';
no strict 'refs';
for my $m (qw(pmdir_cb input_net_cb)) {
	*$m = PublicInbox::LeiImport->can($m);
}

*_complete_index = \&PublicInbox::LeiImport::_complete_import;
*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;

1;
public-inbox-1.9.0/lib/PublicInbox/LeiInit.pm000066400000000000000000000021531430031475700210000ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# for the "lei init" command, not sure if it's even needed...
package PublicInbox::LeiInit;
use v5.10.1;

sub lei_init {
	my ($self, $dir) = @_;
	my $cfg = $self->_lei_cfg(1);
	my $cur = $cfg->{'leistore.dir'};
	$dir //= $self->store_path;
	$dir = $self->rel2abs($dir);
	my @cur = stat($cur) if defined($cur);
	$cur = $self->canonpath_harder($cur // $dir);
	my @dir = stat($dir);
	my $exists = "# leistore.dir=$cur already initialized" if @dir;
	if (@cur) {
		if ($cur eq $dir) {
			$self->_lei_store(1)->done;
			return $self->qerr($exists);
		}

		# some folks like symlinks and bind mounts :P
		if (@dir && "@cur[1,0]" eq "@dir[1,0]") {
			$self->_config('leistore.dir', $dir);
			$self->_lei_store(1)->done;
			return $self->qerr("$exists (as $cur)");
		}
		return $self->fail(<<"");
E: leistore.dir=$cur already initialized and it is not $dir

	}
	$self->_config('leistore.dir', $dir);
	$self->_lei_store(1)->done;
	$exists //= "# leistore.dir=$dir newly initialized";
	$self->qerr($exists);
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiInput.pm000066400000000000000000000340421430031475700211760ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# parent class for LeiImport, LeiConvert, LeiIndex
package PublicInbox::LeiInput;
use strict;
use v5.10.1;
use PublicInbox::DS;
use PublicInbox::Spawn qw(which popen_rd);
use PublicInbox::InboxWritable qw(eml_from_path);
use PublicInbox::AutoReap;

# JMAP RFC 8621 4.1.1
# https://www.iana.org/assignments/imap-jmap-keywords/imap-jmap-keywords.xhtml
our @KW = (qw(seen answered flagged draft), # widely-compatible
	qw(forwarded), # IMAP + Maildir
	qw(phishing junk notjunk)); # rarely supported

# note: RFC 8621 states "Users may add arbitrary keywords to an Email",
# but is it good idea?  Stick to the system and reserved ones, for now.
# The widely-compatible ones map to IMAP system flags, Maildir flags
# and mbox Status/X-Status headers.
my %KW = map { $_ => 1 } @KW;
my $L_MAX = 244; # Xapian term limit - length('L')

# RFC 8621, sec 2 (Mailboxes) a "label" for us is a JMAP Mailbox "name"
# "Servers MAY reject names that violate server policy"
my %ERR = (
	L => sub {
		my ($label) = @_;
		length($label) >= $L_MAX and
			return "`$label' too long (must be <= $L_MAX)";
		$label =~ m{\A[a-z0-9_](?:[a-z0-9_\-\./\@,]*[a-z0-9])?\z} ?
			undef : "`$label' is invalid";
	},
	kw => sub {
		my ($kw) = @_;
		$KW{$kw} ? undef : <{opt}->{$opt_key};
	if (!$fmt) {
		my $err = $files ? "regular file(s):\n@$files" : '--stdin';
		return $lei->fail("--$opt_key unset for $err");
	}
	return 1 if $fmt eq 'eml';
	require PublicInbox::MboxLock if $files;
	require PublicInbox::MboxReader;
	PublicInbox::MboxReader->reads($fmt) or
		return $lei->fail("--$opt_key=$fmt unrecognized");
	1;
}

sub input_mbox_cb { # base MboxReader callback
	my ($eml, $self) = @_;
	$eml->header_set($_) for (qw(Status X-Status));
	$self->input_eml_cb($eml);
}

sub input_maildir_cb {
	my ($fn, $kw, $eml, $self) = @_;
	$self->input_eml_cb($eml);
}

sub input_net_cb { # imap_each, nntp_each cb
	my ($url, $uid, $kw, $eml, $self) = @_;
	$self->input_eml_cb($eml);
}

# import a single file handle of $name
# Subclass must define ->input_eml_cb and ->input_mbox_cb
sub input_fh {
	my ($self, $ifmt, $fh, $name, @args) = @_;
	if ($ifmt eq 'eml') {
		my $buf = do { local $/; <$fh> } //
			return $self->{lei}->child_error(0, <<"");
error reading $name: $!

		# mutt pipes single RFC822 messages with a "From " line,
		# but no Content-Length or "From " escaping.
		# "git format-patch" also generates such files by default.
		$buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;

		# a user may feed just a body: git diff | lei rediff -U9
		if ($self->{-force_eml}) {
			my $eml = PublicInbox::Eml->new($buf);
			substr($buf, 0, 0) = "\n\n" if !$eml->{bdy};
		}
		$self->input_eml_cb(PublicInbox::Eml->new(\$buf), @args);
	} else {
		# prepare_inputs already validated $ifmt
		my $cb = PublicInbox::MboxReader->reads($ifmt) //
				die "BUG: bad fmt=$ifmt";
		$cb->(undef, $fh, $self->can('input_mbox_cb'), $self, @args);
	}
}

# handles mboxrd endpoints described in Documentation/design_notes.txt
sub handle_http_input ($$@) {
	my ($self, $url, @args) = @_;
	my $lei = $self->{lei} or die 'BUG: {lei} missing';
	my $curl_opt = delete $self->{"-curl-$url"} or
				die("BUG: $url curl options not prepared");
	my $uri = pop @$curl_opt;
	my $curl = PublicInbox::LeiCurl->new($lei, $self->{curl}) or return;
	push @$curl, '-s', @$curl_opt;
	my $cmd = $curl->for_uri($lei, $uri);
	$lei->qerr("# $cmd");
	my ($fh, $pid) = popen_rd($cmd, undef, { 2 => $lei->{2} });
	my $ar = PublicInbox::AutoReap->new($pid);
	grep(/\A--compressed\z/, @$curl) or
		$fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1);
	eval { $self->input_fh('mboxrd', $fh, $url, @args) };
	my @err = ($@ ? $@ : ());
	$ar->join;
	push(@err, "\$?=$?") if $?;
	$lei->child_error($?, "@$cmd failed: @err") if @err;
}

sub input_path_url {
	my ($self, $input, @args) = @_;
	my $lei = $self->{lei};
	my $ifmt = lc($lei->{opt}->{'in-format'} // '');
	# TODO auto-detect?
	if ($input =~ m!\Aimaps?://!i) {
		$lei->{net}->imap_each($input, $self->can('input_net_cb'),
						$self, @args);
		return;
	} elsif ($input =~ m!\A(?:nntps?|s?news)://!i) {
		$lei->{net}->nntp_each($input, $self->can('input_net_cb'),
						$self, @args);
		return;
	} elsif ($input =~ m!\Ahttps?://!i) {
		handle_http_input($self, $input, @args);
		return;
	}

	# local-only below
	my $ifmt_pfx = '';
	if ($input =~ s!\A([a-z0-9]+):!!i) {
		$ifmt_pfx = "$1:";
		$ifmt = lc($1);
	} elsif ($input =~ /\.(?:patch|eml)\z/i) {
		$ifmt = 'eml';
	} elsif (-f $input && $input =~ m{\A(?:.+)/(?:new|cur)/([^/]+)\z}) {
		my $bn = $1;
		my $fl = PublicInbox::MdirReader::maildir_basename_flags($bn);
		return if index($fl, 'T') >= 0;
		return $self->pmdir_cb($input, $fl) if $self->can('pmdir_cb');
		my $eml = eml_from_path($input) or return
			$lei->qerr("# $input not readable");
		my $kw = PublicInbox::MdirReader::flags2kw($fl);
		$self->can('input_maildir_cb')->($input, $kw, $eml, $self);
		return;
	}
	my $devfd = $lei->path_to_fd($input) // return;
	if ($devfd >= 0) {
		$self->input_fh($ifmt, $lei->{$devfd}, $input, @args);
	} elsif (-f $input && $ifmt eq 'eml') {
		open my $fh, '<', $input or
					return $lei->fail("open($input): $!");
		$self->input_fh($ifmt, $fh, $input, @args);
	} elsif (-f _) {
		my $m = $lei->{opt}->{'lock'} //
			PublicInbox::MboxLock->defaults;
		my $mbl = PublicInbox::MboxLock->acq($input, 0, $m);
		my $zsfx = PublicInbox::MboxReader::zsfx($input);
		if ($zsfx) {
			my $in = delete $mbl->{fh};
			$mbl->{fh} =
			     PublicInbox::MboxReader::zsfxcat($in, $zsfx, $lei);
		}
		local $PublicInbox::DS::in_loop = 0 if $zsfx; # dwaitpid
		$self->input_fh($ifmt, $mbl->{fh}, $input, @args);
	} elsif (-d _ && (-d "$input/cur" || -d "$input/new")) {
		return $lei->fail(<new;
		if (my $pmd = $self->{pmd}) {
			$mdr->maildir_each_file($input,
						$pmd->can('each_mdir_fn'),
						$pmd, @args);
		} else {
			$mdr->maildir_each_eml($input,
						$self->can('input_maildir_cb'),
						$self, @args);
		}
	} elsif ($self->{missing_ok} && !-e $input) { # don't ->fail
		if ($lei->{cmd} eq 'p2q') {
			my $fp = [ qw(git format-patch --stdout -1), $input ];
			my $rdr = { 2 => $lei->{2} };
			my $fh = popen_rd($fp, undef, $rdr);
			eval { $self->input_fh('eml', $fh, $input, @args) };
			my @err = ($@ ? $@ : ());
			close($fh) or push @err, "\$?=$?";
			$lei->child_error($?, "@$fp failed: @err") if @err;
		} else {
			$self->folder_missing("$ifmt:$input");
		}
	} else {
		$lei->fail("$ifmt_pfx$input unsupported (TODO)");
	}
}

# subclasses should overrride this (see LeiRefreshMailSync)
sub folder_missing { die "BUG: ->folder_missing undefined for $_[0]" }

sub bad_http ($$;$) {
	my ($lei, $url, $alt) = @_;
	my $x = $alt ? "did you mean <$alt>?" : 'download and import manually';
	$lei->fail("E: <$url> not recognized, $x");
}

sub prepare_http_input ($$$) {
	my ($self, $lei, $url) = @_;
	require URI;
	require PublicInbox::MboxReader;
	require PublicInbox::LeiCurl;
	require IO::Uncompress::Gunzip;
	$self->{curl} //= which('curl') or
				return $lei->fail("curl missing for <$url>");
	my $uri = URI->new($url);
	my $path = $uri->path;
	my %qf = $uri->query_form;
	my @curl_opt;
	if ($path =~ m!/(?:t\.mbox\.gz|all\.mbox\.gz)\z!) {
		# OK
	} elsif ($path =~ m!/raw\z!) {
		push @curl_opt, '--compressed';
	# convert search query to mboxrd request since they require POST
	# this is only intended for PublicInbox::WWW, and will false-positive
	# on many other search engines... oh well
	} elsif (defined $qf{'q'}) {
		$qf{x} = 'm';
		$uri->query_form(\%qf);
		push @curl_opt, '-d', '';
		$$uri ne $url and $lei->qerr(<<"");
# <$url> rewritten to <$$uri> with HTTP POST

	# try to provide hints for /$INBOX/$MSGID/T/ and /$INBOX/
	} elsif ($path =~ s!/[tT]/\z!/t.mbox.gz! ||
			$path =~ s!/t\.atom\z!/t.mbox.gz! ||
			$path =~ s!/([^/]+\@[^/]+)/\z!/$1/raw!) {
		$uri->path($path);
		return bad_http($lei, $url, $$uri);
	} else {
		return bad_http($lei, $url);
	}
	$self->{"-curl-$url"} = [ @curl_opt, $uri ]; # for handle_http_input
}

sub prepare_inputs { # returns undef on error
	my ($self, $lei, $inputs) = @_;
	my $in_fmt = $lei->{opt}->{'in-format'};
	my $sync = $lei->{opt}->{'mail-sync'} ? {} : undef; # using LeiMailSync
	my $may_sync = $sync || $self->{-mail_sync};
	if ($lei->{opt}->{stdin}) {
		@$inputs and return
			$lei->fail("--stdin and @$inputs do not mix");
		check_input_format($lei) or return;
		push @$inputs, '/dev/stdin';
		push @{$sync->{no}}, '/dev/stdin' if $sync;
	}
	my $net = $lei->{net}; # NetWriter may be created by l2m
	my (@f, @md);
	# e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
	for my $input (@$inputs) {
		my $input_path = $input;
		if ($input =~ m!\A(?:imaps?|nntps?|s?news)://!i) {
			require PublicInbox::NetReader;
			$net //= PublicInbox::NetReader->new;
			$net->add_url($input, $self->{-ls_ok});
			push @{$sync->{ok}}, $input if $sync;
		} elsif ($input_path =~ m!\Ahttps?://!i) { # mboxrd.gz
			# TODO: how would we detect r/w JMAP?
			push @{$sync->{no}}, $input if $sync;
			prepare_http_input($self, $lei, $input_path) or return;
		} elsif ($input_path =~ s/\A([a-z0-9]+)://is) {
			my $ifmt = lc $1;
			if (($in_fmt // $ifmt) ne $ifmt) {
				return $lei->fail(<<"");
--in-format=$in_fmt and `$ifmt:' conflict

			}
			if ($ifmt =~ /\A(?:maildir|mh)\z/i) {
				push @{$sync->{ok}}, $input if $sync;
			} else {
				push @{$sync->{no}}, $input if $sync;
			}
			my $devfd = $lei->path_to_fd($input_path) // return;
			if ($devfd >= 0 || (-f $input_path || -p _)) {
				require PublicInbox::MboxLock;
				require PublicInbox::MboxReader;
				PublicInbox::MboxReader->reads($ifmt) or return
					$lei->fail("$ifmt not supported");
			} elsif (-d $input_path) {
				$ifmt eq 'maildir' or return
					$lei->fail("$ifmt not supported");
				$may_sync and $input = 'maildir:'.
						$lei->abs_path($input_path);
				push @md, $input;
			} elsif ($self->{missing_ok} && !-e _) {
				# for "lei rm-watch" on missing Maildir
				$may_sync and $input = 'maildir:'.
						$lei->abs_path($input_path);
			} else {
				my $m = "Unable to handle $input";
				$input =~ /\A(?:L|kw):/ and
					$m .= ", did you mean +$input?";
				return $lei->fail($m);
			}
		} elsif ($input =~ /\.(?:eml|patch)\z/i && -f $input) {
			lc($in_fmt//'eml') eq 'eml' or return $lei->fail(<<"");
$input is `eml', not --in-format=$in_fmt

			push @{$sync->{no}}, $input if $sync;
		} elsif (-f $input && $input =~ m{\A(.+)/(new|cur)/([^/]+)\z}) {
			# single file in a Maildir
			my ($mdir, $nc, $bn) = ($1, $2, $3);
			my $other = $mdir . ($nc eq 'new' ? '/cur' : '/new');
			return $lei->fail(<fail(<<"");
$input is `eml', not --in-format=$in_fmt

			if ($sync) {
				$input = $lei->abs_path($mdir) . "/$nc/$bn";
				push @{$sync->{ok}}, $input if $sync;
			}
			require PublicInbox::MdirReader;
		} else {
			my $devfd = $lei->path_to_fd($input) // return;
			if ($devfd >= 0 || -f $input || -p _) {
				push @{$sync->{no}}, $input if $sync;
				push @f, $input;
			} elsif (-d "$input/new" && -d "$input/cur") {
				if ($may_sync) {
					$input = 'maildir:'.
						$lei->abs_path($input);
					push @{$sync->{ok}}, $input if $sync;
				}
				push @md, $input;
			} elsif ($self->{missing_ok} && !-e $input) {
				if ($lei->{cmd} eq 'p2q') {
					# will run "git format-patch"
				} elsif ($may_sync) { # for lei rm-watch
					$input = 'maildir:'.
						$lei->abs_path($input);
				}
			} else {
				return $lei->fail("Unable to handle $input")
			}
		}
	}
	if (@f) { check_input_format($lei, \@f) or return }
	if ($sync && $sync->{no}) {
		return $lei->fail(<<"") if !$sync->{ok};
--mail-sync specified but no inputs support it

		# non-fatal if some inputs support support sync
		warn("# --mail-sync will only be used for @{$sync->{ok}}\n");
		warn("# --mail-sync is not supported for: @{$sync->{no}}\n");
	}
	if ($net) {
		$net->{-can_die} = 1;
		if (my $err = $net->errors($lei)) {
			return $lei->fail($err);
		}
		$net->{quiet} = $lei->{opt}->{quiet};
		require PublicInbox::LeiAuth;
		$lei->{auth} //= PublicInbox::LeiAuth->new;
		$lei->{net} //= $net;
	}
	if (scalar(@md)) {
		require PublicInbox::MdirReader;
		if ($self->can('pmdir_cb')) {
			require PublicInbox::LeiPmdir;
			$self->{pmd} = PublicInbox::LeiPmdir->new($lei, $self);
		}

		# start watching Maildirs ASAP
		if ($may_sync && $lei->{sto}) {
			grep(!m!\Amaildir:/!i, @md) and die "BUG: @md (no pfx)";
			$lei->lms(1)->lms_write_prepare->add_folders(@md);
			$lei->refresh_watches;
		}
	}
	$self->{inputs} = $inputs;
}

sub process_inputs {
	my ($self) = @_;
	my $err;
	for my $input (@{$self->{inputs}}) {
		eval { $self->input_path_url($input) };
		next unless $@;
		$err = "$input: $@";
		last;
	}
	# always commit first, even on error partial work is acceptable for
	# lei 
	my $wait = $self->{lei}->{sto}->wq_do('done') if $self->{lei}->{sto};
	$self->{lei}->fail($err) if $err;
}

sub input_only_atfork_child {
	my ($self) = @_;
	my $lei = $self->{lei};
	$lei->_lei_atfork_child;
	PublicInbox::IPC::ipc_atfork_child($self);
	$lei->{auth}->do_auth_atfork($self) if $lei->{auth};
	undef;
}

# alias this as "net_merge_all_done" to use as an LeiAuth callback
sub input_only_net_merge_all_done {
	my ($self) = @_;
	$self->wq_io_do('process_inputs');
	$self->wq_close;
}

# like Getopt::Long, but for +kw:FOO and -kw:FOO to prepare
# for update_xvmd -> update_vmd
# returns something like { "+L" => [ @Labels ], ... }
sub vmd_mod_extract {
	my $argv = $_[-1];
	my $vmd_mod = {};
	my @new_argv;
	for my $x (@$argv) {
		if ($x =~ /\A(\+|\-)(kw|L):(.+)\z/) {
			my ($op, $pfx, $val) = ($1, $2, $3);
			if (my $err = $ERR{$pfx}->($val)) {
				push @{$vmd_mod->{err}}, $err;
			} else { # set "+kw", "+L", "-L", "-kw"
				push @{$vmd_mod->{$op.$pfx}}, $val;
			}
		} else {
			push @new_argv, $x;
		}
	}
	@$argv = @new_argv;
	$vmd_mod;
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiInspect.pm000066400000000000000000000203741430031475700215070ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# "lei inspect" general purpose inspector for stuff in SQLite and
# Xapian.  Will eventually be useful with plain public-inboxes,
# not just lei/store.  This is totally half-baked at the moment
# but useful for testing.
package PublicInbox::LeiInspect;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC);
use PublicInbox::Config;
use PublicInbox::MID qw(mids);
use PublicInbox::NetReader qw(imap_uri nntp_uri);
use POSIX qw(strftime);
use PublicInbox::LeiOverview;
*iso8601 = \&PublicInbox::LeiOverview::iso8601;

sub _json_prep ($) {
	my ($smsg) = @_;
	$smsg->{$_} += 0 for qw(bytes lines); # integerize
	$smsg->{dt} = iso8601($smsg->{ds}) if defined($smsg->{ds});
	$smsg->{rt} = iso8601($smsg->{ts}) if defined($smsg->{ts});
	+{ %$smsg } # unbless and scalarize
}

sub inspect_blob ($$) {
	my ($lei, $oidhex) = @_;
	my $ent = {};
	if (my $lse = $lei->{lse}) {
		my $oidbin = pack('H*', $oidhex);
		my @docids = $lse ? $lse->over->oidbin_exists($oidbin) : ();
		$ent->{'lei/store'} = \@docids if @docids;
		my $lms = $lei->lms;
		if (my $loc = $lms ? $lms->locations_for($oidbin) : undef) {
			$ent->{'mail-sync'} = $loc;
		}
	}
	$ent;
}

sub inspect_imap_uid ($$) {
	my ($lei, $uid_uri) = @_;
	my $ent = {};
	my $lms = $lei->lms or return $ent;
	my @oidhex = $lms->imap_oidhex($lei, $uid_uri);
	$ent->{$$uid_uri} = @oidhex == 1 ? $oidhex[0] :
			((@oidhex == 0) ? undef : \@oidhex);
	$ent;
}

sub inspect_nntp_range {
	my ($lei, $uri) = @_;
	my ($ng, $beg, $end) = $uri->group;
	$uri = $uri->clone;
	$uri->group($ng);
	my $ent = {};
	my $ret = { "$uri" => $ent };
	my $lms = $lei->lms or return $ret;
	my $folders = [ $$uri ];
	eval { $lms->arg2folder($lei, $folders) };
	$lei->qerr("# no folders match $$uri (non-fatal)") if $@;
	$end //= $beg;
	for my $art ($beg..$end) {
		my @oidhex = map { unpack('H*', $_) }
			$lms->num_oidbin($folders->[0], $art);
		$ent->{$art} = @oidhex == 1 ? $oidhex[0] :
				((@oidhex == 0) ? undef : \@oidhex);
	}
	$ret;
}

sub inspect_sync_folder ($$) {
	my ($lei, $folder) = @_;
	my $ent = {};
	my $lms = $lei->lms or return $ent;
	my $folders = [ $folder ];
	eval { $lms->arg2folder($lei, $folders) };
	$lei->qerr("# no folders match $folder (non-fatal)") if $@;
	for my $f (@$folders) {
		$ent->{$f} = $lms->location_stats($f); # may be undef
	}
	$ent
}

sub _inspect_doc ($$) {
	my ($ent, $doc) = @_;
	my $data = $doc->get_data;
	$ent->{data_length} = length($data);
	$ent->{description} = $doc->get_description;
	$ent->{$_} = $doc->$_ for (qw(termlist_count values_count));
	my $cur = $doc->termlist_begin;
	my $end = $doc->termlist_end;
	for (; $cur != $end; $cur++) {
		my $tn = $cur->get_termname;
		$tn =~ s/\A([A-Z]+)// or warn "$tn no prefix! (???)";
		my $term = ($1 // '');
		push @{$ent->{terms}->{$term}}, $tn;
	}
	@$_ = sort(@$_) for values %{$ent->{terms} // {}};
	$cur = $doc->values_begin;
	$end = $doc->values_end;
	for (; $cur != $end; $cur++) {
		my $n = $cur->get_valueno;
		my $v = $cur->get_value;
		my $iv = PublicInbox::Search::sortable_unserialise($v);
		$v = $iv + 0 if defined $iv;
		# not using ->[$n] since we may have large gaps in $n
		$ent->{'values'}->{$n} = $v;
	}
	$ent;
}

sub inspect_docid ($$;$) {
	my ($lei, $docid, $ent) = @_;
	require PublicInbox::Search;
	$ent //= {};
	my $xdb;
	if ($xdb = delete $ent->{xdb}) { # from inspect_num
	} elsif (defined(my $dir = $lei->{opt}->{dir})) {
		no warnings 'once';
		$xdb = $PublicInbox::Search::X{Database}->new($dir);
	} elsif ($lei->{lse}) {
		$xdb = $lei->{lse}->xdb;
	}
	$xdb or return $lei->fail('no Xapian DB');
	my $doc = $xdb->get_document($docid); # raises
	$ent->{docid} = $docid;
	_inspect_doc($ent, $doc);
}

sub dir2ibx ($$) {
	my ($lei, $dir) = @_;
	if (-f "$dir/ei.lock") {
		require PublicInbox::ExtSearch;
		PublicInbox::ExtSearch->new($dir);
	} elsif (-f "$dir/inbox.lock" || -d "$dir/public-inbox") {
		require PublicInbox::Inbox; # v2, v1
		bless { inboxdir => $dir }, 'PublicInbox::Inbox';
	} else {
		$lei->fail("no (indexed) inbox or extindex at $dir");
	}
}

sub inspect_num ($$) {
	my ($lei, $num) = @_;
	my ($docid, $ibx);
	my $ent = { num => $num };
	if (defined(my $dir = $lei->{opt}->{dir})) {
		$ibx = dir2ibx($lei, $dir) or return;
		if (my $srch = $ibx->search) {
			$ent->{xdb} = $srch->xdb and
				$docid = $srch->num2docid($num);
		}
	} elsif ($lei->{lse}) {
		$ibx = $lei->{lse};
		$lei->{lse}->xdb; # set {nshard} for num2docid
		$docid = $lei->{lse}->num2docid($num);
	}
	if ($ibx && $ibx->over) {
		my $smsg = $ibx->over->get_art($num);
		$ent->{smsg} = _json_prep($smsg) if $smsg;
	}
	defined($docid) ? inspect_docid($lei, $docid, $ent) : $ent;
}

sub inspect_mid ($$) {
	my ($lei, $mid) = @_;
	my $ibx;
	my $ent = { mid => $mid };
	if (defined(my $dir = $lei->{opt}->{dir})) {
		$ibx = dir2ibx($lei, $dir)
	} else {
		$ibx = $lei->{lse};
	}
	if ($ibx && $ibx->over) {
		my ($id, $prev);
		while (my $smsg = $ibx->over->next_by_mid($mid, \$id, \$prev)) {
			push @{$ent->{smsg}}, _json_prep($smsg);
		}
	}
	if ($ibx && $ibx->search) {
		my $mset = $ibx->search->mset(qq{mid:"$mid"});
		for (sort { $a->get_docid <=> $b->get_docid } $mset->items) {
			my $tmp = { docid => $_->get_docid };
			_inspect_doc($tmp, $_->get_document);
			push @{$ent->{xdoc}}, $tmp;
		}
	}
	$ent;
}

sub inspect1 ($$$) {
	my ($lei, $item, $more) = @_;
	my $ent;
	if ($item =~ /\Ablob:(.+)/) {
		$ent = inspect_blob($lei, $1);
	} elsif ($item =~ m!\A(?:maildir|mh):!i || -d $item) {
		$ent = inspect_sync_folder($lei, $item);
	} elsif ($item =~ m!\Adocid:([0-9]+)\z!) {
		$ent = inspect_docid($lei, $1 + 0);
	} elsif ($item =~ m!\Anum:([0-9]+)\z!) {
		$ent = inspect_num($lei, $1 + 0);
	} elsif ($item =~ m!\A(?:mid|m):(.+)\z!) {
		$ent = inspect_mid($lei, $1);
	} elsif (my $iuri = imap_uri($item)) {
		if (defined($iuri->uid)) {
			$ent = inspect_imap_uid($lei, $iuri);
		} else {
			$ent = inspect_sync_folder($lei, $item);
		}
	} elsif (my $nuri = nntp_uri($item)) {
		if (defined(my $mid = $nuri->message)) {
			$ent = inspect_mid($lei, $mid);
		} else {
			my ($group, $beg, $end) = $nuri->group;
			if (defined($beg)) {
				$ent = inspect_nntp_range($lei, $nuri);
			} else {
				$ent = inspect_sync_folder($lei, $item);
			}
		}
	} else { # TODO: more things
		return $lei->fail("$item not understood");
	}
	$lei->out($lei->{json}->encode($ent));
	$lei->out(',') if $more;
	1;
}

sub inspect_argv { # via wq_do
	my ($self) = @_;
	my ($lei, $argv) = delete @$self{qw(lei argv)};
	my $multi = scalar(@$argv) > 1;
	$lei->{1}->autoflush(0);
	$lei->out('[') if $multi;
	while (defined(my $x = shift @$argv)) {
		eval { inspect1($lei, $x, scalar(@$argv)) or return };
		warn "E: $@\n" if $@;
	}
	$lei->out(']') if $multi;
}

sub inspect_start ($$) {
	my ($lei, $argv) = @_;
	my $self = bless { lei => $lei, argv => $argv }, __PACKAGE__;
	my ($op_c, $ops) = $lei->workers_start($self, 1);
	$lei->{wq1} = $self;
	$lei->wait_wq_events($op_c, $ops);
	$self->wq_do('inspect_argv');
	$self->wq_close;
}

sub ins_add { # InputPipe->consume callback
	my ($lei) = @_; # $_[1] = $rbuf
	if (defined $_[1]) {
		$_[1] eq '' and return eval {
			my $str = delete $lei->{istr};
			$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
			my $eml = PublicInbox::Eml->new(\$str);
			inspect_start($lei, [
				'blob:'.$lei->git_oid($eml)->hexdigest,
				map { "mid:$_" } @{mids($eml)} ]);
		};
		$lei->{istr} .= $_[1];
	} else {
		$lei->fail("error reading stdin: $!");
	}
}

sub lei_inspect {
	my ($lei, @argv) = @_;
	$lei->{json} = ref(PublicInbox::Config::json())->new->utf8->canonical;
	$lei->{lse} = ($lei->{opt}->{external} // 1) ? do {
		my $sto = $lei->_lei_store;
		$sto ? $sto->search : undef;
	} : undef;
	my $isatty = -t $lei->{1};
	$lei->{json}->pretty(1)->indent(2) if $lei->{opt}->{pretty} || $isatty;
	$lei->start_pager if $isatty;
	if ($lei->{opt}->{stdin}) {
		return $lei->fail(<<'') if @argv;
no args allowed on command-line with --stdin

		require PublicInbox::InputPipe;
		PublicInbox::InputPipe::consume($lei->{0}, \&ins_add, $lei);
	} else {
		inspect_start($lei, \@argv);
	}
}

sub _complete_inspect {
	require PublicInbox::LeiRefreshMailSync;
	PublicInbox::LeiRefreshMailSync::_complete_refresh_mail_sync(@_);
	# TODO: message-ids?, blobs? could get expensive...
}

sub ipc_atfork_child {
	my ($self) = @_;
	$self->{lei}->_lei_atfork_child;
	$self->SUPER::ipc_atfork_child;
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiLcat.pm000066400000000000000000000112541430031475700207620ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# lcat: local cat, display a local message by Message-ID or blob,
# extracting from URL necessary
# "lei lcat "
package PublicInbox::LeiLcat;
use strict;
use v5.10.1;
use PublicInbox::LeiViewText;
use URI::Escape qw(uri_unescape);
use PublicInbox::MID qw($MID_EXTRACT);

sub lcat_folder ($$;$$) {
	my ($lei, $folder, $beg, $end) = @_;
	my $lms = $lei->{-lms_rw} //= $lei->lms // return;
	my $folders = [ $folder ];
	eval { $lms->arg2folder($lei, $folders) };
	return $lei->child_error(0, "# unknown folder: $folder") if $@;
	my %range;
	if (defined($beg)) { # NNTP article range
		$range{min} = $beg;
		$range{max} = $end // $beg;
	}
	for my $f (@$folders) {
		my $fid = $lms->fid_for($f);
		push @{$lei->{lcat_todo}}, { fid => $fid, %range };
	}
}

sub lcat_imap_uri ($$) {
	my ($lei, $uri) = @_;
	# cf. LeiXSearch->lcat_dump
	my $lms = $lei->{-lms_rw} //= $lei->lms // return;
	if (defined $uri->uid) {
		push @{$lei->{lcat_todo}}, $lms->imap_oidhex($lei, $uri);
	} elsif (defined(my $fid = $lms->fid_for($$uri))) {
		push @{$lei->{lcat_todo}}, { fid => $fid };
	} else {
		lcat_folder($lei, $$uri);
	}
}

sub lcat_nntp_uri ($$) {
	my ($lei, $uri) = @_;
	my $mid = $uri->message; # already unescaped by URI::news
	return "mid:$mid" if defined($mid);
	my $lms = $lei->{-lms_rw} //= $lei->lms // return;
	my ($ng, $beg, $end) = $uri->group;
	$uri->group($ng);
	lcat_folder($lei, $$uri, $beg, $end);
	'""';
}

sub extract_1 ($$) {
	my ($lei, $x) = @_;
	if ($x =~ m!\b(maildir:.+)!i) {
		lcat_folder($lei, $1);
		'""'; # blank query, using {lcat_todo}
	} elsif ($x =~ m!\b(([a-z]+)://\S+)!i) {
		my ($u, $scheme) = ($1, $2);
		$u =~ s/[\>\]\)\,\.\;]+\z//;
		if ($scheme =~ m!\A(imaps?)\z!i) {
			require PublicInbox::URIimap;
			lcat_imap_uri($lei, PublicInbox::URIimap->new($u));
			return '""'; # blank query, using {lcat_todo}
		} elsif ($scheme =~ m!\A(?:nntps?|s?news)\z!i) {
			require PublicInbox::URInntps;
			$u = PublicInbox::URInntps->new($u);
			return lcat_nntp_uri($lei, $u);
		} # http, or something else:
		require URI;
		$u = URI->new($u);
		my $p = $u->path;
		my $term;
		if ($p =~ m!([^/]+\@[^/]+)!) { # common msgid pattern
			$term = 'mid:'.uri_unescape($1);

			# is it a URL which returns the full thread?
			if ($u->scheme =~ /\Ahttps?/i &&
				$p =~ m!/(?:T/?|t/?|t\.mbox\.gz|t\.atom)\b!) {

				$lei->{mset_opt}->{threads} = 1;
			}
		} elsif ($u->scheme =~ /\Ahttps?/i &&
				# some msgids don't have '@', see if it looks like
				# a public-inbox URL:
				$p =~ m!/([^/]+)/(raw|t/?|T/?|
					t\.mbox\.gz|t\.atom)\z!x) {
			$lei->{mset_opt}->{threads} = 1 if $2 && $2 ne 'raw';
			$term = 'mid:'.uri_unescape($1);
		}
		$term;
	} elsif ($x =~ $MID_EXTRACT) { # <$MSGID>
		"mid:$1";
	} elsif ($x =~ /\b((?:m|mid):\S+)/) { # our own prefixes (and mairix)
		$1;
	} elsif ($x =~ /\bid:(\S+)/) { # notmuch convention
		"mid:$1";
	} elsif ($x =~ /\bblob:([0-9a-f]{7,})\b/) {
		push @{$lei->{lcat_todo}}, $1; # cf. LeiToMail->wq_atexit_child
		'""'; # blank query
	} else {
		undef;
	}
}

sub extract_all {
	my ($lei, @argv) = @_;
	my $strict = !$lei->{opt}->{stdin};
	my @q;
	for my $x (@argv) {
		if (my $term = extract_1($lei, $x)) {
			push @q, $term;
		} elsif ($strict) {
			return $lei->fail(<<"");
could not extract Message-ID from $x

		}
	}
	delete $lei->{-lms_rw};
	@q ? join(' OR ', @q) : $lei->fail("no Message-ID in: @argv");
}

sub _stdin { # PublicInbox::InputPipe::consume callback for --stdin
	my ($lei) = @_; # $_[1] = $rbuf
	$_[1] // return $lei->fail("error reading stdin: $!");
	return $lei->{mset_opt}->{qstr} .= $_[1] if $_[1] ne '';
	eval {
		$lei->fchdir;
		my @argv = split(/\s+/, $lei->{mset_opt}->{qstr});
		$lei->{mset_opt}->{qstr} = extract_all($lei, @argv) or return;
		$lei->_start_query;
	};
	$lei->fail($@) if $@;
}

sub lei_lcat {
	my ($lei, @argv) = @_;
	my $lxs = $lei->lxs_prepare or return;
	$lei->ale->refresh_externals($lxs, $lei);
	$lei->_lei_store(1);
	my $opt = $lei->{opt};
	my %mset_opt;
	$mset_opt{asc} = $opt->{'reverse'} ? 1 : 0;
	$opt->{sort} //= 'relevance';
	$mset_opt{relevance} = 1;
	$lei->{mset_opt} = \%mset_opt;
	$opt->{'format'} //= 'text' unless defined($opt->{output});
	if ($lei->{opt}->{stdin}) {
		return $lei->fail(<<'') if @argv;
no args allowed on command-line with --stdin

		require PublicInbox::InputPipe;
		PublicInbox::InputPipe::consume($lei->{0}, \&_stdin, $lei);
		return;
	}
	$lei->{mset_opt}->{qstr} = extract_all($lei, @argv) or return;
	$lei->_start_query;
}

sub _complete_lcat {
	require PublicInbox::LeiRefreshMailSync;
	PublicInbox::LeiRefreshMailSync::_complete_refresh_mail_sync(@_);
	# TODO: message-ids?, blobs? could get expensive...
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiLsExternal.pm000066400000000000000000000020211430031475700221500ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# "lei ls-external" command
package PublicInbox::LeiLsExternal;
use strict;
use v5.10.1;

# TODO: does this need JSON output?
sub lei_ls_external {
	my ($lei, $filter) = @_;
	my $do_glob = !$lei->{opt}->{globoff}; # glob by default
	my ($OFS, $ORS) = $lei->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
	$filter //= '*';
	my $re = $do_glob ? $lei->glob2re($filter) : undef;
	$re //= index($filter, '/') < 0 ?
			qr!/\Q$filter\E/?\z! : # exact basename match
			qr/\Q$filter\E/; # grep -F semantics
	my @ext = $lei->externals_each(my $boost = {});
	@ext = $lei->{opt}->{'invert-match'} ? grep(!/$re/, @ext)
					: grep(/$re/, @ext);
	if ($lei->{opt}->{'local'} && !$lei->{opt}->{remote}) {
		@ext = grep(!m!\A[a-z\+]+://!, @ext);
	} elsif ($lei->{opt}->{remote} && !$lei->{opt}->{'local'}) {
		@ext = grep(m!\A[a-z\+]+://!, @ext);
	}
	for my $loc (@ext) {
		$lei->out($loc, $OFS, 'boost=', $boost->{$loc}, $ORS);
	}
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiLsLabel.pm000066400000000000000000000007231430031475700214140ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# "lei ls-label" command
package PublicInbox::LeiLsLabel;
use strict;
use v5.10.1;

sub lei_ls_label { # the "lei ls-label" method
	my ($lei, @argv) = @_;
	# TODO: document stats/counts (expensive)
	my @L = eval { $lei->_lei_store->search->all_terms('L') };
	my $ORS = $lei->{opt}->{z} ? "\0" : "\n";
	$lei->out(map { $_.$ORS } @L);
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiLsMailSource.pm000066400000000000000000000075471430031475700224530ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# command for listing NNTP groups and IMAP folders,
# handy for users with git-credential-helper configured
# TODO: list JMAP labels
package PublicInbox::LeiLsMailSource;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC PublicInbox::LeiInput);

sub input_path_url { # overrides LeiInput version
	my ($self, $url) = @_;
	# TODO: support ndjson and other JSONs we support elsewhere
	my $lei = $self->{lei};
	my $json = $lei->{json};
	my $ORS = $self->{lei}->{opt}->{z} ? "\0" : "\n";
	my @f;
	if ($url =~ m!\Aimaps?://!i) {
		my $uri = PublicInbox::URIimap->new($url);
		my $sec = $lei->{net}->can('uri_section')->($uri);
		my $mic = $lei->{net}->mic_get($uri);
		my $l = $mic->folders_hash($uri->path); # server-side filter
		@$l = map { $_->[2] } # undo Schwartzian transform below:
			sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] }
			map { # prepare to sort -imapd slices numerically
				$_->{name} =~ /\A(.+?)\.([0-9]+)\z/ ?
				[ $1, $2 + 0, $_ ] : [ $_->{name}, -1, $_ ];
			} @$l;
		@f = map { "$sec/$_->{name}" } @$l;
		if ($json) {
			$_->{url} = "$sec/$_->{name}" for @$l;
			$lei->puts($json->encode($l));
		} else {
			if ($self->{lei}->{opt}->{url}) {
				$_->{name} = "$sec/$_->{name}" for @$l;
			}
			$lei->out(join($ORS, (map { $_->{name} } @$l), ''));
		}
	} elsif ($url =~ m!\A(?:nntps?|s?news)://!i) {
		my $uri = PublicInbox::URInntps->new($url);
		my $nn = $lei->{net}->nn_get($uri);
		my $l = $nn->newsgroups($uri->group); # name => description
		my $sec = $lei->{net}->can('uri_section')->($uri);
		if ($json) {
			my $all = $nn->list;
			my @x;
			for my $ng (sort keys %$l) {
				my $desc = $l->{$ng};

# we need to drop CR ourselves iff using IO::Socket::SSL since
# Net::Cmd::getline doesn't get used by Net::NNTP if TLS is in play, noted in:
# 
				$desc =~ s/\r\z//;

				my ($high, $low, $status) = @{$all->{$ng}};
				push @x, { name => $ng, url => "$sec/$ng",
					low => $low + 0,
					high => $high + 0, status => $status,
					description => $desc };
			}
			@f = map { "$sec/$_" } keys %$all;
			$lei->puts($json->encode(\@x));
		} else {
			@f = map { "$sec/$_" } keys %$l;
			if ($self->{lei}->{opt}->{url}) {
				$lei->out(join($ORS, sort(@f), ''));
			} else {
				$lei->out(join($ORS, sort(keys %$l), ''));
			}
		}
	} else { die "BUG: $url not supported" }
	if (@f) {
		my $fc = $lei->url_folder_cache;
		my $lk = $fc->lock_for_scope;
		$fc->dbh->begin_work;
		my $now = time;
		$fc->set($_, $now) for @f;
		$fc->dbh->commit;
	}
}

sub lei_ls_mail_source {
	my ($lei, $url, $pfx) = @_;
	$url =~ m!\A(?:imaps?|nntps?|s?news)://!i or return
		$lei->fail('only NNTP and IMAP URLs supported');
	my $self = bless { pfx => $pfx, -ls_ok => 1 }, __PACKAGE__;
	$self->{cfg} = $lei->_lei_cfg; # may be undef
	$self->prepare_inputs($lei, [ $url ]) or return;
	my $isatty = -t $lei->{1};
	if ($lei->{opt}->{l}) {
		my $json = ref(PublicInbox::Config->json)->new->utf8->canonical;
		$lei->{json} = $json;
		$json->ascii(1) if $lei->{opt}->{ascii};
		$json->pretty(1)->indent(2) if $isatty || $lei->{opt}->{pretty};
	}
	$lei->start_pager if $isatty;
	$lei->{-err_type} = 'non-fatal';
	$lei->wq1_start($self);
}

sub _complete_ls_mail_source {
	my ($lei, @argv) = @_;
	my $match_cb = $lei->complete_url_prepare(\@argv);
	my @k = $lei->url_folder_cache->keys($argv[-1] // undef, 1);
	my @m = map { $match_cb->($_) } @k;
	my %f = map { $_ => 1 } (@m ? @m : @k);
	if (my $lms = $lei->lms) {
		@k = $lms->folders($argv[-1] // undef, 1);
		@m = map { $match_cb->($_) } grep(m!\A[a-z]+://!, @k);
		if (@m) { @f{@m} = @m } else { @f{@k} = @k }
	}
	keys %f;
}

no warnings 'once';
*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;

1;
public-inbox-1.9.0/lib/PublicInbox/LeiLsMailSync.pm000066400000000000000000000014411430031475700221120ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# front-end for the "lei ls-mail-sync" sub-command
package PublicInbox::LeiLsMailSync;
use strict;
use v5.10.1;
use PublicInbox::LeiMailSync;

sub lei_ls_mail_sync {
	my ($lei, $filter) = @_;
	my $lms = $lei->lms or return;
	my $opt = $lei->{opt};
	my $re = $opt->{globoff} ? undef : $lei->glob2re($filter // '*');
	$re //= qr/\Q$filter\E/;
	my @f = $lms->folders;
	@f = $opt->{'invert-match'} ? grep(!/$re/, @f) : grep(/$re/, @f);
	if ($opt->{'local'} && !$opt->{remote}) {
		@f = grep(!m!\A[a-z\+]+://!i, @f);
	} elsif ($opt->{remote} && !$opt->{'local'}) {
		@f = grep(m!\A[a-z\+]+://!i, @f);
	}
	my $ORS = $opt->{z} ? "\0" : "\n";
	$lei->out(join($ORS, @f, ''));
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiLsSearch.pm000066400000000000000000000055531430031475700216100ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# "lei ls-search" to display results saved via "lei q --save"
package PublicInbox::LeiLsSearch;
use strict;
use v5.10.1;
use PublicInbox::LeiSavedSearch;
use parent qw(PublicInbox::IPC);

sub do_ls_search_long {
	my ($self, $pfx) = @_;
	# TODO: share common JSON output code with LeiOverview
	my $json = $self->{json}->new->utf8->canonical;
	my $lei = $self->{lei};
	$json->ascii(1) if $lei->{opt}->{ascii};
	my $fmt = $lei->{opt}->{'format'};
	$lei->{1}->autoflush(0);
	my $ORS = "\n";
	my $pretty = $lei->{opt}->{pretty};
	my $EOR;  # TODO: compact pretty like "lei q"
	if ($fmt =~ /\A(concat)?json\z/ && $pretty) {
		$EOR = ($1//'') eq 'concat' ? "\n}" : "\n},";
	}
	if ($fmt eq 'json') {
		$lei->out('[');
		$ORS = ",\n";
	}
	my @x = sort(grep(/\A\Q$pfx/, PublicInbox::LeiSavedSearch::list($lei)));
	while (my $x = shift @x) {
		$ORS = '' if !scalar(@x);
		my $lss = PublicInbox::LeiSavedSearch->up($lei, $x) or next;
		my $cfg = $lss->{-cfg};
		my $ent = {
			q => $cfg->get_all('lei.q'),
			output => $cfg->{'lei.q.output'},
		};
		for my $k ($lss->ARRAY_FIELDS) {
			my $ary = $cfg->get_all("lei.q.$k") // next;
			$ent->{$k} = $ary;
		}
		for my $k ($lss->BOOL_FIELDS) {
			my $val = $cfg->{"lei.q.$k"} // next;
			$ent->{$k} = $val;
		}
		if (defined $EOR) { # pretty, but compact
			$EOR = "\n}" if !scalar(@x);
			my $buf = "{\n";
			$buf .= join(",\n", map {;
				my $f = $_;
				if (my $v = $ent->{$f}) {
					$v = $json->encode([$v]);
					qq{  "$f": }.substr($v, 1, -1);
				} else {
					();
				}
			# key order by importance
			} (qw(output q), $lss->ARRAY_FIELDS,
				$lss->BOOL_FIELDS) );
			$lei->out($buf .= $EOR);
		} else {
			$lei->out($json->encode($ent), $ORS);
		}
	}
	if ($fmt eq 'json') {
		$lei->out("]\n");
	} elsif ($fmt eq 'concatjson') {
		$lei->out("\n");
	}
}

sub bg_worker ($$$) {
	my ($lei, $pfx, $json) = @_;
	my $self = bless { json => $json }, __PACKAGE__;
	my ($op_c, $ops) = $lei->workers_start($self, 1);
	$lei->{wq1} = $self;
	$self->wq_io_do('do_ls_search_long', [], $pfx);
	$self->wq_close;
	$lei->wait_wq_events($op_c, $ops);
}

sub lei_ls_search {
	my ($lei, $pfx) = @_;
	my $fmt = $lei->{opt}->{'format'} // '';
	if ($lei->{opt}->{l}) {
		$lei->{opt}->{'format'} //= $fmt = 'json';
	}
	my $json;
	my $tty = -t $lei->{1};
	$lei->start_pager if $tty;
	if ($fmt =~ /\A(ldjson|ndjson|jsonl|(?:concat)?json)\z/) {
		$lei->{opt}->{pretty} //= $tty;
		$json = ref(PublicInbox::Config->json);
	} elsif ($fmt ne '') {
		return $lei->fail("unknown format: $fmt");
	}
	my $ORS = "\n";
	if ($lei->{opt}->{z}) {
		return $lei->fail('-z and --format do not mix') if $json;
		$ORS = "\0";
	}
	$pfx //= '';
	return bg_worker($lei, $pfx, $json) if $json;
	for (sort(grep(/\A\Q$pfx/, PublicInbox::LeiSavedSearch::list($lei)))) {
		$lei->out($_, $ORS);
	}
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiLsWatch.pm000066400000000000000000000005461430031475700214460ustar00rootroot00000000000000# Copyright all contributors 
# License: AGPL-3.0+ 

package PublicInbox::LeiLsWatch;
use strict;
use v5.10.1;

sub lei_ls_watch {
	my ($lei) = @_;
	my $cfg = $lei->_lei_cfg or return;
	my @w = (join("\n", keys %$cfg) =~ m/^watch\.(.+?)\.state$/sgm);
	$lei->puts(join("\n", @w)) if @w;
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiMailDiff.pm000066400000000000000000000061761430031475700215610ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# The "lei mail-diff" sub-command, diffs input contents against
# the first message of input
package PublicInbox::LeiMailDiff;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
use File::Temp 0.19 (); # 0.19 for ->newdir
use PublicInbox::Spawn qw(spawn which);
use PublicInbox::MsgIter qw(msg_part_text);
use File::Path qw(remove_tree);
use PublicInbox::ContentHash qw(content_digest);
require PublicInbox::LeiRediff;
use Data::Dumper ();

sub write_part { # Eml->each_part callback
	my ($ary, $self) = @_;
	my ($part, $depth, $idx) = @$ary;
	if ($idx ne '1' || $self->{lei}->{opt}->{'raw-header'}) {
		open my $fh, '>', "$self->{curdir}/$idx.hdr" or die "open: $!";
		print $fh ${$part->{hdr}} or die "print $!";
		close $fh or die "close $!";
	}
	my $ct = $part->content_type || 'text/plain';
	my ($s, $err) = msg_part_text($part, $ct);
	my $sfx = defined($s) ? 'txt' : 'bin';
	open my $fh, '>', "$self->{curdir}/$idx.$sfx" or die "open: $!";
	print $fh ($s // $part->body) or die "print $!";
	close $fh or die "close $!";
}

sub dump_eml ($$$) {
	my ($self, $dir, $eml) = @_;
	local $self->{curdir} = $dir;
	mkdir $dir or die "mkdir($dir): $!";
	$eml->each_part(\&write_part, $self);

	open my $fh, '>', "$dir/content_digest" or die "open: $!";
	my $dig = PublicInbox::ContentDigestDbg->new($fh);
	local $Data::Dumper::Useqq = 1;
	local $Data::Dumper::Terse = 1;
	content_digest($eml, $dig);
	print $fh "\n", $dig->hexdigest, "\n" or die "print $!";
	close $fh or die "close: $!";
}

sub prep_a ($$) {
	my ($self, $eml) = @_;
	$self->{tmp} = File::Temp->newdir('lei-mail-diff-XXXX', TMPDIR => 1);
	dump_eml($self, "$self->{tmp}/a", $eml);
}

sub diff_a ($$) {
	my ($self, $eml) = @_;
	++$self->{nr};
	my $dir = "$self->{tmp}/N$self->{nr}";
	dump_eml($self, $dir, $eml);
	my $cmd = [ qw(git diff --no-index) ];
	my $lei = $self->{lei};
	PublicInbox::LeiRediff::_lei_diff_prepare($lei, $cmd);
	push @$cmd, qw(-- a), "N$self->{nr}";
	my $rdr = { -C => "$self->{tmp}" };
	@$rdr{1, 2} = @$lei{1, 2};
	my $pid = spawn($cmd, $lei->{env}, $rdr);
	waitpid($pid, 0);
	$lei->child_error($?) if $?; # for git diff --exit-code
	File::Path::remove_tree($self->{curdir});
}

sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
	my ($self, $eml) = @_;
	$self->{tmp} ? diff_a($self, $eml) : prep_a($self, $eml);
}

sub lei_mail_diff {
	my ($lei, @argv) = @_;
	my $self = bless {}, __PACKAGE__;
	$self->prepare_inputs($lei, \@argv) or return;
	my $isatty = -t $lei->{1};
	$lei->{opt}->{color} //= $isatty;
	$lei->start_pager if $isatty;
	$lei->{-err_type} = 'non-fatal';
	$lei->wq1_start($self);
}

no warnings 'once';
*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;

package PublicInbox::ContentDigestDbg; # cf. PublicInbox::ContentDigest
use strict;
use v5.10.1;
use Data::Dumper;

sub new { bless { dig => Digest::SHA->new(256), fh => $_[1] }, __PACKAGE__ }

sub add {
	$_[0]->{dig}->add($_[1]);
	print { $_[0]->{fh} } Dumper([split(/^/sm, $_[1])]) or die "print $!";
}

sub hexdigest { $_[0]->{dig}->hexdigest; }

1;
public-inbox-1.9.0/lib/PublicInbox/LeiMailSync.pm000066400000000000000000000455721430031475700216300ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# for maintaining synchronization between lei/store <=> Maildir|MH|IMAP|JMAP
package PublicInbox::LeiMailSync;
use strict;
use v5.10.1;
use parent qw(PublicInbox::Lock);
use DBI qw(:sql_types); # SQL_BLOB
use PublicInbox::ContentHash qw(git_sha);
use Carp ();

sub dbh_new {
	my ($self) = @_;
	my $f = $self->{filename};
	my $creat = !-s $f;
	if ($creat) {
		require PublicInbox::Syscall;
		open my $fh, '+>>', $f or Carp::croak "open($f): $!";
		PublicInbox::Syscall::nodatacow_fh($fh);
	}
	my $dbh = DBI->connect("dbi:SQLite:dbname=$f",'','', {
		AutoCommit => 1,
		RaiseError => 1,
		PrintError => 0,
		sqlite_use_immediate_transaction => 1,
	});
	# no sqlite_unicode, here, all strings are binary
	create_tables($self, $dbh);
	$dbh->do('PRAGMA journal_mode = WAL') if $creat;
	$dbh->do('PRAGMA case_sensitive_like = ON');
	$dbh;
}

sub new {
	my ($cls, $f) = @_;
	bless {
		filename => $f,
		fmap => {},
		lock_path => "$f.flock",
	}, $cls;
}

sub lms_write_prepare { ($_[0]->{dbh} //= dbh_new($_[0])); $_[0] }

sub lms_pause {
	my ($self) = @_;
	$self->{fmap} = {};
	my $dbh = delete $self->{dbh};
	eval { $dbh->do('PRAGMA optimize') } if $dbh;
}

sub create_tables {
	my ($self, $dbh) = @_;
	my $lk = $self->lock_for_scope;

	$dbh->do(<<'');
CREATE TABLE IF NOT EXISTS folders (
	fid INTEGER PRIMARY KEY,
	loc VARBINARY NOT NULL, /* URL;UIDVALIDITY=$N or $TYPE:/pathname */
	UNIQUE (loc)
)

	$dbh->do(<<'');
CREATE TABLE IF NOT EXISTS blob2num (
	oidbin VARBINARY NOT NULL,
	fid INTEGER NOT NULL, /* folder ID */
	uid INTEGER NOT NULL, /* NNTP article number, IMAP UID, MH number */
	/* not UNIQUE(fid, uid), since we may have broken servers */
	UNIQUE (oidbin, fid, uid)
)

	# speeds up LeiImport->ck_update_kw (for "lei import") by 5-6x:
	$dbh->do(<<'');
CREATE INDEX IF NOT EXISTS idx_fid_uid ON blob2num(fid,uid)

	$dbh->do(<<'');
CREATE TABLE IF NOT EXISTS blob2name (
	oidbin VARBINARY NOT NULL,
	fid INTEGER NOT NULL, /* folder ID */
	name VARBINARY NOT NULL, /* Maildir basename, JMAP blobId */
	/* not UNIQUE(fid, name), since we may have broken software */
	UNIQUE (oidbin, fid, name)
)

	# speeds up LeiImport->pmdir_cb (for "lei import") by ~6x:
	$dbh->do(<<'');
CREATE INDEX IF NOT EXISTS idx_fid_name ON blob2name(fid,name)

}

# used to fixup pre-1.7.0 folders
sub update_fid ($$$) {
	my ($dbh, $fid, $loc) = @_;
	my $sth = $dbh->prepare(<<'');
UPDATE folders SET loc = ? WHERE fid = ?

	$sth->bind_param(1, $loc, SQL_BLOB);
	$sth->bind_param(2, $fid);
	$sth->execute;
}

sub get_fid ($$$) {
	my ($sth, $folder, $dbh) = @_;
	$sth->bind_param(1, $folder, SQL_BLOB);
	$sth->execute;
	my ($fid) = $sth->fetchrow_array;
	if (defined $fid) { # for downgrade+upgrade (1.8 -> 1.7 -> 1.8)
		my $del = $dbh->prepare_cached(<<'');
DELETE FROM folders WHERE loc = ? AND fid != ?

		$del->execute($folder, $fid);
	} else {
		$sth->bind_param(1, $folder, SQL_VARCHAR);
		$sth->execute; # fixup old stuff
		($fid) = $sth->fetchrow_array;
		update_fid($dbh, $fid, $folder) if defined($fid);
	}
	$fid;
}

sub fid_for {
	my ($self, $folder, $creat) = @_;
	my $dbh = $self->{dbh} //= dbh_new($self);
	my $sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT fid FROM folders WHERE loc = ? LIMIT 1

	my $fid = get_fid($sth, $folder, $dbh);
	return $fid if defined($fid);

	# caller had trailing slash (LeiToMail)
	if ($folder =~ s!\A((?:maildir|mh):.*?)/+\z!$1!i) {
		$fid = get_fid($sth, $folder, $dbh);
		if (defined $fid) {
			update_fid($dbh, $fid, $folder);
			return $fid;
		}
	# sometimes we stored trailing slash..
	} elsif ($folder =~ m!\A(?:maildir|mh):!i) {
		$fid = get_fid($sth, $folder, $dbh);
		if (defined $fid) {
			update_fid($dbh, $fid, $folder);
			return $fid;
		}
	} elsif ($creat && $folder =~ m!\Aimaps?://!i) {
		require PublicInbox::URIimap;
		my $uri = PublicInbox::URIimap->new($folder);
		$uri->uidvalidity //
			Carp::croak("BUG: $folder has no UIDVALIDITY");
		defined($uri->uid) and Carp::confess("BUG: $folder has UID");
	}

	return unless $creat;
	($fid) = $dbh->selectrow_array('SELECT MAX(fid) FROM folders');

	$fid += 1;
	# in case we're reusing, clobber existing stale refs:
	$dbh->do('DELETE FROM blob2name WHERE fid = ?', undef, $fid);
	$dbh->do('DELETE FROM blob2num WHERE fid = ?', undef, $fid);

	$sth = $dbh->prepare('INSERT INTO folders (fid, loc) VALUES (?, ?)');
	$sth->bind_param(1, $fid);
	$sth->bind_param(2, $folder, SQL_BLOB);
	$sth->execute;

	$fid;
}

sub add_folders {
	my ($self, @folders) = @_;
	my $lk = $self->lock_for_scope;
	for my $f (@folders) { $self->{fmap}->{$f} //= fid_for($self, $f, 1) }
}

sub set_src {
	my ($self, $oidbin, $folder, $id) = @_;
	my $lk = $self->lock_for_scope;
	my $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder, 1);
	my $dbh = $self->{dbh};
	my ($sth, @param3, $del_old);
	if (ref($id)) { # scalar name
		@param3 = ($$id, SQL_BLOB);
		$sth = $dbh->prepare_cached(<<'');
INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?)

		$del_old = $dbh->prepare_cached(<<'');
DELETE FROM blob2name WHERE oidbin = ? AND fid = ? AND name = ?

	} else { # numeric ID (IMAP UID, MH number)
		@param3 = ($id);
		$sth = $dbh->prepare_cached(<<'');
INSERT OR IGNORE INTO blob2num (oidbin, fid, uid) VALUES (?, ?, ?)

		$del_old = $dbh->prepare_cached(<<'');
DELETE FROM blob2num WHERE oidbin = ? AND fid = ? AND uid = ?

	}
	$sth->bind_param(1, $oidbin, SQL_BLOB);
	$sth->bind_param(2, $fid);
	$sth->bind_param(3, @param3);
	my $ret = $sth->execute;
	$del_old->execute($oidbin, $fid, $param3[0]);
	$ret;
}

sub clear_src {
	my ($self, $folder, $id) = @_;
	my $lk = $self->lock_for_scope;
	my $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder, 1);
	my ($sth, @param3);
	if (ref($id)) { # scalar name
		@param3 = ($$id, SQL_BLOB);
		$sth = $self->{dbh}->prepare_cached(<<'');
DELETE FROM blob2name WHERE fid = ? AND name = ?

	} else {
		@param3 = ($id);
		$sth = $self->{dbh}->prepare_cached(<<'');
DELETE FROM blob2num WHERE fid = ? AND uid = ?

	}
	$sth->bind_param(1, $fid);
	$sth->bind_param(2, @param3);
	my $ret = $sth->execute;

	# older versions may not have used SQL_BLOB:
	if (defined($ret) && $ret == 0 && scalar(@param3) == 2) {
		$sth->bind_param(1, $fid);
		$sth->bind_param(2, $param3[0]);
		$ret = $sth->execute;
	}
	$ret;
}

# Maildir-only
sub mv_src {
	my ($self, $folder, $oidbin, $id, $newbn) = @_;
	my $lk = $self->lock_for_scope;
	my $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder, 1);
	$self->{dbh}->begin_work;
	my $sth = $self->{dbh}->prepare_cached(<<'');
UPDATE blob2name SET name = ? WHERE fid = ? AND oidbin = ? AND name = ?

	# eval since unique constraint may fail due to race
	$sth->bind_param(1, $newbn, SQL_BLOB);
	$sth->bind_param(2, $fid);
	$sth->bind_param(3, $oidbin, SQL_BLOB);
	$sth->bind_param(4, $$id, SQL_BLOB);
	my $nr = eval { $sth->execute };
	if (!defined($nr) || $nr == 0) { # $nr may be `0E0'
		# delete from old, pre-SQL_BLOB rows:
		my $del_old = $self->{dbh}->prepare_cached(<<'');
DELETE FROM blob2name WHERE fid = ? AND oidbin = ? AND name = ?

		$del_old->execute($fid, $oidbin, $$id); # missing-OK
		$del_old->execute($fid, $oidbin, $newbn); # ditto

		# may race with a clear_src, ensure new value exists
		$sth = $self->{dbh}->prepare_cached(<<'');
INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?)

		$sth->bind_param(1, $oidbin, SQL_BLOB);
		$sth->bind_param(2, $fid);
		$sth->bind_param(3, $newbn, SQL_BLOB);
		$sth->execute;
	}
	$self->{dbh}->commit;
}

# read-only, iterates every oidbin + UID or name for a given folder
sub each_src {
	my ($self, $folder, $cb, @args) = @_;
	my $dbh = $self->{dbh} //= dbh_new($self);
	my ($fid, @rng);
	my $and_ge_le = '';
	if (ref($folder) eq 'HASH') {
		$fid = $folder->{fid} // die "BUG: no `fid'";
		@rng = grep(defined, @$folder{qw(min max)});
		$and_ge_le = 'AND uid >= ? AND uid <= ?' if @rng;
	} else {
		$fid = $self->{fmap}->{$folder} //=
			fid_for($self, $folder) // return;
	}

	# minimize implicit txn time to avoid blocking writers by
	# batching SELECTs.  This looks wonky but is necessary since
	# $cb-> may access the DB on its own.
	my $ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng);
SELECT _rowid_,oidbin,uid FROM blob2num WHERE fid = ? $and_ge_le
ORDER BY _rowid_ ASC LIMIT 1000

	my $min = @$ary ? $ary->[-1]->[0] : undef;
	while (defined $min) {
		for my $row (@$ary) { $cb->($row->[1], $row->[2], @args) }

		$ary = $dbh->selectall_arrayref(<<"", undef, $fid, @rng, $min);
SELECT _rowid_,oidbin,uid FROM blob2num
WHERE fid = ? $and_ge_le AND _rowid_ > ?
ORDER BY _rowid_ ASC LIMIT 1000

		$min = @$ary ? $ary->[-1]->[0] : undef;
	}

	$ary = $dbh->selectall_arrayref(<<'', undef, $fid);
SELECT _rowid_,oidbin,name FROM blob2name WHERE fid = ?
ORDER BY _rowid_ ASC LIMIT 1000

	$min = @$ary ? $ary->[-1]->[0] : undef;
	while (defined $min) {
		for my $row (@$ary) { $cb->($row->[1], \($row->[2]), @args) }

		$ary = $dbh->selectall_arrayref(<<'', undef, $fid, $min);
SELECT _rowid_,oidbin,name FROM blob2name WHERE fid = ? AND _rowid_ > ?
ORDER BY _rowid_ ASC LIMIT 1000

		$min = @$ary ? $ary->[-1]->[0] : undef;
	}
}

sub location_stats {
	my ($self, $folder) = @_;
	my $dbh = $self->{dbh} //= dbh_new($self);
	my $fid;
	my $ret = {};
	$fid = $self->{fmap}->{$folder} //= fid_for($self, $folder) // return;
	my ($row) = $dbh->selectrow_array(<<"", undef, $fid);
SELECT COUNT(name) FROM blob2name WHERE fid = ?

	$ret->{'name.count'} = $row if $row;
	my $ntype = ($folder =~ m!\A(?:nntps?|s?news)://!i) ? 'article' :
		(($folder =~ m!\Aimaps?://!i) ? 'uid' : "TODO<$folder>");
	for my $op (qw(count min max)) {
		($row) = $dbh->selectrow_array(<<"", undef, $fid);
SELECT $op(uid) FROM blob2num WHERE fid = ?

		$row or last;
		$ret->{"$ntype.$op"} = $row;
	}
	$ret;
}

# returns a { location => [ list-of-ids-or-names ] } mapping
sub locations_for {
	my ($self, $oidbin) = @_;
	my ($fid, $sth, $id, %fid2id, %seen);
	my $dbh = $self->{dbh} //= dbh_new($self);
	$sth = $dbh->prepare('SELECT fid,uid FROM blob2num WHERE oidbin = ?');
	$sth->bind_param(1, $oidbin, SQL_BLOB);
	$sth->execute;
	while (my ($fid, $uid) = $sth->fetchrow_array) {
		push @{$fid2id{$fid}}, $uid;
		$seen{"$uid.$fid"} = 1;
	}

	# deal with 1.7.0 DBs :<
	$sth->bind_param(1, $oidbin, SQL_VARCHAR);
	$sth->execute;
	while (my ($fid, $uid) = $sth->fetchrow_array) {
		next if $seen{"$uid.$fid"};
		push @{$fid2id{$fid}}, $uid;
	}

	%seen = ();
	$sth = $dbh->prepare('SELECT fid,name FROM blob2name WHERE oidbin = ?');
	$sth->bind_param(1, $oidbin, SQL_BLOB);
	$sth->execute;
	while (my ($fid, $name) = $sth->fetchrow_array) {
		push @{$fid2id{$fid}}, $name;
		$seen{"$fid.$name"} = 1;
	}

	# deal with 1.7.0 DBs :<
	$sth->bind_param(1, $oidbin, SQL_VARCHAR);
	$sth->execute;
	while (my ($fid, $name) = $sth->fetchrow_array) {
		next if $seen{"$fid.$name"};
		push @{$fid2id{$fid}}, $name;
	}

	$sth = $dbh->prepare('SELECT loc FROM folders WHERE fid = ? LIMIT 1');
	my $ret = {};
	while (my ($fid, $ids) = each %fid2id) {
		$sth->execute($fid);
		my ($loc) = $sth->fetchrow_array;
		unless (defined $loc) {
			my $oidhex = unpack('H*', $oidbin);
			warn "E: fid=$fid for $oidhex unknown:\n", map {
					'E: '.(ref() ? $$_ : "#$_")."\n";
				} @$ids;
			next;
		}
		$ret->{$loc} = $ids;
	}
	scalar(keys %$ret) ? $ret : undef;
}

# returns a list of folders used for completion
sub folders {
	my ($self, @pfx) = @_;
	my $sql = 'SELECT loc FROM folders';
	my $re;
	if (defined($pfx[0])) {
		$sql .= ' WHERE loc REGEXP ?'; # DBD::SQLite uses perlre
		$re = !!$pfx[1] ? '.*' : '';
		$re .= quotemeta($pfx[0]);
		$re .= '.*';
	}
	my $sth = ($self->{dbh} //= dbh_new($self))->prepare($sql);
	$sth->bind_param(1, $re) if defined($re);
	$sth->execute;
	map { $_->[0] } @{$sth->fetchall_arrayref};
}

sub local_blob {
	my ($self, $oidhex, $vrfy) = @_;
	my $dbh = $self->{dbh} //= dbh_new($self);
	my $b2n = $dbh->prepare(<<'');
SELECT f.loc,b.name FROM blob2name b
LEFT JOIN folders f ON b.fid = f.fid
WHERE b.oidbin = ?

	$b2n->bind_param(1, pack('H*', $oidhex), SQL_BLOB);
	$b2n->execute;
	while (my ($d, $n) = $b2n->fetchrow_array) {
		substr($d, 0, length('maildir:')) = '';
		# n.b. both mbsync and offlineimap use ":2," as a suffix
		# in "new/", despite (from what I understand of reading
		# ), the ":2," only
		# applies to files in "cur/".
		my @try = $n =~ /:2,[a-zA-Z]+\z/ ? qw(cur new) : qw(new cur);
		for my $x (@try) {
			my $f = "$d/$x/$n";
			open my $fh, '<', $f or next;
			# some (buggy) Maildir writers are non-atomic:
			next unless -s $fh;
			local $/;
			my $raw = <$fh>;
			if ($vrfy) {
				my $got = git_sha(1, \$raw)->hexdigest;
				if ($got ne $oidhex) {
					warn "$f changed $oidhex => $got\n";
					next;
				}
			}
			return \$raw;
		}
	}
	undef;
}

sub match_imap_url {
	my ($self, $url, $all) = @_; # $all = [ $lms->folders ];
	$all //= [ $self->folders ];
	require PublicInbox::URIimap;
	my $want = PublicInbox::URIimap->new($url)->canonical;
	my ($s, $h, $mb) = ($want->scheme, $want->host, $want->mailbox);
	my @uri = map { PublicInbox::URIimap->new($_)->canonical }
		grep(m!\A\Q$s\E://.*?\Q$h\E\b.*?/\Q$mb\E\b!, @$all);
	my @match;
	for my $x (@uri) {
		next if $x->mailbox ne $want->mailbox;
		next if $x->host ne $want->host;
		next if $x->port != $want->port;
		my $x_uidval = $x->uidvalidity;
		next if ($want->uidvalidity // $x_uidval) != $x_uidval;

		# allow nothing in want to possibly match ";AUTH=ANONYMOUS"
		if (defined($x->auth) && !defined($want->auth) &&
				!defined($want->user)) {
			push @match, $x;
		# or maybe user was forgotten on CLI:
		} elsif (defined($x->user) && !defined($want->user)) {
			push @match, $x;
		} elsif (($x->user//"\0") eq ($want->user//"\0")) {
			push @match, $x;
		}
	}
	return @match if wantarray;
	scalar(@match) <= 1 ? $match[0] :
			"E: `$url' is ambiguous:\n\t".join("\n\t", @match)."\n";
}

sub match_nntp_url ($$$) {
	my ($self, $url, $all) = @_; # $all = [ $lms->folders ];
	$all //= [ $self->folders ];
	require PublicInbox::URInntps;
	my $want = PublicInbox::URInntps->new($url)->canonical;
	my ($s, $h, $p) = ($want->scheme, $want->host, $want->port);
	my $ng = $want->group; # force scalar (no article ranges)
	my @uri = map { PublicInbox::URInntps->new($_)->canonical }
		grep(m!\A\Q$s\E://.*?\Q$h\E\b.*?/\Q$ng\E\b!, @$all);
	my @match;
	for my $x (@uri) {
		next if $x->group ne $ng || $x->host ne $h || $x->port != $p;
		# maybe user was forgotten on CLI:
		if (defined($x->userinfo) && !defined($want->userinfo)) {
			push @match, $x;
		} elsif (($x->userinfo//"\0") eq ($want->userinfo//"\0")) {
			push @match, $x;
		}
	}
	return @match if wantarray;
	scalar(@match) <= 1 ? $match[0] :
			"E: `$url' is ambiguous:\n\t".join("\n\t", @match)."\n";
}

# returns undef on failure, number on success
sub group2folders {
	my ($self, $lei, $all, $folders) = @_;
	return $lei->fail(< $_ } split(/,/, $all);
	my @ok = grep(defined, delete(@x{qw(local remote), ''}));
	push(@ok, '') if $all eq '';
	my @no = keys %x;
	if (@no) {
		@no = (join(',', @no));
		return $lei->fail(<folders;
	for my $ok (@ok) {
		if ($ok eq 'local') {
			@inc = grep(!m!\A[a-z0-9\+]+://!i, @all);
		} elsif ($ok eq 'remote') {
			@inc = grep(m!\A[a-z0-9\+]+://!i, @all);
		} elsif ($ok ne '') {
			return $lei->fail("--all=$all not understood");
		} else {
			@inc = @all;
		}
		push(@$folders, (grep { !$seen{$_}++ } @inc));
	}
	scalar(@$folders) || $lei->fail(<folders;
	my %all = map { $_ => 1 } @all;
	my @no;
	for (@$folders) {
		next if $all{$_}; # ok
		if (m!\A(maildir|mh):(.+)!i) {
			my $type = lc $1;
			my $d = "$type:".$lei->abs_path($2);
			push(@no, $_) unless $all{$d};
			$_ = $d;
		} elsif (-d "$_/new" && -d "$_/cur") {
			my $d = 'maildir:'.$lei->abs_path($_);
			push(@no, $_) unless $all{$d};
			$_ = $d;
		} elsif (m!\Aimaps?://!i) {
			my $orig = $_;
			my $res = match_imap_url($self, $orig, \@all);
			if (ref $res) {
				$_ = $$res;
				$lei->qerr(<qerr(<lock_for_scope;
	for my $folder (@folders) {
		my $fid = delete($self->{fmap}->{$folder}) //
			fid_for($self, $folder) // next;
		for my $t (qw(blob2name blob2num folders)) {
			$self->{dbh}->do("DELETE FROM $t WHERE fid = ?",
					undef, $fid);
		}
	}
}

# only used for changing canonicalization errors
sub rename_folder {
	my ($self, $old, $new) = @_;
	my $lk = $self->lock_for_scope;
	my $ofid = delete($self->{fmap}->{$old}) //
		fid_for($self, $old) // return;
	eval {
		$self->{dbh}->do(<{fmap}->{$new} // fid_for($self, $new);
		for my $t (qw(blob2name blob2num)) {
			$self->{dbh}->do(<{dbh}->do(<{fmap}->{$url} //= fid_for($self, $url) // return ();
	my $sth = $self->{dbh}->prepare_cached(<execute($fid, $uid);
	my %uniq; # for public-inbox <= 1.7.0
	grep { !$uniq{$_}++ } map { $_->[0] } @{$sth->fetchall_arrayref};
}

sub name_oidbin ($$$) {
	my ($self, $mdir, $nm) = @_;
	my $fid = $self->{fmap}->{$mdir} //= fid_for($self, $mdir) // return;
	my $sth = $self->{dbh}->prepare_cached(<bind_param(1, $fid);
	$sth->bind_param(2, $nm, SQL_BLOB);
	$sth->execute;
	my @bin = map { $_->[0] } @{$sth->fetchall_arrayref};
	$sth->bind_param(1, $fid);
	$sth->bind_param(2, $nm, SQL_VARCHAR);
	$sth->execute;
	my @old = map { $_->[0] } @{$sth->fetchall_arrayref};
	my %uniq; # for public-inbox <= 1.7.0
	grep { !$uniq{$_}++ } (@bin, @old);
}

sub imap_oidhex {
	my ($self, $lei, $uid_uri) = @_;
	my $mailbox_uri = $uid_uri->clone;
	$mailbox_uri->uid(undef);
	my $folders = [ $$mailbox_uri ];
	eval { $self->arg2folder($lei, $folders) };
	$lei->qerr("# no sync information for $mailbox_uri") if $@;
	map { unpack('H*',$_) } num_oidbin($self, $folders->[0], $uid_uri->uid)
}

1;
public-inbox-1.9.0/lib/PublicInbox/LeiMirror.pm000066400000000000000000000375701430031475700213620ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# "lei add-external --mirror" support (also "public-inbox-clone");
package PublicInbox::LeiMirror;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC);
use PublicInbox::Config;
use PublicInbox::AutoReap;
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use IO::Compress::Gzip qw(gzip $GzipError);
use PublicInbox::Spawn qw(popen_rd spawn);
use File::Temp ();
use Fcntl qw(SEEK_SET O_CREAT O_EXCL O_WRONLY);
use Carp qw(croak);

sub _wq_done_wait { # dwaitpid callback (via wq_eof)
	my ($arg, $pid) = @_;
	my ($mrr, $lei) = @$arg;
	my $f = "$mrr->{dst}/mirror.done";
	if ($?) {
		$lei->child_error($?);
	} elsif (!unlink($f)) {
		warn("unlink($f): $!\n") unless $!{ENOENT};
	} else {
		if ($lei->{cmd} ne 'public-inbox-clone') {
			$lei->lazy_cb('add-external', '_finish_'
					)->($lei, $mrr->{dst});
		}
		$lei->qerr("# mirrored $mrr->{src} => $mrr->{dst}");
	}
	$lei->dclose;
}

# for old installations without manifest.js.gz
sub try_scrape {
	my ($self) = @_;
	my $uri = URI->new($self->{src});
	my $lei = $self->{lei};
	my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
	my $cmd = $curl->for_uri($lei, $uri, '--compressed');
	my $opt = { 0 => $lei->{0}, 2 => $lei->{2} };
	my $fh = popen_rd($cmd, undef, $opt);
	my $html = do { local $/; <$fh> } // die "read(curl $uri): $!";
	close($fh) or return $lei->child_error($?, "@$cmd failed");

	# we grep with URL below, we don't want Subject/From headers
	# making us clone random URLs
	my @html = split(/
/, $html); my @urls = ($html[-1] =~ m!\bgit clone --mirror ([a-z\+]+://\S+)!g); my $url = $uri->as_string; chop($url) eq '/' or die "BUG: $uri not canonicalized"; # since this is for old instances w/o manifest.js.gz, try v1 first return clone_v1($self) if grep(m!\A\Q$url\E/*\z!, @urls); if (my @v2_urls = grep(m!\A\Q$url\E/[0-9]+\z!, @urls)) { my %v2_epochs = map { my ($n) = (m!/([0-9]+)\z!); $n => URI->new($_) } @v2_urls; # uniq return clone_v2($self, \%v2_epochs); } # filter out common URLs served by WWW (e.g /$MSGID/T/) if (@urls && $url =~ s!/+[^/]+\@[^/]+/.*\z!! && grep(m!\A\Q$url\E/*\z!, @urls)) { die <<""; E: confused by scraping <$uri>, did you mean <$url>? } @urls and die <<""; E: confused by scraping <$uri>, got ambiguous results: @urls die "E: scraping <$uri> revealed nothing\n"; } sub clone_cmd { my ($lei, $opt) = @_; my @cmd = qw(git); $opt->{$_} = $lei->{$_} for (0..2); # we support "-c $key=$val" for arbitrary git config options # e.g.: git -c http.proxy=socks5h://127.0.0.1:9050 push(@cmd, '-c', $_) for @{$lei->{opt}->{c} // []}; push @cmd, qw(clone --mirror); push @cmd, '-q' if $lei->{opt}->{quiet}; push @cmd, '-v' if $lei->{opt}->{verbose}; # XXX any other options to support? # --reference is tricky with multiple epochs... @cmd; } sub ft_rename ($$$) { my ($ft, $dst, $open_mode) = @_; my $fn = $ft->filename; my @st = stat($dst); my $mode = @st ? ($st[2] & 07777) : ($open_mode & ~umask); chmod($mode, $ft) or croak "E: chmod $fn: $!"; rename($fn, $dst) or croak "E: rename($fn => $ft): $!"; $ft->unlink_on_destroy(0); } sub _get_txt { # non-fatal my ($self, $endpoint, $file, $mode) = @_; my $uri = URI->new($self->{src}); my $lei = $self->{lei}; my $path = $uri->path; chop($path) eq '/' or die "BUG: $uri not canonicalized"; $uri->path("$path/$endpoint"); my $ft = File::Temp->new(TEMPLATE => "$file-XXXX", DIR => $self->{dst}); my $opt = { 0 => $lei->{0}, 1 => $lei->{1}, 2 => $lei->{2} }; my $cmd = $self->{curl}->for_uri($lei, $uri, qw(--compressed -R -o), $ft->filename); my $cerr = run_reap($lei, $cmd, $opt); return "$uri missing" if ($cerr >> 8) == 22; return "# @$cmd failed (non-fatal)" if $cerr; ft_rename($ft, "$self->{dst}/$file", $mode); undef; # success } # tries the relatively new /$INBOX/_/text/config/raw endpoint sub _try_config { my ($self) = @_; my $dst = $self->{dst}; if (!-d $dst || !mkdir($dst)) { require File::Path; File::Path::mkpath($dst); -d $dst or die "mkpath($dst): $!\n"; } my $err = _get_txt($self, qw(_/text/config/raw inbox.config.example), 0444); return warn($err, "\n") if $err; my $f = "$self->{dst}/inbox.config.example"; my $cfg = PublicInbox::Config->git_config_dump($f, $self->{lei}->{2}); my $ibx = $self->{ibx} = {}; for my $sec (grep(/\Apublicinbox\./, @{$cfg->{-section_order}})) { for (qw(address newsgroup nntpmirror)) { $ibx->{$_} = $cfg->{"$sec.$_"}; } } } sub set_description ($) { my ($self) = @_; my $f = "$self->{dst}/description"; open my $fh, '+>>', $f or die "open($f): $!"; seek($fh, 0, SEEK_SET) or die "seek($f): $!"; chomp(my $d = do { local $/; <$fh> } // die "read($f): $!"); if ($d eq '($INBOX_DIR/description missing)' || $d =~ /^Unnamed repository/ || $d !~ /\S/) { seek($fh, 0, SEEK_SET) or die "seek($f): $!"; truncate($fh, 0) or die "truncate($f): $!"; print $fh "mirror of $self->{src}\n" or die "print($f): $!"; close $fh or die "close($f): $!"; } } sub index_cloned_inbox { my ($self, $iv) = @_; my $lei = $self->{lei}; my $err = _get_txt($self, qw(description description), 0666); warn($err, "\n") if $err; # non fatal eval { set_description($self) }; warn $@ if $@; # n.b. public-inbox-clone works w/o (SQLite || Xapian) # lei is useless without Xapian + SQLite if ($lei->{cmd} ne 'public-inbox-clone') { my $ibx = delete($self->{ibx}) // { address => [ 'lei@example.com' ], version => $iv, }; $ibx->{inboxdir} = $self->{dst}; PublicInbox::Inbox->new($ibx); PublicInbox::InboxWritable->new($ibx); my $opt = {}; for my $sw ($lei->index_opt) { my ($k) = ($sw =~ /\A([\w-]+)/); $opt->{$k} = $lei->{opt}->{$k}; } # force synchronous dwaitpid for v2: local $PublicInbox::DS::in_loop = 0; my $cfg = PublicInbox::Config->new(undef, $lei->{2}); my $env = PublicInbox::Admin::index_prepare($opt, $cfg); local %ENV = (%ENV, %$env) if $env; PublicInbox::Admin::progress_prepare($opt, $lei->{2}); PublicInbox::Admin::index_inbox($ibx, undef, $opt); } open my $x, '>', "$self->{dst}/mirror.done"; # for _wq_done_wait } sub run_reap { my ($lei, $cmd, $opt) = @_; $lei->qerr("# @$cmd"); my $ar = PublicInbox::AutoReap->new(spawn($cmd, undef, $opt)); $ar->join; my $ret = $?; $? = 0; # don't let it influence normal exit $ret; } sub clone_v1 { my ($self) = @_; my $lei = $self->{lei}; my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return; my $uri = URI->new($self->{src}); defined($lei->{opt}->{epoch}) and die "$uri is a v1 inbox, --epoch is not supported\n"; my $pfx = $curl->torsocks($lei, $uri) or return; my $cmd = [ @$pfx, clone_cmd($lei, my $opt = {}), $uri->as_string, $self->{dst} ]; my $cerr = run_reap($lei, $cmd, $opt); return $lei->child_error($cerr, "@$cmd failed") if $cerr; _try_config($self); write_makefile($self->{dst}, 1); index_cloned_inbox($self, 1); } sub parse_epochs ($$) { my ($opt_epochs, $v2_epochs) = @_; # $epcohs "LOW..HIGH" $opt_epochs // return; # undef => all epochs my ($lo, $dotdot, $hi, @extra) = split(/(\.\.)/, $opt_epochs); undef($lo) if ($lo // '') eq ''; my $re = qr/\A~?[0-9]+\z/; if (@extra || (($lo // '0') !~ $re) || (($hi // '0') !~ $re) || !(grep(defined, $lo, $hi))) { die < $b } keys %$v2_epochs; for (grep(defined, $lo, $hi)) { if (/\A[0-9]+\z/) { $_ > $n[-1] and die "`$_' exceeds maximum available epoch ($n[-1])\n"; $_ < $n[0] and die "`$_' is lower than minimum available epoch ($n[0])\n"; } elsif (/\A~([0-9]+)/) { my $off = -$1 - 1; $n[$off] // die "`$_' is out of range\n"; $_ = $n[$off]; } else { die "`$_' not understood\n" } } defined($lo) && defined($hi) && $lo > $hi and die "low value (`$lo') exceeds high (`$hi')\n"; $lo //= $n[0] if $dotdot; $hi //= $n[-1] if $dotdot; $hi //= $lo; my $want = {}; for ($lo..$hi) { if (defined $v2_epochs->{$_}) { $want->{$_} = 1; } else { warn "# epoch $_ is not available (non-fatal, $lo..$hi)\n"; } } $want } sub init_placeholder ($$) { my ($src, $edst) = @_; PublicInbox::Import::init_bare($edst); my $f = "$edst/config"; open my $fh, '>>', $f or die "open($f): $!"; print $fh < manifest.js.gz hashref my $lei = $self->{lei}; my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return; my $pfx = $curl->torsocks($lei, (values %$v2_epochs)[0]) or return; my $dst = $self->{dst}; my $want = parse_epochs($lei->{opt}->{epoch}, $v2_epochs); my (@src_edst, @read_only, @skip_nr); for my $nr (sort { $a <=> $b } keys %$v2_epochs) { my $uri = $v2_epochs->{$nr}; my $src = $uri->as_string; my $edst = $dst; $src =~ m!/([0-9]+)(?:\.git)?\z! or die <<""; failed to extract epoch number from $src $1 + 0 == $nr or die "BUG: <$uri> miskeyed $1 != $nr"; $edst .= "/git/$nr.git"; if (!$want || $want->{$nr}) { push @src_edst, $src, $edst; } else { # create a placeholder so users only need to chmod +w init_placeholder($src, $edst); push @read_only, $edst; push @skip_nr, $nr; } } if (@skip_nr) { # filter out the epochs we skipped my $re = join('|', @skip_nr); my @del = grep(m!/git/$re\.git\z!, keys %$m); delete @$m{@del}; $self->{-culled_manifest} = 1; } my $lk = bless { lock_path => "$dst/inbox.lock" }, 'PublicInbox::Lock'; _try_config($self); my $on_destroy = $lk->lock_for_scope($$); my @cmd = clone_cmd($lei, my $opt = {}); while (my ($src, $edst) = splice(@src_edst, 0, 2)) { my $cmd = [ @$pfx, @cmd, $src, $edst ]; my $cerr = run_reap($lei, $cmd, $opt); return $lei->child_error($cerr, "@$cmd failed") if $cerr; } require PublicInbox::MultiGit; my $mg = PublicInbox::MultiGit->new($dst, 'all.git', 'git'); $mg->fill_alternates; for my $i ($mg->git_epochs) { $mg->epoch_cfg_set($i) } for my $edst (@read_only) { my @st = stat($edst) or die "stat($edst): $!"; chmod($st[2] & 0555, $edst) or die "chmod(a-w, $edst): $!"; } write_makefile($self->{dst}, 2); undef $on_destroy; # unlock index_cloned_inbox($self, 2); } # PSGI mount prefixes and manifest.js.gz prefixes don't always align... sub deduce_epochs ($$) { my ($m, $path) = @_; my ($v1_ent, @v2_epochs); my $path_pfx = ''; $path =~ s!/+\z!!; do { $v1_ent = $m->{$path}; @v2_epochs = grep(m!\A\Q$path\E/git/[0-9]+\.git\z!, keys %$m); } while (!defined($v1_ent) && !@v2_epochs && $path =~ s!\A(/[^/]+)/!/! and $path_pfx .= $1); ($path_pfx, $v1_ent ? $path : undef, @v2_epochs); } sub decode_manifest ($$$) { my ($fh, $fn, $uri) = @_; my $js; my $gz = do { local $/; <$fh> } // die "slurp($fn): $!"; gunzip(\$gz => \$js, MultiStream => 1) or die "gunzip($uri): $GunzipError\n"; my $m = eval { PublicInbox::Config->json->decode($js) }; die "$uri: error decoding `$js': $@\n" if $@; ref($m) eq 'HASH' or die "$uri unknown type: ".ref($m); $m; } sub try_manifest { my ($self) = @_; my $uri = URI->new($self->{src}); my $lei = $self->{lei}; my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return; my $path = $uri->path; chop($path) eq '/' or die "BUG: $uri not canonicalized"; $uri->path($path . '/manifest.js.gz'); my $pdir = $lei->rel2abs($self->{dst}); $pdir =~ s!/[^/]+/?\z!!; my $ft = File::Temp->new(TEMPLATE => 'm-XXXX', UNLINK => 1, DIR => $pdir, SUFFIX => '.tmp'); my $fn = $ft->filename; my ($bn) = ($fn =~ m!/([^/]+)\z!); my $cmd = $curl->for_uri($lei, $uri, '-R', '-o', $bn); my $opt = { -C => $pdir }; $opt->{$_} = $lei->{$_} for (0..2); my $cerr = run_reap($lei, $cmd, $opt); if ($cerr) { return try_scrape($self) if ($cerr >> 8) == 22; # 404 missing return $lei->child_error($cerr, "@$cmd failed"); } my $m = eval { decode_manifest($ft, $fn, $uri) }; if ($@) { warn $@; return try_scrape($self); } my ($path_pfx, $v1_path, @v2_epochs) = deduce_epochs($m, $path); if (@v2_epochs) { # It may be possible to have v1 + v2 in parallel someday: warn(<path($path_pfx.$_); my ($n) = ("$uri" =~ m!/([0-9]+)\.git\z!); $n => $uri->clone } @v2_epochs; clone_v2($self, \%v2_epochs, $m); } elsif (defined $v1_path) { clone_v1($self); } else { die "E: confused by <$uri>, possible matches:\n\t", join(', ', sort keys %$m), "\n"; } if (delete $self->{-culled_manifest}) { # set by clone_v2 # write the smaller manifest if epochs were skipped so # users won't have to delete manifest if they +w an # epoch they no longer want to skip my $json = PublicInbox::Config->json->encode($m); gzip(\$json => $fn) or die "gzip: $GzipError"; } ft_rename($ft, "$self->{dst}/manifest.js.gz", 0666); } sub start_clone_url { my ($self) = @_; return try_manifest($self) if $self->{src} =~ m!\Ahttps?://!; die "TODO: non-HTTP/HTTPS clone of $self->{src} not supported, yet"; } sub do_mirror { # via wq_io_do my ($self) = @_; my $lei = $self->{lei}; umask($lei->{client_umask}) if defined $lei->{client_umask}; eval { my $iv = $lei->{opt}->{'inbox-version'}; if (defined $iv) { return clone_v1($self) if $iv == 1; return try_scrape($self) if $iv == 2; die "bad --inbox-version=$iv\n"; } return start_clone_url($self) if $self->{src} =~ m!://!; die "TODO: cloning local directories not supported, yet"; }; $lei->fail($@) if $@; } sub start { my ($cls, $lei, $src, $dst) = @_; my $self = bless { src => $src, dst => $dst }, $cls; if ($src =~ m!https?://!) { require URI; require PublicInbox::LeiCurl; } require PublicInbox::Lock; require PublicInbox::Inbox; require PublicInbox::Admin; require PublicInbox::InboxWritable; $lei->request_umask; my ($op_c, $ops) = $lei->workers_start($self, 1); $lei->{wq1} = $self; $self->wq_io_do('do_mirror', []); $self->wq_close; $lei->wait_wq_events($op_c, $ops); } sub ipc_atfork_child { my ($self) = @_; $self->{lei}->_lei_atfork_child; $self->SUPER::ipc_atfork_child; } sub write_makefile { my ($dir, $ibx_ver) = @_; my $f = "$dir/Makefile"; if (sysopen my $fh, $f, O_CREAT|O_EXCL|O_WRONLY) { print $fh < # License: AGPL-3.0+ # internal command for dealing with inotify, kqueue vnodes, etc # it is a semi-persistent worker package PublicInbox::LeiNoteEvent; use strict; use v5.10.1; use parent qw(PublicInbox::IPC); use PublicInbox::DS; use Errno qw(ENOENT); our $to_flush; # { cfgpath => $lei } sub flush_lei ($;$) { my ($lei, $manual) = @_; my $lne = delete $lei->{cfg}->{-lei_note_event} // return; $lne->{lei_sock} = $lei->{sock} if $manual; $lne->wq_close; # runs _lei_wq_eof; } # we batch up writes and flush every 5s (matching Linux default # writeback behavior) since MUAs can trigger a storm of inotify events sub flush_task { # PublicInbox::DS timer callback my $todo = $to_flush // return; $to_flush = undef; for my $lei (values %$todo) { flush_lei($lei) } } sub eml_event ($$$$) { my ($self, $eml, $vmd, $state) = @_; my $sto = $self->{lei}->{sto}; if ($state =~ /\Aimport-(?:rw|ro)\z/) { $sto->wq_do('set_eml', $eml, $vmd); } elsif ($state =~ /\Aindex-(?:rw|ro)\z/) { my $xoids = $self->{lei}->ale->xoids_for($eml); $sto->wq_do('index_eml_only', $eml, $vmd, $xoids); } elsif ($state =~ /\Atag-(?:rw|ro)\z/) { my $docids = []; my $c = eval { $self->{lse}->kw_changed($eml, $vmd->{kw}, $docids); } // 1; # too new, assume changed since still to-be-committed. if (scalar @$docids) { # already in lei/store $sto->wq_do('set_eml_vmd', undef, $vmd, $docids) if $c; } elsif (my $xoids = $self->{lei}->ale->xoids_for($eml)) { # it's in an external, only set kw, here $sto->wq_do('set_xvmd', $xoids, $eml, $vmd); } # else { totally unknown: ignore } else { warn "unknown state: $state (in $self->{lei}->{cfg}->{'-f'})\n"; } } sub maildir_event { # via wq_nonblock_do my ($self, $fn, $vmd, $state) = @_; if (my $eml = PublicInbox::InboxWritable::eml_from_path($fn)) { eml_event($self, $eml, $vmd, $state); } elsif ($! == ENOENT) { $self->{lms}->clear_src(@{$vmd->{sync_info}}); } # else: eml_from_path already warns } sub lei_note_event { my ($lei, $folder, $new_cur, $bn, $fn, @rest) = @_; die "BUG: unexpected: @rest" if @rest; my $cfg = $lei->_lei_cfg or return; # gone (race) my $sto = $lei->_lei_store or return; # gone return flush_lei($lei, 1) if $folder eq 'done'; # special case my $lms = $lei->lms or return; $lms->lms_write_prepare if $new_cur eq ''; # for ->clear_src below $lei->{opt}->{quiet} = 1; $lms->arg2folder($lei, [ $folder ]); my $state = $cfg->get_1("watch.$folder.state") // 'tag-rw'; return if $state eq 'pause'; return $lms->clear_src($folder, \$bn) if $new_cur eq ''; $lms->lms_pause; $lei->ale; # prepare $sto->write_prepare($lei); require PublicInbox::MdirReader; my $self = $cfg->{-lei_note_event} //= do { my $wq = bless { lms => $lms }, __PACKAGE__; # MUAs such as mutt can trigger massive rename() storms so # use some CPU, but don't overwhelm slower storage, either my $jobs = $wq->detect_nproc // 1; $jobs = 4 if $jobs > 4; # same default as V2Writable my ($op_c, $ops) = $lei->workers_start($wq, $jobs); $lei->wait_wq_events($op_c, $ops); PublicInbox::DS::add_uniq_timer('flush_timer', 5, \&flush_task); $to_flush->{$lei->{cfg}->{'-f'}} //= $lei; $wq->prepare_nonblock; $lei->{lne} = $wq; }; if ($folder =~ /\Amaildir:/i) { my $fl = PublicInbox::MdirReader::maildir_basename_flags($bn) // return; return if index($fl, 'T') >= 0; my $kw = PublicInbox::MdirReader::flags2kw($fl); my $vmd = { kw => $kw, sync_info => [ $folder, \$bn ] }; $self->wq_nonblock_do('maildir_event', $fn, $vmd, $state); } # else: TODO: imap } sub ipc_atfork_child { my ($self) = @_; $self->{lei}->_lei_atfork_child(1); # persistent, for a while $self->{lms}->lms_write_prepare; $self->{lse} = $self->{lei}->{sto}->search; $self->SUPER::ipc_atfork_child; } sub _lei_wq_eof { # EOF callback for main lei daemon $_[0]->wq_eof('lne'); } 1; public-inbox-1.9.0/lib/PublicInbox/LeiOverview.pm000066400000000000000000000157301430031475700217100ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # per-mitem/smsg iterators for search results # "ovv" => "Overview viewer" package PublicInbox::LeiOverview; use strict; use v5.10.1; use parent qw(PublicInbox::Lock); use POSIX qw(strftime); use Fcntl qw(F_GETFL O_APPEND); use File::Spec; use File::Temp (); use PublicInbox::MID qw($MID_EXTRACT); use PublicInbox::Address qw(pairs); use PublicInbox::Config; use PublicInbox::Search qw(get_pct); use PublicInbox::LeiDedupe; use PublicInbox::LeiToMail; # cf. https://en.wikipedia.org/wiki/JSON_streaming my $JSONL = 'ldjson|ndjson|jsonl'; # 3 names for the same thing sub iso8601 ($) { strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($_[0])) } # we open this in the parent process before ->wq_io_do handoff sub ovv_out_lk_init ($) { my ($self) = @_; my $tmp = File::Temp->new("lei-ovv.dst.$$.lock-XXXX", TMPDIR => 1, UNLINK => 0); $self->{"lk_id.$self.$$"} = $self->{lock_path} = $tmp->filename; } sub ovv_out_lk_cancel ($) { my ($self) = @_; my $lock_path = delete $self->{"lk_id.$self.$$"} or return; unlink($lock_path); } sub detect_fmt ($) { my ($dst) = @_; if ($dst =~ m!\A([:/]+://)!) { die "$1 support not implemented, yet\n"; } elsif (!-e $dst || -d _) { 'maildir'; # the default TODO: MH? } elsif (-f _ || -p _) { die "unable to determine mbox family of $dst\n"; } else { die "unable to determine format of $dst\n"; } } sub new { my ($class, $lei, $ofmt_key) = @_; my $opt = $lei->{opt}; my $dst = $opt->{output} // '-'; $dst = '/dev/stdout' if $dst eq '-'; $ofmt_key //= 'format'; my $fmt = $opt->{$ofmt_key}; $fmt = lc($fmt) if defined $fmt; if ($dst =~ m!\A([a-z0-9\+]+)://!is) { defined($fmt) and die <<""; --$ofmt_key=$fmt invalid with URL $dst $fmt = lc $1; } elsif ($dst =~ s/\A([a-z0-9]+)://is) { # e.g. Maildir:/home/user/Mail/ my $ofmt = lc $1; $fmt //= $ofmt; die <<"" if $fmt ne $ofmt; --$ofmt_key=$fmt and --output=$ofmt conflict } my $devfd = $lei->path_to_fd($dst) // return; $fmt //= $devfd >= 0 ? 'json' : detect_fmt($dst); if (index($dst, '://') < 0) { # not a URL, so assume path $dst = $lei->canonpath_harder($dst); } # else URL my $self = bless { fmt => $fmt, dst => $dst }, $class; $lei->{ovv} = $self; my $json; if ($fmt =~ /\A($JSONL|(?:concat)?json)\z/) { $json = $self->{json} = ref(PublicInbox::Config->json); } if ($devfd >= 0) { my $isatty = $lei->{need_pager} = -t $lei->{$devfd}; $opt->{pretty} //= $isatty; if (!$isatty && -f _) { my $fl = fcntl($lei->{$devfd}, F_GETFL, 0) // die("fcntl(/dev/fd/$devfd): $!\n"); ovv_out_lk_init($self) unless ($fl & O_APPEND); } else { ovv_out_lk_init($self); } } elsif (!$opt->{quiet}) { $lei->{-progress} = 1; } if ($json) { $lei->{dedupe} //= PublicInbox::LeiDedupe->new($lei); } else { $lei->{l2m} = PublicInbox::LeiToMail->new($lei); if ($opt->{mua} && $lei->{l2m}->lock_free) { $lei->{early_mua} = 1; $opt->{alert} //= [ ':WINCH,:bell' ] if -t $lei->{1}; } } die("--shared is only for v2 inbox output\n") if $self->{fmt} ne 'v2' && $lei->{opt}->{shared}; $self; } # called once by parent sub ovv_begin { my ($self, $lei) = @_; if ($self->{fmt} eq 'json') { $lei->out('['); } # TODO HTML/Atom/... } # called once by parent (via PublicInbox::PktOp '' => query_done) sub ovv_end { my ($self, $lei) = @_; if ($self->{fmt} eq 'json') { # JSON doesn't allow trailing commas, and preventing # trailing commas is a PITA when parallelizing outputs $lei->out("null]\n"); } elsif ($self->{fmt} eq 'concatjson') { $lei->out("\n"); } } # prepares an smsg for JSON sub _unbless_smsg { my ($smsg, $mitem) = @_; # TODO: make configurable # num/tid are nonsensical with multi-inbox search, # lines/bytes are not generally useful delete @$smsg{qw(num tid lines bytes)}; $smsg->{rt} = iso8601(delete $smsg->{ts}); # JMAP receivedAt $smsg->{dt} = iso8601(delete $smsg->{ds}); # JMAP UTCDate $smsg->{pct} = get_pct($mitem) if $mitem; if (my $r = delete $smsg->{references}) { @{$smsg->{refs}} = ($r =~ m/$MID_EXTRACT/go); } if (my $m = delete($smsg->{mid})) { $smsg->{'m'} = $m; } for my $f (qw(from to cc)) { my $v = delete $smsg->{$f} or next; $smsg->{substr($f, 0, 1)} = pairs($v); } $smsg->{'s'} = delete $smsg->{subject}; my $kw = delete($smsg->{kw}); scalar { %$smsg, ($kw && scalar(@$kw) ? (kw => $kw) : ()) }; # unbless } sub ovv_atexit_child { my ($self, $lei) = @_; if (my $bref = delete $lei->{ovv_buf}) { my $lk = $self->lock_for_scope; $lei->out($$bref); } } # JSON module ->pretty output wastes too much vertical white space, # this (IMHO) provides better use of screen real-estate while not # being excessively compact: sub _json_pretty { my ($json, $k, $v) = @_; if (ref $v eq 'ARRAY') { if (@$v) { my $sep = ",\n" . (' ' x (length($k) + 7)); if (ref($v->[0])) { # f/t/c $v = '[' . join($sep, map { my $pair = $json->encode($_); $pair =~ s/(null|"),"/$1, "/g; $pair; } @$v) . ']'; } elsif ($k eq 'kw') { # keywords are short, one-line $v = $json->encode($v); $v =~ s/","/", "/g; } else { # refs, labels, ... $v = '[' . join($sep, map { substr($json->encode([$_]), 1, -1); } @$v) . ']'; } } else { $v = '[]'; } } qq{ "$k": }.$v; } sub ovv_each_smsg_cb { # runs in wq worker usually my ($self, $lei) = @_; my ($json, $dedupe); if (my $pkg = $self->{json}) { $json = $pkg->new; $json->utf8->canonical; $json->ascii(1) if $lei->{opt}->{ascii}; } my $l2m = $lei->{l2m}; if (!$l2m) { $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing'; $dedupe->prepare_dedupe; } $lei->{ovv_buf} = \(my $buf = '') if !$l2m; if ($l2m) { sub { my ($smsg, $mitem, $eml) = @_; $smsg->{pct} = get_pct($mitem) if $mitem; $l2m->wq_io_do('write_mail', [], $smsg, $eml); } } elsif ($self->{fmt} =~ /\A(concat)?json\z/ && $lei->{opt}->{pretty}) { my $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},"; my $lse = $lei->{lse}; sub { # DIY prettiness :P my ($smsg, $mitem) = @_; return if $dedupe->is_smsg_dup($smsg); $lse->xsmsg_vmd($smsg, $smsg->{L} ? undef : 1); $smsg = _unbless_smsg($smsg, $mitem); $buf .= "{\n"; $buf .= join(",\n", map { my $v = $smsg->{$_}; if (ref($v)) { _json_pretty($json, $_, $v); } else { $v = $json->encode([$v]); qq{ "$_": }.substr($v, 1, -1); } } sort keys %$smsg); $buf .= $EOR; return if length($buf) < 65536; my $lk = $self->lock_for_scope; $lei->out($buf); $buf = ''; } } elsif ($json) { my $ORS = $self->{fmt} eq 'json' ? ",\n" : "\n"; # JSONL my $lse = $lei->{lse}; sub { my ($smsg, $mitem) = @_; return if $dedupe->is_smsg_dup($smsg); $lse->xsmsg_vmd($smsg, $smsg->{L} ? undef : 1); $buf .= $json->encode(_unbless_smsg(@_)) . $ORS; return if length($buf) < 65536; my $lk = $self->lock_for_scope; $lei->out($buf); $buf = ''; } } else { die "TODO: unhandled case $self->{fmt}" } } no warnings 'once'; *DESTROY = \&ovv_out_lk_cancel; 1; public-inbox-1.9.0/lib/PublicInbox/LeiP2q.pm000066400000000000000000000130761430031475700205450ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # front-end for the "lei patch-to-query" sub-command package PublicInbox::LeiP2q; use strict; use v5.10.1; use parent qw(PublicInbox::IPC PublicInbox::LeiInput); use PublicInbox::Eml; use PublicInbox::Smsg; use PublicInbox::MsgIter qw(msg_part_text); use PublicInbox::Git qw(git_unquote); use PublicInbox::OnDestroy; use URI::Escape qw(uri_escape_utf8); my $FN = qr!((?:"?[^/\n]+/[^\r\n]+)|/dev/null)!; sub xphrase ($) { my ($s) = @_; return () unless $s =~ /\S/; # cf. xapian-core/queryparser/queryparser.lemony # [\./:\\\@] - is_phrase_generator (implicit phrase search) # FIXME not really sure about these..., we basically want to # extract the longest phrase possible that Xapian can handle map { s/\A\s*//; s/\s+\z//; m![^\./:\\\@\-\w]! ? qq("$_") : $_ ; } ($s =~ m!(\w[\|=><,\./:\\\@\-\w\s]+)!g); } sub add_qterm ($$@) { my ($self, $p, @v) = @_; for (@v) { $self->{qseen}->{"$p\0$_"} //= push(@{$self->{qterms}->{$p}}, $_); } } sub extract_terms { # eml->each_part callback my ($p, $self) = @_; my $part = $p->[0]; # ignore $depth and @idx; my $ct = $part->content_type || 'text/plain'; my ($s, undef) = msg_part_text($part, $ct); defined $s or return; my $in_diff; # TODO: b: nq: q: for (split(/\n/, $s)) { if ($in_diff && s/^ //) { # diff context add_qterm($self, 'dfctx', xphrase($_)); } elsif (/^-- $/) { # email signature begins $in_diff = undef; } elsif (m!^diff --git $FN $FN!) { # wait until "---" and "+++" to capture filenames $in_diff = 1; } elsif (/^index ([a-f0-9]+)\.\.([a-f0-9]+)\b/) { my ($oa, $ob) = ($1, $2); add_qterm($self, 'dfpre', $oa); add_qterm($self, 'dfpost', $ob); # who uses dfblob? } elsif (m!^(?:---|\+{3}) ($FN)!) { next if $1 eq '/dev/null'; my $fn = (split(m!/!, git_unquote($1.''), 2))[1]; add_qterm($self, 'dfn', xphrase($fn)); } elsif ($in_diff && s/^\+//) { # diff added add_qterm($self, 'dfb', xphrase($_)); } elsif ($in_diff && s/^-//) { # diff removed add_qterm($self, 'dfa', xphrase($_)); } elsif (/^@@ (?:\S+) (?:\S+) @@\s*$/) { # traditional diff w/o -p } elsif (/^@@ (?:\S+) (?:\S+) @@\s*(\S+.*)/) { add_qterm($self, 'dfhh', xphrase($1)); } elsif (/^(?:dis)similarity index/ || /^(?:old|new) mode/ || /^(?:deleted|new) file mode/ || /^(?:copy|rename) (?:from|to) / || /^(?:dis)?similarity index / || /^\\ No newline at end of file/ || /^Binary files .* differ/) { } elsif ($_ eq '') { # possible to be in diff context, some mail may be # stripped by MUA or even GNU diff(1). "git apply" # treats a bare "\n" as diff context, too } else { $in_diff = undef; } } } my %pfx2smsg = ( t => [ qw(to) ], c => [ qw(cc) ], f => [ qw(from) ], tc => [ qw(to cc) ], tcf => [ qw(to cc from) ], a => [ qw(to cc from) ], s => [ qw(subject) ], bs => [ qw(subject) ], # body handled elsewhere d => [ qw(ds) ], # nonsense? dt => [ qw(ds) ], # ditto... rt => [ qw(ts) ], # ditto... ); sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh my ($self, $eml) = @_; my $diff_want = $self->{diff_want} // do { my $want = $self->{lei}->{opt}->{want} // [ qw(dfpost7) ]; my @want = split(/[, ]+/, "@$want"); for (@want) { /\A(?:(d|dt|rt):)?([0-9]+)(\.(?:day|weeks)s?)?\z/ or next; my ($pfx, $n, $unit) = ($1, $2, $3); $n *= 86400 * ($unit =~ /week/i ? 7 : 1); $_ = [ $pfx, $n ]; } $self->{want_order} = \@want; $self->{diff_want} = +{ map { $_ => 1 } @want }; }; my $smsg = bless {}, 'PublicInbox::Smsg'; $smsg->populate($eml); while (my ($pfx, $fields) = each %pfx2smsg) { next unless $diff_want->{$pfx}; for my $f (@$fields) { my $v = $smsg->{$f} // next; add_qterm($self, $pfx, xphrase($v)); } } $eml->each_part(\&extract_terms, $self, 1); } sub emit_query { my ($self) = @_; my $lei = $self->{lei}; if ($lei->{opt}->{debug}) { my $json = ref(PublicInbox::Config->json)->new; $json->utf8->canonical->pretty; print { $lei->{2} } $json->encode($self->{qterms}); } my (@q, %seen); for my $pfx (@{$self->{want_order}}) { if (ref($pfx) eq 'ARRAY') { my ($p, $t_range) = @$pfx; # TODO } elsif ($pfx =~ m!\A(?:OR|XOR|AND|NOT)\z! || $pfx =~ m!\A(?:ADJ|NEAR)(?:/[0-9]+)?\z!) { push @q, $pfx; } else { my $plusminus = ($pfx =~ s/\A([\+\-])//) ? $1 : ''; my $end = ($pfx =~ s/([0-9\*]+)\z//) ? $1 : ''; my $x = delete($self->{qterms}->{$pfx}) or next; my $star = $end =~ tr/*//d ? '*' : ''; my $min_len = ($end || 0) + 0; # no wildcards for bool_pfx_external $star = '' if $pfx =~ /\A(dfpre|dfpost|mid)\z/; $pfx = "$plusminus$pfx:"; if ($min_len) { push @q, map { my @t = ($pfx.$_.$star); while (length > $min_len) { chop $_; push @t, 'OR', $pfx.$_.$star; } @t; } @$x; } else { push @q, map { my $k = $pfx.$_.$star; $seen{$k}++ ? () : $k } @$x; } } } if ($lei->{opt}->{uri}) { @q = (join('+', map { uri_escape_utf8($_) } @q)); } else { @q = (join(' ', @q)); } $lei->out(@q, "\n"); } sub lei_p2q { # the "lei patch-to-query" entry point my ($lei, @inputs) = @_; $lei->{opt}->{'in-format'} //= 'eml' if $lei->{opt}->{stdin}; my $self = bless { missing_ok => 1 }, __PACKAGE__; $self->prepare_inputs($lei, \@inputs) or return; $lei->wq1_start($self); } sub ipc_atfork_child { my ($self) = @_; PublicInbox::LeiInput::input_only_atfork_child($self); PublicInbox::OnDestroy->new($$, \&emit_query, $self); } no warnings 'once'; *net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done; 1; public-inbox-1.9.0/lib/PublicInbox/LeiPmdir.pm000066400000000000000000000031421430031475700211470ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # WQ worker for dealing with parallel Maildir reads; # this does NOT use the {shard_info} field of LeiToMail # (and we may remove {shard_info}) # WQ key: {pmd} package PublicInbox::LeiPmdir; use strict; use v5.10.1; use parent qw(PublicInbox::IPC); sub new { my ($cls, $lei, $ipt) = @_; my $self = bless { -wq_ident => 'lei Maildir worker' }, $cls; my $jobs = $lei->{opt}->{jobs} // ''; $jobs =~ /\A[0-9]+,([0-9]+)\z/ and $jobs = $1; my $nproc = $jobs || do { # barely tested with >=4 CPUs, though I suspect I/O latency # of SATA SSD storage will make >=4 processes unnecessary, # here. NVMe users may wish to use '-j' my $n = $self->detect_nproc; $n = $n > 4 ? 4 : $n; }; my ($op_c, $ops) = $lei->workers_start($self, $nproc, undef, { ipt => $ipt }); # LeiInput subclass $op_c->{ops} = $ops; # for PktOp->event_step $self->{lei_sock} = $lei->{sock}; # keep client for pmd_done_wait $lei->{pmd} = $self; } sub ipc_atfork_child { my ($self) = @_; my $ipt = $self->{ipt} // die 'BUG: no self->{ipt}'; my $lei = $ipt->{lei} = $self->{lei}; delete @$lei{qw(auth net)}; # no network access in this worker $ipt->ipc_atfork_child; # calls _lei_atfork_child; } sub each_mdir_fn { # maildir_each_file callback my ($f, $fl, $self, @args) = @_; $self->wq_io_do('mdir_iter', [], $f, $fl, @args); } sub mdir_iter { # via wq_io_do my ($self, $f, $fl, @args) = @_; $self->{ipt}->pmdir_cb($f, $fl, @args); } sub _lei_wq_eof { # EOF callback for main lei daemon $_[0]->wq_eof('pmd'); } 1; public-inbox-1.9.0/lib/PublicInbox/LeiQuery.pm000066400000000000000000000175061430031475700212120ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # handles "lei q" command and provides internals for # several other sub-commands (up, lcat, ...) package PublicInbox::LeiQuery; use strict; use v5.10.1; sub prep_ext { # externals_each callback my ($lxs, $exclude, $loc) = @_; $lxs->prepare_external($loc) unless $exclude->{$loc}; } sub _start_query { # used by "lei q" and "lei up" my ($self) = @_; require PublicInbox::LeiOverview; PublicInbox::LeiOverview->new($self) or return; my $opt = $self->{opt}; my ($xj, $mj) = split(/,/, $opt->{jobs} // ''); (defined($xj) && $xj ne '' && $xj !~ /\A[1-9][0-9]*\z/) and die "`$xj' search jobs must be >= 1\n"; my $lxs = $self->{lxs}; $xj ||= $lxs->concurrency($opt); # allow: "--jobs ,$WRITER_ONLY" my $nproc = $lxs->detect_nproc || 1; # don't memoize, schedtool(1) exists $xj = $nproc if $xj > $nproc; $lxs->{-wq_nr_workers} = $xj; (defined($mj) && $mj !~ /\A[1-9][0-9]*\z/) and die "`$mj' writer jobs must be >= 1\n"; my $l2m = $self->{l2m}; # we use \1 (a ref) to distinguish between default vs. user-supplied if ($l2m && grep { $opt->{$_} //= \1 } (qw(mail-sync import-remote import-before))) { $self->_lei_store(1)->write_prepare($self); if ($opt->{'mail-sync'}) { my $lms = $l2m->{-lms_rw} = $self->lms(1); $lms->lms_write_prepare->lms_pause; # just create } } $l2m and $l2m->{-wq_nr_workers} //= $mj // int($nproc * 0.75 + 0.5); # keep some CPU for git # descending docid order is cheapest, MUA controls sorting order $self->{mset_opt}->{relevance} //= -2 if $l2m || $opt->{threads}; my $tot = $self->{mset_opt}->{total} //= $self->{opt}->{limit} // 10000; $self->{mset_opt}->{limit} = $tot > 10000 ? 10000 : $tot; $self->{mset_opt}->{offset} //= 0; $self->{mset_opt}->{threads} //= $opt->{threads}; if ($self->{net}) { require PublicInbox::LeiAuth; $self->{auth} = PublicInbox::LeiAuth->new } $lxs->do_query($self); } sub qstr_add { # PublicInbox::InputPipe::consume callback for --stdin my ($lei) = @_; # $_[1] = $rbuf $_[1] // $lei->fail("error reading stdin: $!"); return $lei->{mset_opt}->{qstr} .= $_[1] if $_[1] ne ''; eval { $lei->fchdir; $lei->{mset_opt}->{q_raw} = $lei->{mset_opt}->{qstr}; $lei->{lse}->query_approxidate($lei->{lse}->git, $lei->{mset_opt}->{qstr}); _start_query($lei); }; $lei->fail($@) if $@; } sub lxs_prepare { my ($self) = @_; require PublicInbox::LeiXSearch; # prepare any number of LeiXSearch || LeiSearch || Inbox || URL my $lxs = $self->{lxs} = PublicInbox::LeiXSearch->new; my $opt = $self->{opt}; my @only = @{$opt->{only} // []}; # --local is enabled by default unless --only is used # we'll allow "--only $LOCATION --local" my $sto = $self->_lei_store(1); $self->{lse} = $sto->search; if ($opt->{'local'} //= scalar(@only) ? 0 : 1) { $lxs->prepare_external($self->{lse}); } if (@only) { for my $loc (@only) { my @loc = $self->get_externals($loc) or return; $lxs->prepare_external($_) for @loc; } } else { my (@ilocals, @iremotes); for my $loc (@{$opt->{include} // []}) { my @loc = $self->get_externals($loc) or return; $lxs->prepare_external($_) for @loc; @ilocals = @{$lxs->{locals} // []}; @iremotes = @{$lxs->{remotes} // []}; } # --external is enabled by default, but allow --no-external if ($opt->{external} //= 1) { my $ex = $self->canonicalize_excludes($opt->{exclude}); $self->externals_each(\&prep_ext, $lxs, $ex); $opt->{remote} //= !($lxs->locals - $opt->{'local'}); $lxs->{locals} = \@ilocals if !$opt->{'local'}; $lxs->{remotes} = \@iremotes if !$opt->{remote}; } } ($lxs->locals || $lxs->remotes) ? ($self->{lxs} = $lxs) : die("no local or remote inboxes to search\n"); } # the main "lei q SEARCH_TERMS" method sub lei_q { my ($self, @argv) = @_; PublicInbox::Config->json; # preload before forking my $lxs = lxs_prepare($self) or return; $self->ale->refresh_externals($lxs, $self); my $opt = $self->{opt}; my %mset_opt; $mset_opt{asc} = $opt->{'reverse'} ? 1 : 0; if (defined(my $sort = $opt->{'sort'})) { if ($sort eq 'relevance') { $mset_opt{relevance} = 1; } elsif ($sort eq 'docid') { $mset_opt{relevance} = $mset_opt{asc} ? -1 : -2; } elsif ($sort =~ /\Areceived(?:-?[aA]t)?\z/) { # the default } else { die "unrecognized --sort=$sort\n"; } $opt->{save} and return $self->fail('--save and --sort are incompatible'); } $self->{mset_opt} = \%mset_opt; if ($opt->{stdin}) { return $self->fail(<<'') if @argv; no query allowed on command-line with --stdin require PublicInbox::InputPipe; PublicInbox::InputPipe::consume($self->{0}, \&qstr_add, $self); return; } chomp(@argv) and $self->qerr("# trailing `\\n' removed"); $mset_opt{q_raw} = [ @argv ]; # copy $mset_opt{qstr} = $self->{lse}->query_argv_to_string($self->{lse}->git, \@argv); _start_query($self); } # shell completion helper called by lei__complete sub _complete_q { my ($self, @argv) = @_; my @cur; my $cb = $self->lazy_cb(qw(forget-external _complete_)); while (@argv) { if ($argv[-1] =~ /\A(?:-I|(?:--(?:include|exclude|only)))\z/) { my @c = $cb->($self, @cur); # try basename match: if (scalar(@cur) == 1 && index($cur[0], '/') < 0) { my $all = $self->externals_each; my %bn; for my $loc (keys %$all) { my $bn = (split(m!/!, $loc))[-1]; ++$bn{$bn}; } push @c, grep { $bn{$_} == 1 && /\A\Q$cur[0]/ } keys %bn; } return @c if @c; } unshift(@cur, pop @argv); } (); } # Stuff we may pass through to curl (as of 7.64.0), see curl manpage for # details, so most options which make sense for HTTP/HTTPS (including proxy # support for Tor and other methods of getting past weird networks). # Most of these are untested by us, some may not make sense for our use case # and typos below are likely. # n.b. some short options (-$NUMBER) are not supported since they conflict # with other "lei q" switches. # FIXME: Getopt::Long doesn't easily let us support support options with # '.' in them (e.g. --http1.1) # TODO: should we depend on "-c http.*" options for things which have # analogues in git(1)? that would reduce likelihood of conflicts with # our other CLI options # Note: some names are renamed to avoid potential conflicts, # see %lei2curl in lib/PublicInbox/LeiCurl.pm sub curl_opt { qw( curl-config=s@ abstract-unix-socket=s anyauth basic cacert=s capath=s cert-status cert-type cert=s ciphers=s connect-timeout=s connect-to=s cookie-jar=s cookie=s crlfile=s digest disable dns-interface=s dns-ipv4-addr=s dns-ipv6-addr=s dns-servers=s doh-url=s egd-file=s engine=s false-start happy-eyeballs-timeout-ms=s haproxy-protocol header=s@ http2-prior-knowledge http2 insecure interface=s ipv4 ipv6 junk-session-cookies key-type=s key=s limit-rate=s local-port=s location-trusted location max-redirs=i max-time=s negotiate netrc-file=s netrc-optional netrc no-alpn no-buffer no-npn no-sessionid noproxy=s ntlm-wb ntlm pass=s pinnedpubkey=s post301 post302 post303 preproxy=s proxy-anyauth proxy-basic proxy-cacert=s proxy-capath=s proxy-cert-type=s proxy-cert=s proxy-ciphers=s proxy-crlfile=s proxy-digest proxy-header=s@ proxy-insecure proxy-key-type=s proxy-key proxy-negotiate proxy-ntlm proxy-pass=s proxy-pinnedpubkey=s proxy-service-name=s proxy-ssl-allow-beast proxy-tls13-ciphers=s proxy-tlsauthtype=s proxy-tlspassword=s proxy-tlsuser=s proxy-tlsv1 proxy-user=s proxy=s proxytunnel=s pubkey=s random-file=s referer=s resolve=s retry-connrefused retry-delay=s retry-max-time=s retry=i sasl-ir service-name=s socks4=s socks4a=s socks5-basic socks5-gssapi-service-name=s socks5-gssapi socks5-hostname=s socks5=s speed-limit speed-type ssl-allow-beast sslv2 sslv3 suppress-connect-headers tcp-fastopen tls-max=s tls13-ciphers=s tlsauthtype=s tlspassword=s tlsuser=s tlsv1 trace-ascii=s trace-time trace=s unix-socket=s user-agent=s user=s ) } 1; public-inbox-1.9.0/lib/PublicInbox/LeiRediff.pm000066400000000000000000000233241430031475700212770ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # The "lei rediff" sub-command, regenerates diffs with new options package PublicInbox::LeiRediff; use strict; use v5.10.1; use parent qw(PublicInbox::IPC PublicInbox::LeiInput); use File::Temp 0.19 (); # 0.19 for ->newdir use PublicInbox::Spawn qw(spawn which); use PublicInbox::MsgIter qw(msg_part_text); use PublicInbox::ViewDiff; use PublicInbox::LeiBlob; use PublicInbox::Git qw(git_quote git_unquote); use PublicInbox::Import; use PublicInbox::LEI; use PublicInbox::SolverGit; my $MODE = '(100644|120000|100755|160000)'; sub rediff_user_cb { # called by solver when done my ($res, $self) = @_; my $lei = $self->{lei}; my $log_buf = delete $lei->{log_buf}; $$log_buf =~ s/^/# /sgm; ref($res) eq 'ARRAY' or return $lei->child_error(0, $$log_buf); $lei->qerr($$log_buf); my ($git, $oid, $type, $size, $di) = @$res; my $oid_want = delete $self->{cur_oid_want}; # don't try to support all the git-show(1) options for non-blob, # this is just a convenience: $type ne 'blob' and return warn(<{git_dir} (wanted: $oid_want) EOF $self->{blob}->{$oid_want} = $oid; push @{$self->{gits}}, $git if $git->{-tmp}; } # returns a full blob for oid_want sub solve_1 ($$$) { my ($self, $oid_want, $hints) = @_; return if $oid_want =~ /\A0+\z/; $self->{cur_oid_want} = $oid_want; my $solver = bless { gits => $self->{gits}, user_cb => \&rediff_user_cb, uarg => $self, inboxes => [ $self->{lxs}->locals, @{$self->{rmt}} ], }, 'PublicInbox::SolverGit'; open my $log, '+>', \(my $log_buf = '') or die "PerlIO::scalar: $!"; $self->{lei}->{log_buf} = \$log_buf; local $PublicInbox::DS::in_loop = 0; # waitpid synchronously $solver->solve($self->{lei}->{env}, $log, $oid_want, $hints); $self->{blob}->{$oid_want}; # full OID } sub _lei_diff_prepare ($$) { my ($lei, $cmd) = @_; my $opt = $lei->{opt}; push @$cmd, '--'.($opt->{color} && !$opt->{'no-color'} ? '' : 'no-'). 'color'; for my $o (@PublicInbox::LEI::diff_opt) { my $c = ''; # remove single char short option $o =~ s/\|([a-z0-9])\b//i and $c = $1; if ($o =~ s/=[is]@\z//) { my $v = $opt->{$o} or next; push @$cmd, map { $c ? "-$c$_" : "--$o=$_" } @$v; } elsif ($o =~ s/=[is]\z//) { my $v = $opt->{$o} // next; push @$cmd, $c ? "-$c$v" : "--$o=$v"; } elsif ($o =~ s/:[is]\z//) { my $v = $opt->{$o} // next; push @$cmd, $c ? "-$c$v" : ($v eq '' ? "--$o" : "--$o=$v"); } elsif ($o =~ s/!\z//) { my $v = $opt->{$o} // next; push @$cmd, $v ? "--$o" : "--no-$o"; } elsif ($opt->{$o}) { push @$cmd, $c ? "-$c" : "--$o"; } } } sub diff_ctxq ($$) { my ($self, $ctxq) = @_; return unless $ctxq; my $blob = $self->{blob}; my $ta = <<'EOM'; reset refs/heads/A commit refs/heads/A author 0 +0000 committer 0 +0000 data 0 EOM my $tb = $ta; $tb =~ tr!A!B!; my $lei = $self->{lei}; while (my ($oid_a, $oid_b, $pa, $pb, $ma, $mb) = splice(@$ctxq, 0, 6)) { my $xa = $blob->{$oid_a} //= solve_1($self, $oid_a, { path_b => $pa }); my $xb = $blob->{$oid_b} //= solve_1($self, $oid_b, { oid_a => $oid_a, path_a => $pa, path_b => $pb }); $ta .= "M $ma $xa ".git_quote($pa)."\n" if $xa; $tb .= "M $mb $xb ".git_quote($pb)."\n" if $xb; } my $rw = $self->{gits}->[-1]; # has all known alternates if (!$rw->{-tmp}) { my $d = "$self->{rdtmp}/for_tree.git"; -d $d or PublicInbox::Import::init_bare($d); my $f = "$d/objects/info/alternates"; # always overwrite open my $fh, '>', $f or die "open $f: $!"; for my $git (@{$self->{gits}}) { print $fh $git->git_path('objects'),"\n"; } close $fh or die "close $f: $!"; $rw = PublicInbox::Git->new($d); } pipe(my ($r, $w)) or die "pipe: $!"; my $pid = spawn(['git', "--git-dir=$rw->{git_dir}", qw(fast-import --quiet --done --date-format=raw)], $lei->{env}, { 2 => $lei->{2}, 0 => $r }); close $r or die "close r fast-import: $!"; print $w $ta, "\n", $tb, "\ndone\n" or die "print fast-import: $!"; close $w or die "close w fast-import: $!"; waitpid($pid, 0); die "fast-import failed: \$?=$?" if $?; my $cmd = [ 'diff' ]; _lei_diff_prepare($lei, $cmd); $lei->qerr("# git @$cmd"); push @$cmd, qw(A B); unshift @$cmd, 'git', "--git-dir=$rw->{git_dir}"; $pid = spawn($cmd, $lei->{env}, { 2 => $lei->{2}, 1 => $lei->{1} }); waitpid($pid, 0); $lei->child_error($?) if $?; # for git diff --exit-code undef; } sub wait_requote ($$$) { # OnDestroy callback my ($lei, $pid, $old_1) = @_; $lei->{1} = $old_1; # closes stdin of `perl -pE 's/^/> /'` waitpid($pid, 0) == $pid or die "BUG(?) waitpid: \$!=$! \$?=$?"; $lei->child_error($?) if $?; } sub requote ($$) { my ($lei, $pfx) = @_; pipe(my($r, $w)) or die "pipe: $!"; my $rdr = { 0 => $r, 1 => $lei->{1}, 2 => $lei->{2} }; # $^X (perl) is overkill, but maybe there's a weird system w/o sed my $pid = spawn([$^X, '-pE', "s/^/$pfx/"], $lei->{env}, $rdr); my $old_1 = $lei->{1}; $w->autoflush(1); binmode $w, ':utf8'; $lei->{1} = $w; PublicInbox::OnDestroy->new(\&wait_requote, $lei, $pid, $old_1); } sub extract_oids { # Eml each_part callback my ($ary, $self) = @_; my ($p, undef, $idx) = @$ary; $self->{lei}->out($p->header_obj->as_string, "\n"); my ($s, undef) = msg_part_text($p, $p->content_type || 'text/plain'); defined $s or return; my $rq; if ($self->{dqre} && $s =~ s/$self->{dqre}//g) { # '> ' prefix(es) $rq = requote($self->{lei}, $1) if $self->{lei}->{opt}->{drq}; } my @top = split($PublicInbox::ViewDiff::EXTRACT_DIFFS, $s); undef $s; my $blobs = $self->{blobs}; # blobs to resolve my $ctxq; while (defined(my $x = shift @top)) { if (scalar(@top) >= 4 && $top[1] =~ $PublicInbox::ViewDiff::IS_OID && $top[0] =~ $PublicInbox::ViewDiff::IS_OID) { my ($ma, $mb); $x =~ /^old mode $MODE/sm and $ma = $1; $x =~ /^new mode $MODE/sm and $mb = $1; if (!defined($ma) && $x =~ /^index [a-z0-9]+\.\.[a-z0-9]+ $MODE/sm) { $ma = $mb = $1; } $ma //= '100644'; $mb //= $ma; my ($oid_a, $oid_b, $pa, $pb) = splice(@top, 0, 4); $pa eq '/dev/null' or $pa = (split(m'/', git_unquote($pa), 2))[1]; $pb eq '/dev/null' or $pb = (split(m'/', git_unquote($pb), 2))[1]; $blobs->{$oid_a} //= undef; $blobs->{$oid_b} //= undef; push @$ctxq, $oid_a, $oid_b, $pa, $pb, $ma, $mb; } elsif ($ctxq) { my @out; for (split(/^/sm, $x)) { if (/\A-- \r?\n/s) { # email sig starts push @out, $_; $ctxq = diff_ctxq($self, $ctxq); } elsif ($ctxq && (/\A[\+\- ]/ || /\A@@ / || # allow totally blank lines w/o leading # SP, git-apply does: /\A\r?\n/s)) { next; } else { push @out, $_; } } $self->{lei}->out(@out) if @out; } else { $ctxq = diff_ctxq($self, $ctxq); $self->{lei}->out($x); } } $ctxq = diff_ctxq($self, $ctxq); } # ensure dequoted parts are available for rebuilding patches: sub dequote_add { # Eml each_part callback my ($ary, $self) = @_; my ($p, undef, $idx) = @$ary; my ($s, undef) = msg_part_text($p, $p->content_type || 'text/plain'); defined $s or return; if ($s =~ s/$self->{dqre}//g) { # remove '> ' prefix(es) substr($s, 0, 0, "part-dequoted: $idx\n\n"); utf8::encode($s); $self->{tmp_sto}->add_eml(PublicInbox::Eml->new(\$s)); } } sub input_eml_cb { # callback for all emails my ($self, $eml) = @_; { local $SIG{__WARN__} = sub { return if "@_" =~ /^no email in From: .*? or Sender:/; return if PublicInbox::Eml::warn_ignore(@_); warn @_; }; $self->{tmp_sto}->add_eml($eml); $eml->each_part(\&dequote_add, $self) if $self->{dqre}; $self->{tmp_sto}->done; } $eml->each_part(\&extract_oids, $self, 1); } sub lei_rediff { my ($lei, @inputs) = @_; ($lei->{opt}->{drq} && $lei->{opt}->{'dequote-only'}) and return $lei->fail('--drq and --dequote-only are mutually exclusive'); ($lei->{opt}->{drq} && !$lei->{opt}->{verbose}) and $lei->{opt}->{quiet} //= 1; $lei->_lei_store(1)->write_prepare($lei); $lei->{opt}->{'in-format'} //= 'eml' if $lei->{opt}->{stdin}; # maybe it's a non-email (code) blob from a coderepo my $git_dirs = $lei->{opt}->{'git-dir'} //= []; if ($lei->{opt}->{cwd} // 1) { my $cgd = PublicInbox::LeiBlob::get_git_dir($lei, '.'); unshift(@$git_dirs, $cgd) if defined $cgd; } return $lei->fail('no --git-dir to try') unless @$git_dirs; my $lxs = $lei->lxs_prepare; if ($lxs->remotes) { require PublicInbox::LeiRemote; $lei->{curl} //= which('curl') or return $lei->fail('curl needed for', $lxs->remotes); } $lei->ale->refresh_externals($lxs, $lei); my $self = bless { -force_eml => 1, # for LeiInput->input_fh lxs => $lxs, }, __PACKAGE__; $self->prepare_inputs($lei, \@inputs) or return; my $isatty = -t $lei->{1}; $lei->{opt}->{color} //= $isatty; $lei->start_pager if $isatty; $lei->wq1_start($self); } sub ipc_atfork_child { my ($self) = @_; PublicInbox::LeiInput::input_only_atfork_child(@_); my $lei = $self->{lei}; $lei->{1}->autoflush(1); binmode $lei->{1}, ':utf8'; $self->{blobs} = {}; # oidhex => filename $self->{rdtmp} = File::Temp->newdir('lei-rediff-XXXX', TMPDIR => 1); $self->{tmp_sto} = PublicInbox::LeiStore->new( "$self->{rdtmp}/tmp.store", { creat => { nproc => 1 }, indexlevel => 'medium' }); $self->{tmp_sto}->{priv_eidx}->{parallel} = 0; $self->{rmt} = [ $self->{tmp_sto}->search, map { PublicInbox::LeiRemote->new($lei, $_) } $self->{lxs}->remotes ]; $self->{gits} = [ map { PublicInbox::Git->new($lei->rel2abs($_)) } @{$self->{lei}->{opt}->{'git-dir'}} ]; $lei->{env}->{TMPDIR} = $self->{rdtmp}->dirname; if (my $nr = ($lei->{opt}->{drq} || $lei->{opt}->{'dequote-only'})) { my $re = '\s*> ' x $nr; $self->{dqre} = qr/^($re)/ms; } undef; } no warnings 'once'; *net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done; 1; public-inbox-1.9.0/lib/PublicInbox/LeiRefreshMailSync.pm000066400000000000000000000070271430031475700231400ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # "lei refresh-mail-sync" drops dangling sync information # and attempts to detect moved files package PublicInbox::LeiRefreshMailSync; use strict; use v5.10.1; use parent qw(PublicInbox::IPC PublicInbox::LeiInput); use PublicInbox::LeiImport; use PublicInbox::InboxWritable qw(eml_from_path); use PublicInbox::Import; sub folder_missing { # may be called by LeiInput my ($self, $folder) = @_; $self->{lms}->forget_folders($folder); } sub prune_mdir { # lms->each_src callback my ($oidbin, $id, $self, $mdir) = @_; my @try = $$id =~ /:2,[a-zA-Z]*\z/ ? qw(cur new) : qw(new cur); for (@try) { return if -f "$mdir/$_/$$id" } # both tries failed $self->{lms}->clear_src("maildir:$mdir", $id); } sub prune_imap { # lms->each_src callback my ($oidbin, $uid, $self, $uids, $url) = @_; return if exists $uids->{$uid}; $self->{lms}->clear_src($url, $uid); } # detects missed file moves sub pmdir_cb { # called via LeiPmdir->each_mdir_fn my ($self, $f, $fl) = @_; my ($folder, $bn) = ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) or die "BUG: $f was not from a Maildir?"; substr($folder, 0, 0) = 'maildir:'; # add prefix return if scalar($self->{lms}->name_oidbin($folder, $bn)); my $eml = eml_from_path($f) // return; my $oidbin = $self->{lei}->git_oid($eml)->digest; $self->{lms}->set_src($oidbin, $folder, \$bn); } sub input_path_url { # overrides PublicInbox::LeiInput::input_path_url my ($self, $input, @args) = @_; if ($input =~ /\Amaildir:(.+)/i) { $self->{lms}->each_src($input, \&prune_mdir, $self, $1); $self->{lse} //= $self->{lei}->{sto}->search; # call pmdir_cb (via maildir_each_file -> each_mdir_fn) PublicInbox::LeiInput::input_path_url($self, $input); } elsif ($input =~ m!\Aimaps?://!i) { my $uri = PublicInbox::URIimap->new($input); if (my $mic = $self->{lei}->{net}->mic_for_folder($uri)) { my $uids = $mic->search('UID 1:*'); $uids = +{ map { $_ => undef } @$uids }; $self->{lms}->each_src($$uri, \&prune_imap, $self, $uids, $$uri) } else { $self->folder_missing($$uri); } } else { die "BUG: $input not supported" } $self->{lei}->sto_done_request; } sub lei_refresh_mail_sync { my ($lei, @folders) = @_; my $sto = $lei->_lei_store or return $lei->fail(<lms or return $lei->fail(<{opt}->{all})) { $lms->group2folders($lei, $all, \@folders) or return; # TODO: handle NNTP servers which delete messages @folders = grep(!m!\Anntps?://!, @folders); } else { $lms->arg2folder($lei, \@folders); # may die } $lms->lms_pause; # must be done before fork $sto->write_prepare($lei); my $self = bless { missing_ok => 1, lms => $lms }, __PACKAGE__; $lei->{opt}->{'mail-sync'} = 1; # for prepare_inputs $self->prepare_inputs($lei, \@folders) or return; $lei->{-err_type} = 'non-fatal'; $lei->wq1_start($self); } sub ipc_atfork_child { # needed for PublicInbox::LeiPmdir my ($self) = @_; PublicInbox::LeiInput::input_only_atfork_child($self); $self->{lms}->lms_write_prepare; undef; } sub _complete_refresh_mail_sync { my ($lei, @argv) = @_; my $lms = $lei->lms or return (); my $match_cb = $lei->complete_url_prepare(\@argv); my @k = $lms->folders($argv[-1] // undef, 1); my @m = map { $match_cb->($_) } @k; @m ? @m : @k } no warnings 'once'; *net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done; 1; public-inbox-1.9.0/lib/PublicInbox/LeiReindex.pm000066400000000000000000000022451430031475700214750ustar00rootroot00000000000000# Copyright all contributors # License: AGPL-3.0+ # "lei reindex" command to reindex everything in lei/store package PublicInbox::LeiReindex; use v5.12; use parent qw(PublicInbox::IPC); sub reindex_full { my ($lei) = @_; my $sto = $lei->{sto}; my $max = $sto->search->over(1)->max; $lei->qerr("# reindexing 1..$max"); $sto->wq_do('reindex_art', $_) for (1..$max); } sub reindex_store { # via wq_do my ($self) = @_; my ($lei, $argv) = delete @$self{qw(lei argv)}; if (!@$argv) { reindex_full($lei); } } sub lei_reindex { my ($lei, @argv) = @_; my $sto = $lei->_lei_store or return $lei->fail('nothing indexed'); $sto->write_prepare($lei); my $self = bless { lei => $lei, argv => \@argv }, __PACKAGE__; my ($op_c, $ops) = $lei->workers_start($self, 1); $lei->{wq1} = $self; $lei->wait_wq_events($op_c, $ops); $self->wq_do('reindex_store'); $self->wq_close; } sub _lei_wq_eof { # EOF callback for main lei daemon my ($lei) = @_; $lei->{sto}->wq_do('reindex_done'); $lei->wq_eof; } sub ipc_atfork_child { my ($self) = @_; $self->{lei}->_lei_atfork_child; $self->SUPER::ipc_atfork_child; } 1; public-inbox-1.9.0/lib/PublicInbox/LeiRemote.pm000066400000000000000000000045771430031475700213440ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # Make remote externals HTTP(S) inboxes behave like # PublicInbox::Inbox and PublicInbox::Search/ExtSearch. # This exists solely for SolverGit. It is a high-latency a # synchronous API that is not at all fast. package PublicInbox::LeiRemote; use v5.10.1; use strict; use IO::Uncompress::Gunzip; use PublicInbox::MboxReader; use PublicInbox::Spawn qw(popen_rd); use PublicInbox::LeiCurl; use PublicInbox::AutoReap; use PublicInbox::ContentHash qw(git_sha); sub new { my ($cls, $lei, $uri) = @_; bless { uri => $uri, lei => $lei }, $cls; } sub isrch { $_[0] } # SolverGit expcets this sub _each_mboxrd_eml { # callback for MboxReader->mboxrd my ($eml, $self) = @_; my $lei = $self->{lei}; my $xoids = $lei->{ale}->xoids_for($eml, 1); my $smsg = bless {}, 'PublicInbox::Smsg'; if ($lei->{sto} && !$xoids) { # memoize locally my $res = $lei->{sto}->wq_do('add_eml', $eml); $smsg = $res if ref($res) eq ref($smsg); } $smsg->{blob} //= $xoids ? (keys(%$xoids))[0] : $lei->git_oid($eml)->hexdigest; $smsg->populate($eml); $smsg->{mid} //= '(none)'; push @{$self->{smsg}}, $smsg; } sub mset { my ($self, $qstr, undef) = @_; # $opt ($_[2]) ignored my $lei = $self->{lei}; my $curl = PublicInbox::LeiCurl->new($lei, $lei->{curl}); push @$curl, '-s', '-d', ''; my $uri = $self->{uri}->clone; $uri->query_form(q => $qstr, x => 'm', r => 1); # r=1: relevance my $cmd = $curl->for_uri($self->{lei}, $uri); $self->{lei}->qerr("# $cmd"); my ($fh, $pid) = popen_rd($cmd, undef, { 2 => $lei->{2} }); my $ar = PublicInbox::AutoReap->new($pid); $self->{smsg} = []; $fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1); PublicInbox::MboxReader->mboxrd($fh, \&_each_mboxrd_eml, $self); my $wait = $self->{lei}->{sto}->wq_do('done'); $ar->join; $lei->child_error($?) if $?; $self; # we are the mset (and $ibx, and $self) } sub size { scalar @{$_[0]->{smsg}} } # size of previous results sub mset_to_smsg { my ($self, $ibx, $mset) = @_; # all 3 are $self wantarray ? ($self->size, @{$self->{smsg}}) : $self->{smsg}; } sub base_url { "$_[0]->{uri}" } sub smsg_eml { my ($self, $smsg) = @_; if (my $bref = $self->{lei}->ale->git->cat_file($smsg->{blob})) { return PublicInbox::Eml->new($bref); } warn("E: $self->{uri} $smsg->{blob} gone <$smsg->{mid}>\n"); undef; } 1; public-inbox-1.9.0/lib/PublicInbox/LeiRm.pm000066400000000000000000000016651430031475700204620ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # implements the "lei rm" command, you can point this at # an entire spam mailbox or read a message from stdin package PublicInbox::LeiRm; use strict; use v5.10.1; use parent qw(PublicInbox::IPC PublicInbox::LeiInput); sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh my ($self, $eml) = @_; $self->{lei}->{sto}->wq_do('remove_eml', $eml); } sub lei_rm { my ($lei, @inputs) = @_; $lei->_lei_store(1)->write_prepare($lei); $lei->{opt}->{'in-format'} //= 'eml' if $lei->{opt}->{stdin}; my $self = bless {}, __PACKAGE__; $self->prepare_inputs($lei, \@inputs) or return; $lei->{-err_type} = 'non-fatal'; $lei->wq1_start($self); } no warnings 'once'; *ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child; *net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done; 1; public-inbox-1.9.0/lib/PublicInbox/LeiRmWatch.pm000066400000000000000000000015301430031475700214400ustar00rootroot00000000000000# Copyright all contributors # License: AGPL-3.0+ # "lei rm-watch" command package PublicInbox::LeiRmWatch; use strict; use v5.10.1; use parent qw(PublicInbox::LeiInput); sub lei_rm_watch { my ($lei, @argv) = @_; my $cfg = $lei->_lei_cfg(1); $lei->{opt}->{'mail-sync'} = 1; # for prepare_inputs my $self = bless { missing_ok => 1 }, __PACKAGE__; $self->prepare_inputs($lei, \@argv) or return; for my $w (@{$self->{inputs}}) { $lei->_config('--remove-section', "watch.$w"); } delete $lei->{cfg}; # force reload $lei->refresh_watches; } sub _complete_rm_watch { my ($lei, @argv) = @_; my $cfg = $lei->_lei_cfg or return; my $match_cb = $lei->complete_url_prepare(\@argv); my @w = (join("\n", keys %$cfg) =~ m/^watch\.(.+?)\.state$/sgm); map { $match_cb->($_) } @w; } 1; public-inbox-1.9.0/lib/PublicInbox/LeiSavedSearch.pm000066400000000000000000000210531430031475700222650ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # pretends to be like LeiDedupe and also PublicInbox::Inbox package PublicInbox::LeiSavedSearch; use strict; use v5.10.1; use parent qw(PublicInbox::Lock); use PublicInbox::Git; use PublicInbox::OverIdx; use PublicInbox::LeiSearch; use PublicInbox::Config; use PublicInbox::Spawn qw(run_die); use PublicInbox::ContentHash qw(git_sha); use PublicInbox::MID qw(mids_for_index); use Digest::SHA qw(sha256_hex); our $LOCAL_PFX = qr!\A(?:maildir|mh|mbox.+|mmdf|v2):!i; # TODO: put in LeiToMail? # move this to PublicInbox::Config if other things use it: my %cquote = ("\n" => '\\n', "\t" => '\\t', "\b" => '\\b'); sub cquote_val ($) { # cf. git-config(1) my ($val) = @_; $val =~ s/([\n\t\b])/$cquote{$1}/g; $val =~ s/\"/\\\"/g; $val; } sub ARRAY_FIELDS () { qw(only include exclude) } sub BOOL_FIELDS () { qw(external local remote import-remote import-before threads) } sub SINGLE_FIELDS () { qw(limit dedupe output) } sub lss_dir_for ($$;$) { my ($lei, $dstref, $on_fs) = @_; my $pfx; if ($$dstref =~ m,\Aimaps?://,i) { # already canonicalized require PublicInbox::URIimap; my $uri = PublicInbox::URIimap->new($$dstref)->canonical; $$dstref = $$uri; $pfx = $uri->mailbox; } else { # can't use Cwd::abs_path since dirname($$dstref) may not exist $$dstref = $lei->rel2abs($$dstref); $$dstref =~ tr!/!/!s; $pfx = $$dstref; } ($pfx) = ($pfx =~ m{([^/]+)/*\z}); # basename my $lss_dir = $lei->share_path . '/saved-searches/'; my $d = "$lss_dir$pfx-".sha256_hex($$dstref); # fall-back to looking up by st_ino + st_dev in case we're in # a symlinked or bind-mounted path if ($on_fs && !-d $d && -e $$dstref) { my @cur = stat(_); my $want = pack('dd', @cur[1,0]); # st_ino + st_dev my ($c, $o, @st); for my $g ("$pfx-*", '*') { my @maybe = glob("$lss_dir$g/lei.saved-search"); for my $f (@maybe) { $c = $lei->cfg_dump($f) // next; $o = $c->{'lei.q.output'} // next; $o =~ s!$LOCAL_PFX!! or next; @st = stat($o) or next; next if pack('dd', @st[1,0]) ne $want; $f =~ m!\A(.+?)/[^/]+\z! and return $1; } } } $d; } sub list { my ($lei, $pfx) = @_; my $lss_dir = $lei->share_path.'/saved-searches'; return () unless -d $lss_dir; # TODO: persist the cache? Use another format? my $f = $lei->cache_dir."/saved-tmp.$$.".time.'.config'; open my $fh, '>', $f or die "open $f: $!"; print $fh "[include]\n"; for my $p (glob("$lss_dir/*/lei.saved-search")) { print $fh "\tpath = ", cquote_val($p), "\n"; } close $fh or die "close $f: $!"; my $cfg = $lei->cfg_dump($f); unlink($f); my $out = $cfg ? $cfg->get_all('lei.q.output') : []; map {; s!$LOCAL_PFX!!; $_; } @$out } sub translate_dedupe ($$) { my ($self, $lei) = @_; my $dd = $lei->{opt}->{dedupe} // 'content'; return 1 if $dd eq 'content'; # the default return $self->{"-dedupe_$dd"} = 1 if ($dd eq 'oid' || $dd eq 'mid'); die("--dedupe=$dd requires --no-save\n"); } sub up { # updating existing saved search via "lei up" my ($cls, $lei, $dst) = @_; my $f; my $self = bless { ale => $lei->ale }, $cls; my $dir = $dst; output2lssdir($self, $lei, \$dir, \$f) or return die("--no-save was used with $dst cwd=". $lei->rel2abs('.')."\n"); $self->{-cfg} = $lei->cfg_dump($f) // return $lei->child_error; $self->{-ovf} = "$dir/over.sqlite3"; $self->{'-f'} = $f; $self->{lock_path} = "$self->{-f}.flock"; $self; } sub new { # new saved search "lei q --save" my ($cls, $lei) = @_; my $self = bless { ale => $lei->ale }, $cls; require File::Path; my $dst = $lei->{ovv}->{dst}; # canonicalize away relative paths into the config if ($lei->{ovv}->{fmt} eq 'maildir' && $dst =~ m!(?:/*|\A)\.\.(?:/*|\z)! && !-d $dst) { File::Path::make_path($dst); $lei->{ovv}->{dst} = $dst = $lei->abs_path($dst); } my $dir = lss_dir_for($lei, \$dst); File::Path::make_path($dir); # raises on error $self->{-cfg} = {}; my $f = $self->{'-f'} = "$dir/lei.saved-search"; translate_dedupe($self, $lei) or return; open my $fh, '>', $f or return $lei->fail("open $f: $!"); my $sq_dst = PublicInbox::Config::squote_maybe($dst); my $q = $lei->{mset_opt}->{q_raw} // die 'BUG: {q_raw} missing'; if (ref $q) { $q = join("\n", map { "\tq = ".cquote_val($_) } @$q); } else { $q = "\tq = ".cquote_val($q); } $dst = "$lei->{ovv}->{fmt}:$dst" if $dst !~ m!\Aimaps?://!i; $lei->{opt}->{output} = $dst; print $fh <{opt}->{$k} // next; for my $x (@$ary) { print $fh "\t$k = ".cquote_val($x)."\n"; } } for my $k (BOOL_FIELDS) { my $val = $lei->{opt}->{$k} // next; print $fh "\t$k = ".($val ? 1 : 0)."\n"; } for my $k (SINGLE_FIELDS) { my $val = $lei->{opt}->{$k} // next; print $fh "\t$k = $val\n"; } $lei->{opt}->{stdin} and print $fh <fail("close $f: $!"); $self->{lock_path} = "$self->{-f}.flock"; $self->{-ovf} = "$dir/over.sqlite3"; $self; } sub description { $_[0]->{qstr} } # for WWW sub cfg_set { # called by LeiXSearch my ($self, @args) = @_; my $lk = $self->lock_for_scope; # git-config doesn't wait run_die([qw(git config -f), $self->{'-f'}, @args]); } # drop-in for LeiDedupe API sub is_dup { my ($self, $eml, $smsg) = @_; my $oidx = $self->{oidx} // die 'BUG: no {oidx}'; my $lk; if ($self->{-dedupe_mid}) { $lk //= $self->lock_for_scope_fast; for my $mid (@{mids_for_index($eml)}) { my ($id, $prv); return 1 if $oidx->next_by_mid($mid, \$id, \$prv); } } my $blob = $smsg ? $smsg->{blob} : git_sha(1, $eml)->hexdigest; $lk //= $self->lock_for_scope_fast; return 1 if $oidx->blob_exists($blob); if (my $xoids = PublicInbox::LeiSearch::xoids_for($self, $eml, 1)) { for my $docid (values %$xoids) { $oidx->add_xref3($docid, -1, $blob, '.'); } $oidx->commit_lazy; if ($self->{-dedupe_oid}) { exists $xoids->{$blob} ? 1 : undef; } else { 1; } } else { # n.b. above xoids_for fills out eml->{-lei_fake_mid} if needed unless ($smsg) { $smsg = bless {}, 'PublicInbox::Smsg'; $smsg->{bytes} = 0; $smsg->populate($eml); } $smsg->{blob} //= $blob; $oidx->begin_lazy; $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); $oidx->add_overview($eml, $smsg); $oidx->add_xref3($smsg->{num}, -1, $blob, '.'); $oidx->commit_lazy; undef; } } sub prepare_dedupe { my ($self) = @_; $self->{oidx} //= do { my $creat = !-f $self->{-ovf}; my $lk = $self->lock_for_scope; # git-config doesn't wait my $oidx = PublicInbox::OverIdx->new($self->{-ovf}); $oidx->{-no_fsync} = 1; $oidx->dbh; if ($creat) { $oidx->{dbh}->do('PRAGMA journal_mode = WAL'); $oidx->eidx_prep; # for xref3 } $oidx }; } sub over { $_[0]->{oidx} } # for xoids_for # don't use ale->git directly since is_dup is called inside # ale->git->cat_async callbacks sub git { $_[0]->{git} //= PublicInbox::Git->new($_[0]->{ale}->git->{git_dir}) } sub pause_dedupe { my ($self) = @_; git($self)->cleanup; my $lockfh = delete $self->{lockfh}; # from lock_for_scope_fast; my $oidx = delete($self->{oidx}) // return; $oidx->commit_lazy; } sub reset_dedupe { my ($self) = @_; prepare_dedupe($self); my $lk = $self->lock_for_scope_fast; for my $t (qw(xref3 over id2num)) { $self->{oidx}->{dbh}->do("DELETE FROM $t"); } pause_dedupe($self); } sub mm { undef } sub altid_map { {} } sub cloneurl { [] } # find existing directory containing a `lei.saved-search' file based on # $dir_ref which is an output sub output2lssdir { my ($self, $lei, $dir_ref, $fn_ref) = @_; my $dst = $$dir_ref; # imap://$MAILBOX, /path/to/maildir, /path/to/mbox my $dir = lss_dir_for($lei, \$dst, 1); my $f = "$dir/lei.saved-search"; if (-f $f && -r _) { $self->{-cfg} = $lei->cfg_dump($f) // return; $$dir_ref = $dir; $$fn_ref = $f; return 1; } undef; } # cf. LeiDedupe->has_entries sub has_entries { my $oidx = $_[0]->{oidx} // die 'BUG: no {oidx}'; my @n = $oidx->{dbh}->selectrow_array('SELECT num FROM over LIMIT 1'); scalar(@n) ? 1 : undef; } no warnings 'once'; *nntp_url = \&cloneurl; *base_url = \&PublicInbox::Inbox::base_url; *smsg_eml = \&PublicInbox::Inbox::smsg_eml; *smsg_by_mid = \&PublicInbox::Inbox::smsg_by_mid; *msg_by_mid = \&PublicInbox::Inbox::msg_by_mid; *modified = \&PublicInbox::Inbox::modified; *recent = \&PublicInbox::Inbox::recent; *max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef *isrch = *search = \&mm; # TODO *DESTROY = \&pause_dedupe; 1; public-inbox-1.9.0/lib/PublicInbox/LeiSearch.pm000066400000000000000000000134441430031475700213070ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # read-only counterpart for PublicInbox::LeiStore package PublicInbox::LeiSearch; use strict; use v5.10.1; use parent qw(PublicInbox::ExtSearch); # PublicInbox::Search->reopen use PublicInbox::Search qw(xap_terms); use PublicInbox::ContentHash qw(content_digest content_hash git_sha); use PublicInbox::MID qw(mids mids_for_index); use Carp qw(croak); sub _msg_kw { # retry_reopen callback my ($self, $num) = @_; my $xdb = $self->xdb; # set {nshard} for num2docid; xap_terms('K', $xdb, $self->num2docid($num)); } sub msg_keywords { # array or hashref my ($self, $num) = @_; $self->retry_reopen(\&_msg_kw, $num); } sub _oid_kw { # retry_reopen callback my ($self, $nums) = @_; my $xdb = $self->xdb; # set {nshard}; my %kw; for my $num (@$nums) { # there should only be one... my $doc = $xdb->get_document($self->num2docid($num)); my $x = xap_terms('K', $doc); %kw = (%kw, %$x); } \%kw; } # returns undef if blob is unknown sub oidbin_keywords { my ($self, $oidbin) = @_; my @num = $self->over->oidbin_exists($oidbin) or return; $self->retry_reopen(\&_oid_kw, \@num); } sub _xsmsg_vmd { # retry_reopen my ($self, $smsg, $want_label) = @_; my $xdb = $self->xdb; # set {nshard}; my (%kw, %L, $doc, $x); $kw{flagged} = 1 if delete($smsg->{lei_q_tt_flagged}); my @num = $self->over->blob_exists($smsg->{blob}); for my $num (@num) { # there should only be one... $doc = $xdb->get_document($self->num2docid($num)); $x = xap_terms('K', $doc); %kw = (%kw, %$x); if ($want_label) { # JSON/JMAP only $x = xap_terms('L', $doc); %L = (%L, %$x); } } $smsg->{kw} = [ sort keys %kw ] if scalar(keys(%kw)); $smsg->{L} = [ sort keys %L ] if scalar(keys(%L)); } # lookup keywords+labels for external messages sub xsmsg_vmd { my ($self, $smsg, $want_label) = @_; return if $smsg->{kw}; # already set by LeiXSearch->mitem_kw eval { $self->retry_reopen(\&_xsmsg_vmd, $smsg, $want_label) }; warn "$$ $0 (nshard=$self->{nshard}) $smsg->{blob}: $@" if $@; } # when a message has no Message-IDs at all, this is needed for # unsent Draft messages, at least sub content_key ($) { my ($eml) = @_; my $dig = content_digest($eml); my $chash = $dig->clone->digest; my $mids = mids_for_index($eml); unless (@$mids) { $eml->{-lei_fake_mid} = $mids->[0] = PublicInbox::Import::digest2mid($dig, $eml, 0); } ($chash, $mids); } sub _cmp_1st { # git->cat_async callback my ($bref, $oid, $type, $size, $cmp) = @_; # cmp: [chash, xoids, smsg, lms] $bref //= $cmp->[3] ? $cmp->[3]->local_blob($oid, 1) : undef; if ($bref && content_hash(PublicInbox::Eml->new($bref)) eq $cmp->[0]) { $cmp->[1]->{$oid} = $cmp->[2]->{num}; } } # returns { OID => num } mapping for $eml matches # The `num' hash value only makes sense from LeiSearch itself # and is nonsense from the PublicInbox::LeiALE subclass sub xoids_for { my ($self, $eml, $min) = @_; my ($chash, $mids) = content_key($eml); my @overs = ($self->over // $self->overs_all); my $git = $self->git; my $xoids = {}; # no lms when used via {ale}: my $lms = $self->{-lms_rw} //= lms($self) if defined($self->{topdir}); for my $mid (@$mids) { for my $o (@overs) { my ($id, $prev); while (my $cur = $o->next_by_mid($mid, \$id, \$prev)) { next if $cur->{bytes} == 0 || $xoids->{$cur->{blob}}; $git->cat_async($cur->{blob}, \&_cmp_1st, [$chash, $xoids, $cur, $lms]); if ($min && scalar(keys %$xoids) >= $min) { $git->async_wait_all; return $xoids; } } } } $git->async_wait_all; # it could be an 'lei index'-ed file that just got renamed if (scalar(keys %$xoids) < ($min // 1) && defined($self->{topdir})) { my $hex = git_sha(1, $eml)->hexdigest; my @n = $overs[0]->blob_exists($hex); for (@n) { $xoids->{$hex} //= $_ } } scalar(keys %$xoids) ? $xoids : undef; } # returns true if $eml is indexed by lei/store and keywords don't match sub kw_changed { my ($self, $eml, $new_kw_sorted, $docids) = @_; my $cur_kw; if ($eml) { my $xoids = xoids_for($self, $eml) // return; $docids //= []; @$docids = sort { $a <=> $b } values %$xoids; if (!@$docids && $self->over) { my $bin = git_sha(1, $eml)->digest; @$docids = $self->over->oidbin_exists($bin); } } for my $id (@$docids) { $cur_kw = eval { msg_keywords($self, $id) } and last; } if (!defined($cur_kw) && $@) { $docids = join(', num:', @$docids); croak "E: num:$docids keyword lookup failure: $@"; } # RFC 5550 sec 5.9 on the $Forwarded keyword states: # "Once set, the flag SHOULD NOT be cleared" if (exists($cur_kw->{forwarded}) && !grep(/\Aforwarded\z/, @$new_kw_sorted)) { delete $cur_kw->{forwarded}; } $cur_kw = join("\0", sort keys %$cur_kw); join("\0", @$new_kw_sorted) eq $cur_kw ? 0 : 1; } sub all_terms { my ($self, $pfx) = @_; my $xdb = $self->xdb; my $cur = $xdb->allterms_begin($pfx); my $end = $xdb->allterms_end($pfx); my %ret; for (; $cur != $end; $cur++) { my $tn = $cur->get_termname; index($tn, $pfx) == 0 and $ret{substr($tn, length($pfx))} = undef; } wantarray ? (sort keys %ret) : \%ret; } sub qparse_new { my ($self) = @_; my $qp = $self->SUPER::qparse_new; # PublicInbox::Search $self->{qp_flags} |= PublicInbox::Search::FLAG_PHRASE() | PublicInbox::Search::FLAG_PURE_NOT(); $qp->add_boolean_prefix('kw', 'K'); $qp->add_boolean_prefix('L', 'L'); $qp } sub lms { my ($self) = @_; require PublicInbox::LeiMailSync; my $f = "$self->{topdir}/mail_sync.sqlite3"; -f $f ? PublicInbox::LeiMailSync->new($f) : undef; } # allow SolverGit->resolve_patch to work with "lei index" sub smsg_eml { my ($self, $smsg) = @_; PublicInbox::Inbox::smsg_eml($self, $smsg) // do { my $lms = lms($self); my $bref = $lms ? $lms->local_blob($smsg->{blob}, 1) : undef; $bref ? PublicInbox::Eml->new($bref) : undef; }; } 1; public-inbox-1.9.0/lib/PublicInbox/LeiSelfSocket.pm000066400000000000000000000023131430031475700221350ustar00rootroot00000000000000# Copyright all contributors # License: AGPL-3.0+ # dummy placeholder socket for internal lei commands. # This receives what script/lei receives, but isn't connected # to an interactive terminal so I'm not sure what to do with it... package PublicInbox::LeiSelfSocket; use strict; use v5.10.1; use parent qw(PublicInbox::DS); use Data::Dumper; $Data::Dumper::Useqq = 1; # should've been the Perl default :P use PublicInbox::Syscall qw(EPOLLIN); use PublicInbox::Spawn; my $recv_cmd; sub new { my ($cls, $r) = @_; my $self = bless { sock => $r }, $cls; $r->blocking(0); no warnings 'once'; $recv_cmd = $PublicInbox::LEI::recv_cmd; $self->SUPER::new($r, EPOLLIN); } sub event_step { my ($self) = @_; my (@fds) = $recv_cmd->($self->{sock}, my $buf, 4096 * 33); if (scalar(@fds) == 1 && !defined($fds[0])) { return if $!{EAGAIN}; die "recvmsg: $!" unless $!{ECONNRESET}; $buf = ''; } else { # just in case open so perl can auto-close them: for (@fds) { open my $fh, '+<&=', $_ }; } return $self->close if $buf eq ''; warn Dumper({ 'unexpected self msg' => $buf, fds => \@fds }); # TODO: figure out what to do with these messages... } 1; public-inbox-1.9.0/lib/PublicInbox/LeiStore.pm000066400000000000000000000447511430031475700212030ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # # Local storage (cache/memo) for lei(1), suitable for personal/private # mail iff on encrypted device/FS. Based on v2, but only deduplicates # git storage based on git OID (index deduplication is done in ContentHash) # # for xref3, the following are constant: $eidx_key = '.', $xnum = -1 # # We rely on the synchronous IPC API for this in lei-daemon and # multiple lei clients to write to it at once. This allows the # lei/store IPC process to be decoupled from network latency in # lei WQ workers. package PublicInbox::LeiStore; use strict; use v5.10.1; use parent qw(PublicInbox::Lock PublicInbox::IPC); use PublicInbox::ExtSearchIdx; use PublicInbox::Eml; use PublicInbox::Import; use PublicInbox::InboxWritable qw(eml_from_path); use PublicInbox::V2Writable; use PublicInbox::ContentHash qw(content_hash); use PublicInbox::MID qw(mids); use PublicInbox::LeiSearch; use PublicInbox::MDA; use PublicInbox::Spawn qw(spawn); use PublicInbox::MdirReader; use PublicInbox::LeiToMail; use File::Temp (); use POSIX (); use IO::Handle (); # ->autoflush use Sys::Syslog qw(syslog openlog); use Errno qw(EEXIST ENOENT); use PublicInbox::Syscall qw(rename_noreplace); sub new { my (undef, $dir, $opt) = @_; my $eidx = PublicInbox::ExtSearchIdx->new($dir, $opt); my $self = bless { priv_eidx => $eidx }, __PACKAGE__; eidx_init($self)->done if $opt->{creat}; $self; } sub git { $_[0]->{priv_eidx}->git } # read-only sub packing_factor { $PublicInbox::V2Writable::PACKING_FACTOR } sub rotate_bytes { $_[0]->{rotate_bytes} // ((1024 * 1024 * 1024) / $_[0]->packing_factor) } sub git_ident ($) { my ($git) = @_; my $rdr = {}; open $rdr->{2}, '>', '/dev/null' or die "open /dev/null: $!"; chomp(my $i = $git->qx([qw(var GIT_COMMITTER_IDENT)], undef, $rdr)); $i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/ and return ($1, $2); my ($user, undef, undef, undef, undef, undef, $gecos) = getpwuid($<); ($user) = (($user // $ENV{USER} // '') =~ /([\w\-\.\+]+)/); $user //= 'lei-user'; ($gecos) = (($gecos // '') =~ /([\w\-\.\+ \t]+)/); $gecos //= 'lei user'; require Sys::Hostname; my ($host) = (Sys::Hostname::hostname() =~ /([\w\-\.]+)/); $host //= 'localhost'; ($gecos, "$user\@$host") } sub importer { my ($self) = @_; my $max; my $im = $self->{im}; if ($im) { return $im if $im->{bytes_added} < $self->rotate_bytes; delete $self->{im}; $im->done; undef $im; $self->checkpoint; $max = $self->{priv_eidx}->{mg}->git_epochs + 1; } my (undef, $tl) = eidx_init($self); # acquire lock $max //= $self->{priv_eidx}->{mg}->git_epochs; while (1) { my $latest = $self->{priv_eidx}->{mg}->add_epoch($max); my $git = PublicInbox::Git->new($latest); $self->done; # unlock # re-acquire lock, update alternates for new epoch (undef, $tl) = eidx_init($self); my $packed_bytes = $git->packed_bytes; my $unpacked_bytes = $packed_bytes / $self->packing_factor; if ($unpacked_bytes >= $self->rotate_bytes) { $max++; next; } my ($n, $e) = git_ident($git); $self->{im} = $im = PublicInbox::Import->new($git, $n, $e); $im->{bytes_added} = int($packed_bytes / $self->packing_factor); $im->{lock_path} = undef; $im->{path_type} = 'v2'; return $im; } } sub search { PublicInbox::LeiSearch->new($_[0]->{priv_eidx}->{topdir}); } # follows the stderr file sub _tail_err { my ($self) = @_; print { $self->{-err_wr} } readline($self->{-tmp_err}); } sub eidx_init { my ($self) = @_; my $eidx = $self->{priv_eidx}; my $tl = wantarray && $self->{-err_wr} ? PublicInbox::OnDestroy->new($$, \&_tail_err, $self) : undef; $eidx->idx_init({-private => 1}); # acquires lock wantarray ? ($eidx, $tl) : $eidx; } sub _docids_for ($$) { my ($self, $eml) = @_; my %docids; my $eidx = $self->{priv_eidx}; my ($chash, $mids) = PublicInbox::LeiSearch::content_key($eml); my $oidx = $eidx->{oidx}; my $im = $self->{im}; for my $mid (@$mids) { my ($id, $prev); while (my $cur = $oidx->next_by_mid($mid, \$id, \$prev)) { next if $cur->{bytes} == 0; # external-only message my $oid = $cur->{blob}; my $docid = $cur->{num}; my $bref = $im ? $im->cat_blob($oid) : undef; $bref //= $eidx->git->cat_file($oid) // _lms_rw($self)->local_blob($oid, 1) // do { warn "W: $oid (#$docid) <$mid> not found\n"; next; }; local $self->{current_info} = $oid; my $x = PublicInbox::Eml->new($bref); $docids{$docid} = $docid if content_hash($x) eq $chash; } } sort { $a <=> $b } values %docids; } # n.b. similar to LeiExportKw->export_kw_md, but this is for a single eml sub export1_kw_md ($$$$$) { my ($self, $mdir, $bn, $oidbin, $vmdish) = @_; # vmd/vmd_mod my $orig = $bn; my (@try, $unkn, $kw); if ($bn =~ s/:2,([a-zA-Z]*)\z//) { ($kw, $unkn) = PublicInbox::MdirReader::flags2kw($1); if (my $set = $vmdish->{kw}) { $kw = $set; } elsif (my $add = $vmdish->{'+kw'}) { @$kw{@$add} = (); } elsif (my $del = $vmdish->{-kw}) { delete @$kw{@$del}; } # else no changes... @try = qw(cur new); } else { # no keywords, yet, could be in new/ @try = qw(new cur); $unkn = []; if (my $set = $vmdish->{kw}) { $kw = $set; } elsif (my $add = $vmdish->{'+kw'}) { @$kw{@$add} = (); # auto-vivify } else { # ignore $vmdish->{-kw} $kw = []; } } $kw = [ keys %$kw ] if ref($kw) eq 'HASH'; $bn .= ':2,'. PublicInbox::LeiToMail::kw2suffix($kw, @$unkn); return if $orig eq $bn; # no change # we use link(2) + unlink(2) since rename(2) may # inadvertently clobber if the "uniquefilename" part wasn't # actually unique. my $dst = "$mdir/cur/$bn"; for my $d (@try) { my $src = "$mdir/$d/$orig"; if (rename_noreplace($src, $dst)) { # TODO: verify oidbin? $self->{lms}->mv_src("maildir:$mdir", $oidbin, \$orig, $bn); return; } elsif ($! == EEXIST) { # lost race with "lei export-kw"? return; } elsif ($! != ENOENT) { syslog('warning', "rename_noreplace($src -> $dst): $!"); } } for (@try) { return if -e "$mdir/$_/$orig" }; $self->{lms}->clear_src("maildir:$mdir", \$orig); } sub sto_export_kw ($$$) { my ($self, $docid, $vmdish) = @_; # vmdish (vmd or vmd_mod) my ($eidx, $tl) = eidx_init($self); my $lms = _lms_rw($self) // return; my $xr3 = $eidx->{oidx}->get_xref3($docid, 1); for my $row (@$xr3) { my (undef, undef, $oidbin) = @$row; my $locs = $lms->locations_for($oidbin) // next; while (my ($loc, $ids) = each %$locs) { if ($loc =~ s!\Amaildir:!!i) { for my $id (@$ids) { export1_kw_md($self, $loc, $id, $oidbin, $vmdish); } } # TODO: IMAP } } } # vmd = { kw => [ qw(seen ...) ], L => [ qw(inbox ...) ] } sub set_eml_vmd { my ($self, $eml, $vmd, $docids) = @_; my ($eidx, $tl) = eidx_init($self); $docids //= [ _docids_for($self, $eml) ]; for my $docid (@$docids) { $eidx->idx_shard($docid)->ipc_do('set_vmd', $docid, $vmd); sto_export_kw($self, $docid, $vmd); } $docids; } sub add_eml_vmd { my ($self, $eml, $vmd) = @_; my ($eidx, $tl) = eidx_init($self); my @docids = _docids_for($self, $eml); for my $docid (@docids) { $eidx->idx_shard($docid)->ipc_do('add_vmd', $docid, $vmd); } \@docids; } sub remove_eml_vmd { # remove just the VMD my ($self, $eml, $vmd) = @_; my ($eidx, $tl) = eidx_init($self); my @docids = _docids_for($self, $eml); for my $docid (@docids) { $eidx->idx_shard($docid)->ipc_do('remove_vmd', $docid, $vmd); } \@docids; } sub _lms_rw ($) { # it is important to have eidx processes open before lms my ($self) = @_; $self->{lms} // do { require PublicInbox::LeiMailSync; my ($eidx, $tl) = eidx_init($self); my $f = "$self->{priv_eidx}->{topdir}/mail_sync.sqlite3"; my $lms = PublicInbox::LeiMailSync->new($f); $lms->lms_write_prepare; $self->{lms} = $lms; }; } sub _remove_if_local { # git->cat_async arg my ($bref, $oidhex, $type, $size, $self) = @_; $self->{im}->remove($bref) if $bref; } sub remove_docids ($;@) { my ($self, @docids) = @_; my $eidx = eidx_init($self); for my $docid (@docids) { $eidx->remove_doc($docid); $eidx->{oidx}->{dbh}->do(<importer; # may create new epoch my ($eidx, $tl) = eidx_init($self); my $oidx = $eidx->{oidx}; my @docids = _docids_for($self, $eml); my $git = $eidx->git; for my $docid (@docids) { my $xr3 = $oidx->get_xref3($docid, 1); for my $row (@$xr3) { my (undef, undef, $oidbin) = @$row; my $oidhex = unpack('H*', $oidbin); $git->cat_async($oidhex, \&_remove_if_local, $self); } } $git->async_wait_all; remove_docids($self, @docids); \@docids; } sub oid2docid ($$) { my ($self, $oid) = @_; my $eidx = eidx_init($self); my ($docid, @cull) = $eidx->{oidx}->blob_exists($oid); if (@cull) { # fixup old bugs... warn <ipc_do('add_vmd', $docid, $vmd); sto_export_kw($self, $docid, $vmd); } sub _docids_and_maybe_kw ($$) { my ($self, $docids) = @_; return $docids unless wantarray; my $kw = {}; for my $num (@$docids) { # likely only 1, unless ContentHash changes # can't use ->search->msg_keywords on uncommitted docs my $idx = $self->{priv_eidx}->idx_shard($num); my $tmp = eval { $idx->ipc_do('get_terms', 'K', $num) }; if ($@) { warn "#$num get_terms: $@" } else { @$kw{keys %$tmp} = values(%$tmp) }; } ($docids, [ sort keys %$kw ]); } sub _reindex_1 { # git->cat_async callback my ($bref, $hex, $type, $size, $smsg) = @_; my $self = delete $smsg->{-sto}; my ($eidx, $tl) = eidx_init($self); $bref //= _lms_rw($self)->local_blob($hex, 1); if ($bref) { my $eml = PublicInbox::Eml->new($bref); $smsg->{-merge_vmd} = 1; # preserve existing keywords $eidx->idx_shard($smsg->{num})->index_eml($eml, $smsg); } elsif ($type eq 'missing') { # pre-release/buggy lei may've indexed external-only msgs, # try to correct that, here warn("E: missing $hex, culling (ancient lei artifact?)\n"); $smsg->{to} = $smsg->{cc} = $smsg->{from} = ''; $smsg->{bytes} = 0; $eidx->{oidx}->update_blob($smsg, ''); my $eml = PublicInbox::Eml->new("\r\n\r\n"); $eidx->idx_shard($smsg->{num})->index_eml($eml, $smsg); } else { warn("E: $type $hex\n"); } } sub reindex_art { my ($self, $art) = @_; my ($eidx, $tl) = eidx_init($self); my $smsg = $eidx->{oidx}->get_art($art) // return; return if $smsg->{bytes} == 0; # external-only message $smsg->{-sto} = $self; $eidx->git->cat_async($smsg->{blob} // die("no blob (#$art)"), \&_reindex_1, $smsg); } sub reindex_done { my ($self) = @_; my ($eidx, $tl) = eidx_init($self); $eidx->git->async_wait_all; # ->done to be called via sto_done_request } sub add_eml { my ($self, $eml, $vmd, $xoids) = @_; my $im = $self->{-fake_im} // $self->importer; # may create new epoch my ($eidx, $tl) = eidx_init($self); my $oidx = $eidx->{oidx}; # PublicInbox::Import::add checks this my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg'; $smsg->{-eidx_git} = $eidx->git if !$self->{-fake_im}; my $im_mark = $im->add($eml, undef, $smsg); if ($vmd && $vmd->{sync_info}) { _lms_rw($self)->set_src($smsg->oidbin, @{$vmd->{sync_info}}); } unless ($im_mark) { # duplicate blob returns undef return unless wantarray; my @docids = $oidx->blob_exists($smsg->{blob}); return _docids_and_maybe_kw $self, \@docids; } local $self->{current_info} = $smsg->{blob}; my $vivify_xvmd = delete($smsg->{-vivify_xvmd}) // []; # exact matches if ($xoids) { # fuzzy matches from externals in ale->xoids_for delete $xoids->{$smsg->{blob}}; # added later if (scalar keys %$xoids) { my %docids = map { $_ => 1 } @$vivify_xvmd; for my $oid (keys %$xoids) { my $docid = oid2docid($self, $oid); $docids{$docid} = $docid if defined($docid); } @$vivify_xvmd = sort { $a <=> $b } keys(%docids); } } if (@$vivify_xvmd) { # docids list $xoids //= {}; $xoids->{$smsg->{blob}} = 1; for my $docid (@$vivify_xvmd) { my $cur = $oidx->get_art($docid); my $idx = $eidx->idx_shard($docid); if (!$cur || $cur->{bytes} == 0) { # really vivifying $smsg->{num} = $docid; $oidx->add_overview($eml, $smsg); $smsg->{-merge_vmd} = 1; $idx->index_eml($eml, $smsg); } else { # lse fuzzy hit off ale $idx->ipc_do('add_eidx_info', $docid, '.', $eml); } for my $oid (keys %$xoids) { $oidx->add_xref3($docid, -1, $oid, '.'); } _add_vmd($self, $idx, $docid, $vmd) if $vmd; } _docids_and_maybe_kw $self, $vivify_xvmd; } elsif (my @docids = _docids_for($self, $eml)) { # fuzzy match from within lei/store for my $docid (@docids) { my $idx = $eidx->idx_shard($docid); $oidx->add_xref3($docid, -1, $smsg->{blob}, '.'); # add_eidx_info for List-Id $idx->ipc_do('add_eidx_info', $docid, '.', $eml); _add_vmd($self, $idx, $docid, $vmd) if $vmd; } _docids_and_maybe_kw $self, \@docids; } else { # totally new message, no keywords delete $smsg->{-oidx}; # for IPC-friendliness $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); $oidx->add_overview($eml, $smsg); $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.'); my $idx = $eidx->idx_shard($smsg->{num}); $idx->index_eml($eml, $smsg); _add_vmd($self, $idx, $smsg->{num}, $vmd) if $vmd; wantarray ? ($smsg, []) : $smsg; } } sub set_eml { my ($self, $eml, $vmd, $xoids) = @_; add_eml($self, $eml, $vmd, $xoids) // set_eml_vmd($self, $eml, $vmd); } sub index_eml_only { my ($self, $eml, $vmd, $xoids) = @_; require PublicInbox::FakeImport; local $self->{-fake_im} = PublicInbox::FakeImport->new; set_eml($self, $eml, $vmd, $xoids); } # store {kw} / {L} info for a message which is only in an external sub _external_only ($$$) { my ($self, $xoids, $eml) = @_; my $eidx = $self->{priv_eidx}; my $oidx = $eidx->{oidx} // die 'BUG: {oidx} missing'; my $smsg = bless { blob => '' }, 'PublicInbox::Smsg'; $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); # save space for an externals-only message my $hdr = $eml->header_obj; $smsg->populate($hdr); # sets lines == 0 $smsg->{bytes} = 0; delete @$smsg{qw(From Subject)}; $smsg->{to} = $smsg->{cc} = $smsg->{from} = ''; $oidx->add_overview($hdr, $smsg); # subject+references for threading $smsg->{subject} = ''; for my $oid (keys %$xoids) { $oidx->add_xref3($smsg->{num}, -1, $oid, '.'); } my $idx = $eidx->idx_shard($smsg->{num}); $idx->index_eml(PublicInbox::Eml->new("\n\n"), $smsg); ($smsg, $idx); } sub update_xvmd { my ($self, $xoids, $eml, $vmd_mod) = @_; my ($eidx, $tl) = eidx_init($self); my $oidx = $eidx->{oidx}; my %seen; for my $oid (keys %$xoids) { my $docid = oid2docid($self, $oid) // next; delete $xoids->{$oid}; next if $seen{$docid}++; my $idx = $eidx->idx_shard($docid); $idx->ipc_do('update_vmd', $docid, $vmd_mod); sto_export_kw($self, $docid, $vmd_mod); } return unless scalar(keys(%$xoids)); # see if it was indexed, but with different OID(s) if (my @docids = _docids_for($self, $eml)) { for my $docid (@docids) { next if $seen{$docid}; for my $oid (keys %$xoids) { $oidx->add_xref3($docid, -1, $oid, '.'); } my $idx = $eidx->idx_shard($docid); $idx->ipc_do('update_vmd', $docid, $vmd_mod); sto_export_kw($self, $docid, $vmd_mod); } return; } # totally unseen my ($smsg, $idx) = _external_only($self, $xoids, $eml); $idx->ipc_do('update_vmd', $smsg->{num}, $vmd_mod); sto_export_kw($self, $smsg->{num}, $vmd_mod); } # set or update keywords for external message, called via ipc_do sub set_xvmd { my ($self, $xoids, $eml, $vmd) = @_; my ($eidx, $tl) = eidx_init($self); my $oidx = $eidx->{oidx}; my %seen; # see if we can just update existing docs for my $oid (keys %$xoids) { my $docid = oid2docid($self, $oid) // next; delete $xoids->{$oid}; # all done with this oid next if $seen{$docid}++; my $idx = $eidx->idx_shard($docid); $idx->ipc_do('set_vmd', $docid, $vmd); sto_export_kw($self, $docid, $vmd); } return unless scalar(keys(%$xoids)); # n.b. we don't do _docids_for here, we expect the caller # already checked $lse->kw_changed before calling this sub return unless (@{$vmd->{kw} // []}) || (@{$vmd->{L} // []}); # totally unseen: my ($smsg, $idx) = _external_only($self, $xoids, $eml); $idx->ipc_do('add_vmd', $smsg->{num}, $vmd); sto_export_kw($self, $smsg->{num}, $vmd); } sub checkpoint { my ($self, $wait) = @_; if (my $im = $self->{im}) { $wait ? $im->barrier : $im->checkpoint; } delete $self->{lms}; $self->{priv_eidx}->checkpoint($wait); } sub xchg_stderr { my ($self) = @_; _tail_err($self) if $self->{-err_wr}; my $dir = $self->{priv_eidx}->{topdir}; return unless -e $dir; my $old = delete $self->{-tmp_err}; my $pfx = POSIX::strftime('%Y%m%d%H%M%S', gmtime(time)); my $err = File::Temp->new(TEMPLATE => "$pfx.$$.err-XXXX", SUFFIX => '.err', DIR => $dir); open STDERR, '>>', $err->filename or die "dup2: $!"; STDERR->autoflush(1); # shared with shard subprocesses $self->{-tmp_err} = $err; # separate file description for RO access undef; } sub done { my ($self, $sock_ref) = @_; my $err = ''; if (my $im = delete($self->{im})) { eval { $im->done }; if ($@) { $err .= "import done: $@\n"; warn $err; } } delete $self->{lms}; $self->{priv_eidx}->done; # V2Writable::done xchg_stderr($self); die $err if $err; } sub ipc_atfork_child { my ($self) = @_; my $lei = $self->{lei}; $lei->_lei_atfork_child(1) if $lei; xchg_stderr($self); if (my $to_close = delete($self->{to_close})) { close($_) for @$to_close; } openlog('lei/store', 'pid,nowait,nofatal,ndelay', 'user'); $self->SUPER::ipc_atfork_child; } sub recv_and_run { my ($self, @args) = @_; local $PublicInbox::DS::in_loop = 0; # waitpid synchronously $self->SUPER::recv_and_run(@args); } sub _sto_atexit { # dwaitpid callback my ($args, $pid) = @_; my $self = $args->[0]; warn "lei/store PID:$pid died \$?=$?\n" if $?; } sub write_prepare { my ($self, $lei) = @_; $lei // die 'BUG: $lei not passed'; unless ($self->{-ipc_req}) { my $dir = $lei->store_path; substr($dir, -length('/lei/store'), 10, ''); pipe(my ($r, $w)) or die "pipe: $!"; $w->autoflush(1); # Mail we import into lei are private, so headers filtered out # by -mda for public mail are not appropriate local @PublicInbox::MDA::BAD_HEADERS = (); $self->wq_workers_start("lei/store $dir", 1, $lei->oldset, { lei => $lei, -err_wr => $w, to_close => [ $r ], }); $self->wq_wait_async(\&_sto_atexit); # outlives $lei require PublicInbox::LeiStoreErr; PublicInbox::LeiStoreErr->new($r, $lei); } $lei->{sto} = $self; } 1; public-inbox-1.9.0/lib/PublicInbox/LeiStoreErr.pm000066400000000000000000000023111430031475700216360ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # forwards stderr from lei/store process to any lei clients using # the same store, falls back to syslog if no matching clients exist. package PublicInbox::LeiStoreErr; use v5.12; use parent qw(PublicInbox::DS); use PublicInbox::Syscall qw(EPOLLIN); use Sys::Syslog qw(openlog syslog closelog); use IO::Handle (); # ->blocking sub new { my ($cls, $rd, $lei) = @_; my $self = bless { sock => $rd, store_path => $lei->store_path }, $cls; $rd->blocking(0); $self->SUPER::new($rd, EPOLLIN); # level-trigger } sub event_step { my ($self) = @_; my $n = sysread($self->{sock}, my $buf, 8192); return ($!{EAGAIN} ? 0 : $self->close) if !defined($n); return $self->close if !$n; my $printed; for my $lei (values %PublicInbox::DS::DescriptorMap) { my $cb = $lei->can('store_path') // next; next if $cb->($lei) ne $self->{store_path}; my $err = $lei->{2} // next; print $err $buf and $printed = 1; } if (!$printed) { openlog('lei/store', 'pid,nowait,nofatal,ndelay', 'user'); for my $l (split(/\n/, $buf)) { syslog('warning', '%s', $l) } closelog(); # don't share across fork } } 1; public-inbox-1.9.0/lib/PublicInbox/LeiSucks.pm000066400000000000000000000047171430031475700211750ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # Undocumented hidden command somebody might discover if they're # frustrated and need to report a bug. There's no manpage and # it won't show up in tab completions or help. package PublicInbox::LeiSucks; use strict; use v5.10.1; use Digest::SHA (); use Config; use POSIX (); use PublicInbox::Config; use PublicInbox::IPC; sub lei_sucks { my ($lei, @argv) = @_; $lei->start_pager if -t $lei->{1}; my ($os, undef, $rel, undef, $mac)= POSIX::uname(); if ($mac eq 'x86_64' && $Config{ptrsize} == 4) { $mac .= $Config{cppsymbols} =~ /\b__ILP32__=1\b/ ? ',u=x32' : ',u=x86'; } eval { require PublicInbox }; my $pi_ver = eval('$PublicInbox::VERSION') // '(???)'; my $nproc = PublicInbox::IPC::detect_nproc() // '?'; my @out = ("lei $pi_ver\n", "perl $Config{version} / $os $rel / $mac ". "ptrsize=$Config{ptrsize} nproc=$nproc\n"); chomp(my $gv = `git --version` || "git missing"); $gv =~ s/ version / /; my $json = ref(PublicInbox::Config->json); $json .= ' ' . eval('$'.$json.'::VERSION') if $json; $json ||= '(no JSON)'; push @out, "$gv / $json\n"; if (eval { require PublicInbox::Over }) { push @out, 'SQLite '. (eval('$DBD::SQLite::sqlite_version') // '(undef)') . ', DBI '.(eval('$DBI::VERSION') // '(undef)') . ', DBD::SQLite '. (eval('$DBD::SQLite::VERSION') // '(undef)')."\n"; } else { push @out, "Unable to load DBI / DBD::SQLite: $@\n"; } if (eval { require PublicInbox::Search } && PublicInbox::Search::load_xapian()) { push @out, 'Xapian '. join('.', map { $PublicInbox::Search::Xap->can($_)->(); } qw(major_version minor_version revision)) . ", bindings: $PublicInbox::Search::Xap"; my $xs_ver = eval '$'."$PublicInbox::Search::Xap".'::VERSION'; push @out, $xs_ver ? " $xs_ver\n" : " SWIG\n"; } else { push @out, "Xapian not available: $@\n"; } my $dig = Digest::SHA->new(1); push @out, "public-inbox blob OIDs of loaded features:\n"; for my $m (grep(m{^PublicInbox/}, sort keys %INC)) { my $f = $INC{$m} // next; # lazy require failed (missing dep) $dig->add('blob '.(-s $f)."\0"); $dig->addfile($f); push @out, ' '.$dig->hexdigest.' '.$m."\n"; } push @out, <<'EOM'; Let us know how it sucks! Please include the above and any other relevant information when sending plain-text mail to us at: meta@public-inbox.org -- archives: https://public-inbox.org/meta/ EOM $lei->out(@out); } 1; public-inbox-1.9.0/lib/PublicInbox/LeiTag.pm000066400000000000000000000065451430031475700206210ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # handles "lei tag" command package PublicInbox::LeiTag; use strict; use v5.10.1; use parent qw(PublicInbox::IPC PublicInbox::LeiInput); use PublicInbox::InboxWritable qw(eml_from_path); sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh my ($self, $eml) = @_; if (my $xoids = $self->{lse}->xoids_for($eml) // # tries LeiMailSync $self->{lei}->{ale}->xoids_for($eml)) { $self->{lei}->{sto}->wq_do('update_xvmd', $xoids, $eml, $self->{vmd_mod}); } else { ++$self->{unimported}; } } sub pmdir_cb { # called via wq_io_do from LeiPmdir->each_mdir_fn my ($self, $f) = @_; my $eml = eml_from_path($f) or return; input_eml_cb($self, $eml); } sub lei_tag { # the "lei tag" method my ($lei, @argv) = @_; $lei->{opt}->{'in-format'} //= 'eml' if $lei->{opt}->{stdin}; my $sto = $lei->_lei_store(1)->write_prepare($lei); my $self = bless {}, __PACKAGE__; $lei->ale; # refresh and prepare my $vmd_mod = $self->vmd_mod_extract(\@argv); return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err}; $self->{vmd_mod} = $vmd_mod; # before LeiPmdir->new in prepare_inputs $self->prepare_inputs($lei, \@argv) or return; grep(defined, @$vmd_mod{qw(+kw +L -L -kw)}) or return $lei->fail('no keywords or labels specified'); $lei->{-err_type} = 'non-fatal'; $lei->wq1_start($self); } sub note_unimported { my ($self) = @_; my $n = $self->{unimported} or return; $self->{lei}->{pkt_op_p}->pkt_do('incr', 'unimported', $n); } sub ipc_atfork_child { my ($self) = @_; PublicInbox::LeiInput::input_only_atfork_child($self); $self->{lse} = $self->{lei}->{sto}->search; # this goes out-of-scope at worker process exit: PublicInbox::OnDestroy->new($$, \¬e_unimported, $self); } # Workaround bash word-splitting s to ['kw', ':', 'keyword' ...] # Maybe there's a better way to go about this in # contrib/completion/lei-completion.bash sub _complete_tag_common ($) { my ($argv) = @_; # Workaround bash word-splitting URLs to ['https', ':', '//' ...] # Maybe there's a better way to go about this in # contrib/completion/lei-completion.bash my $re = ''; my $cur = pop(@$argv) // ''; if (@$argv) { my @x = @$argv; if ($cur eq ':' && @x) { push @x, $cur; $cur = ''; } while (@x > 2 && $x[0] !~ /\A[+\-](?:kw|L)\z/ && $x[1] ne ':') { shift @x; } if (@x >= 2) { # qw(kw : $KEYWORD) or qw(kw :) $re = join('', @x); } else { # just return everything and hope for the best $re = join('', @$argv); } $re = quotemeta($re); } ($cur, $re); } # FIXME: same problems as _complete_forget_external and similar sub _complete_tag { my ($self, @argv) = @_; require PublicInbox::LeiImport; my @in = PublicInbox::LeiImport::_complete_import(@_); my @L = eval { $self->_lei_store->search->all_terms('L') }; my @kwL = ((map { ("+kw:$_", "-kw:$_") } @PublicInbox::LeiInput::KW), (map { ("+L:$_", "-L:$_") } @L)); my ($cur, $re) = _complete_tag_common(\@argv); my @m = map { # only return the part specified on the CLI # don't duplicate if already 100% completed /\A$re(\Q$cur\E.*)/ ? ($cur eq $1 ? () : $1) : (); } grep(/$re\Q$cur/, @kwL); (@in, (@m ? @m : @kwL)); } no warnings 'once'; # the following works even when LeiAuth is lazy-loaded *net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done; 1; public-inbox-1.9.0/lib/PublicInbox/LeiToMail.pm000066400000000000000000000611711430031475700212670ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # Writes PublicInbox::Eml objects atomically to a mbox variant or Maildir package PublicInbox::LeiToMail; use strict; use v5.10.1; use parent qw(PublicInbox::IPC); use PublicInbox::Eml; use PublicInbox::ProcessPipe; use PublicInbox::Spawn qw(spawn); use Symbol qw(gensym); use IO::Handle; # ->autoflush use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY); use PublicInbox::Syscall qw(rename_noreplace); my %kw2char = ( # Maildir characters draft => 'D', flagged => 'F', forwarded => 'P', # passed answered => 'R', seen => 'S', ); my %kw2status = ( flagged => [ 'X-Status' => 'F' ], answered => [ 'X-Status' => 'A' ], seen => [ 'Status' => 'R' ], draft => [ 'X-Status' => 'T' ], ); sub _mbox_hdr_buf ($$$) { my ($eml, $type, $smsg) = @_; $eml->header_set($_) for (qw(Lines Bytes Content-Length)); my %hdr = (Status => []); # set Status, X-Status for my $k (@{$smsg->{kw} // []}) { if (my $ent = $kw2status{$k}) { push @{$hdr{$ent->[0]}}, $ent->[1]; } else { # X-Label? warn "# keyword `$k' not supported for mbox\n"; } } # When writing to empty mboxes, messages are always 'O' # (not-\Recent in IMAP), it saves MUAs the trouble of # rewriting the mbox if no other changes are made. # We put 'O' at the end (e.g. "Status: RO") to match mutt(1) output. # We only set smsg->{-recent} if augmenting existing stores. my $status = join('', sort(@{$hdr{Status}})); $status .= 'O' unless $smsg->{-recent}; $eml->header_set('Status', $status) if $status; if (my $chars = delete $hdr{'X-Status'}) { $eml->header_set('X-Status', join('', sort(@$chars))); } my $buf = delete $eml->{hdr}; # fixup old bug from import (pre-a0c07cba0e5d8b6a) $$buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; my $ident = $smsg->{blob} // 'lei'; if (defined(my $pct = $smsg->{pct})) { $ident .= "=$pct" } substr($$buf, 0, 0, # prepend From line "From $ident\@$type Thu Jan 1 00:00:00 1970$eml->{crlf}"); $buf; } sub atomic_append { # for on-disk destinations (O_APPEND, or O_EXCL) my ($lei, $buf) = @_; if (defined(my $w = syswrite($lei->{1} // return, $$buf))) { return if $w == length($$buf); $buf = "short atomic write: $w != ".length($$buf); } elsif ($!{EPIPE}) { return $lei->note_sigpipe(1); } else { $buf = "atomic write: $!"; } $lei->fail($buf); } sub eml2mboxrd ($;$) { my ($eml, $smsg) = @_; my $buf = _mbox_hdr_buf($eml, 'mboxrd', $smsg); if (my $bdy = delete $eml->{bdy}) { $$bdy =~ s/^(>*From )/>$1/gm; $$buf .= $eml->{crlf}; substr($$bdy, 0, 0, $$buf); # prepend header $buf = $bdy; } $$buf .= $eml->{crlf}; $buf; } sub eml2mboxo { my ($eml, $smsg) = @_; my $buf = _mbox_hdr_buf($eml, 'mboxo', $smsg); if (my $bdy = delete $eml->{bdy}) { $$bdy =~ s/^From />From /gm; $$buf .= $eml->{crlf}; substr($$bdy, 0, 0, $$buf); # prepend header $buf = $bdy; } $$buf .= $eml->{crlf}; $buf; } sub _mboxcl_common ($$$) { my ($buf, $bdy, $crlf) = @_; # add Lines: so mutt won't have to add it on MUA close my $lines = $$bdy =~ tr!\n!\n!; $$buf .= 'Content-Length: '.length($$bdy).$crlf. 'Lines: '.$lines.$crlf.$crlf; substr($$bdy, 0, 0, $$buf); # prepend header $$bdy .= $crlf; $bdy; } # mboxcl still escapes "From " lines sub eml2mboxcl { my ($eml, $smsg) = @_; my $buf = _mbox_hdr_buf($eml, 'mboxcl', $smsg); my $bdy = delete($eml->{bdy}) // \(my $empty = ''); $$bdy =~ s/^From />From /gm; _mboxcl_common($buf, $bdy, $eml->{crlf}); } # mboxcl2 has no "From " escaping sub eml2mboxcl2 { my ($eml, $smsg) = @_; my $buf = _mbox_hdr_buf($eml, 'mboxcl2', $smsg); my $bdy = delete($eml->{bdy}) // \(my $empty = ''); _mboxcl_common($buf, $bdy, $eml->{crlf}); } sub git_to_mail { # git->cat_async callback my ($bref, $oid, $type, $size, $arg) = @_; $type // return; # called by git->async_abort my ($write_cb, $smsg) = @$arg; if ($type eq 'missing' && $smsg->{-lms_rw}) { if ($bref = $smsg->{-lms_rw}->local_blob($oid, 1)) { $type = 'blob'; $size = length($$bref); } } return warn("W: $oid is $type (!= blob)\n") if $type ne 'blob'; return warn("E: $oid is empty\n") unless $size; die "BUG: expected=$smsg->{blob} got=$oid" if $smsg->{blob} ne $oid; $write_cb->($bref, $smsg); } sub reap_compress { # dwaitpid callback my ($lei, $pid) = @_; my $cmd = delete $lei->{"pid.$pid"}; return if $? == 0; $lei->fail("@$cmd failed", $? >> 8); } sub _post_augment_mbox { # open a compressor process from top-level process my ($self, $lei) = @_; my $zsfx = $self->{zsfx} or return; my $cmd = PublicInbox::MboxReader::zsfx2cmd($zsfx, undef, $lei); my ($r, $w) = @{delete $lei->{zpipe}}; my $rdr = { 0 => $r, 1 => $lei->{1}, 2 => $lei->{2}, pgid => 0 }; my $pid = spawn($cmd, undef, $rdr); my $pp = gensym; my $dup = bless { "pid.$pid" => $cmd }, ref($lei); $dup->{$_} = $lei->{$_} for qw(2 sock); tie *$pp, 'PublicInbox::ProcessPipe', $pid, $w, \&reap_compress, $dup; $lei->{1} = $pp; } # --augment existing output destination, with deduplication sub _augment { # MboxReader eml_cb my ($eml, $lei) = @_; # ignore return value, just populate the skv $lei->{dedupe}->is_dup($eml); } sub _mbox_augment_kw_maybe { my ($eml, $lei, $lse, $augment) = @_; my $kw = PublicInbox::MboxReader::mbox_keywords($eml); update_kw_maybe($lei, $lse, $eml, $kw); _augment($eml, $lei) if $augment; } sub _mbox_write_cb ($$) { my ($self, $lei) = @_; my $ovv = $lei->{ovv}; my $m = 'eml2'.$ovv->{fmt}; my $eml2mbox = $self->can($m) or die "$self->$m missing"; $lei->{1} // die "no stdout ($m, $ovv->{dst})"; # redirected earlier $lei->{1}->autoflush(1); my $atomic_append = !defined($ovv->{lock_path}); my $dedupe = $lei->{dedupe}; $dedupe->prepare_dedupe; my $lse = $lei->{lse}; # may be undef my $set_recent = $dedupe->has_entries; sub { # for git_to_mail my ($buf, $smsg, $eml) = @_; $eml //= PublicInbox::Eml->new($buf); ++$lei->{-nr_seen}; return if $dedupe->is_dup($eml, $smsg); $lse->xsmsg_vmd($smsg) if $lse; $smsg->{-recent} = 1 if $set_recent; $buf = $eml2mbox->($eml, $smsg); if ($atomic_append) { atomic_append($lei, $buf); } else { my $lk = $ovv->lock_for_scope; $lei->out($$buf); } ++$lei->{-nr_write}; } } sub update_kw_maybe ($$$$) { my ($lei, $lse, $eml, $kw) = @_; return unless $lse; my $c = $lse->kw_changed($eml, $kw, my $docids = []); my $vmd = { kw => $kw }; if (scalar @$docids) { # already in lei/store $lei->{sto}->wq_do('set_eml_vmd', undef, $vmd, $docids) if $c; } elsif (my $xoids = $lei->{ale}->xoids_for($eml)) { # it's in an external, only set kw, here $lei->{sto}->wq_do('set_xvmd', $xoids, $eml, $vmd); } else { # never-before-seen, import the whole thing # XXX this is critical in protecting against accidental # data loss without --augment $lei->{sto}->wq_do('set_eml', $eml, $vmd); } } sub _md_update { # maildir_each_eml cb my ($f, $kw, $eml, $lei, $lse, $unlink) = @_; update_kw_maybe($lei, $lse, $eml, $kw); $unlink ? unlink($f) : _augment($eml, $lei); } # maildir_each_file callback, \&CORE::unlink doesn't work with it sub _unlink { unlink($_[0]) } sub _rand () { state $seq = 0; sprintf('%x,%x,%x,%x', rand(0xffffffff), time, $$, ++$seq); } sub kw2suffix ($;@) { my $kw = shift; join('', sort(map { $kw2char{$_} // () } @$kw, @_)); } sub _buf2maildir ($$$$) { my ($dst, $buf, $smsg, $dir) = @_; my $kw = $smsg->{kw} // []; my $rand = ''; # chosen by die roll :P my ($tmp, $fh, $base, $ok); my $common = $smsg->{blob} // _rand; if (defined(my $pct = $smsg->{pct})) { $common .= "=$pct" } do { $tmp = $dst.'tmp/'.$rand.$common; } while (!($ok = sysopen($fh, $tmp, O_CREAT|O_EXCL|O_WRONLY)) && $!{EEXIST} && ($rand = _rand.',')); if ($ok && print $fh $$buf and close($fh)) { $dst .= $dir; # 'new/' or 'cur/' $rand = ''; do { $base = $rand.$common.':2,'.kw2suffix($kw); } while (!($ok = rename_noreplace($tmp, $dst.$base)) && $!{EEXIST} && ($rand = _rand.',')); \$base; } else { my $err = "Error writing $smsg->{blob} to $dst: $!\n"; $_[0] = undef; # clobber dst unlink($tmp); die $err; } } sub _maildir_write_cb ($$) { my ($self, $lei) = @_; my $dedupe = $lei->{dedupe}; $dedupe->prepare_dedupe if $dedupe; my $dst = $lei->{ovv}->{dst}; my $lse = $lei->{lse}; # may be undef my $lms = $self->{-lms_rw}; my $out = $lms ? 'maildir:'.$lei->abs_path($dst) : undef; $lms->lms_write_prepare if $lms; # Favor cur/ and only write to new/ when augmenting. This # saves MUAs from having to do a mass rename when the initial # search result set is huge. my $dir = $dedupe && $dedupe->has_entries ? 'new/' : 'cur/'; sub { # for git_to_mail my ($bref, $smsg, $eml) = @_; $dst // return $lei->fail; # dst may be undef-ed in last run ++$lei->{-nr_seen}; return if $dedupe && $dedupe->is_dup($eml // PublicInbox::Eml->new($$bref), $smsg); $lse->xsmsg_vmd($smsg) if $lse; my $n = _buf2maildir($dst, $bref // \($eml->as_string), $smsg, $dir); $lms->set_src($smsg->oidbin, $out, $n) if $lms; ++$lei->{-nr_write}; } } sub _imap_write_cb ($$) { my ($self, $lei) = @_; my $dedupe = $lei->{dedupe}; $dedupe->prepare_dedupe if $dedupe; my $append = $lei->{net}->can('imap_append'); my $uri = $self->{uri}; my $mic = $lei->{net}->mic_get($uri); my $folder = $uri->mailbox; $uri->uidvalidity($mic->uidvalidity($folder)); my $lse = $lei->{lse}; # may be undef my $lms = $self->{-lms_rw}; $lms->lms_write_prepare if $lms; sub { # for git_to_mail my ($bref, $smsg, $eml) = @_; $mic // return $lei->fail; # mic may be undef-ed in last run ++$lei->{-nr_seen}; return if $dedupe && $dedupe->is_dup($eml // PublicInbox::Eml->new($$bref), $smsg); $lse->xsmsg_vmd($smsg) if $lse; my $uid = eval { $append->($mic, $folder, $bref, $smsg, $eml) }; if (my $err = $@) { undef $mic; die $err; } # imap_append returns UID if IMAP server has UIDPLUS extension ($lms && $uid =~ /\A[0-9]+\z/) and $lms->set_src($smsg->oidbin, $$uri, $uid + 0); ++$lei->{-nr_write}; } } sub _text_write_cb ($$) { my ($self, $lei) = @_; my $dedupe = $lei->{dedupe}; $dedupe->prepare_dedupe if $dedupe; my $lvt = $lei->{lvt}; my $ovv = $lei->{ovv}; $lei->{1} // die "no stdout ($ovv->{dst})"; # redirected earlier $lei->{1}->autoflush(1); binmode $lei->{1}, ':utf8'; my $lse = $lei->{lse}; # may be undef sub { # for git_to_mail my ($bref, $smsg, $eml) = @_; $lse->xsmsg_vmd($smsg) if $lse; $eml //= PublicInbox::Eml->new($bref); return if $dedupe && $dedupe->is_dup($eml, $smsg); my $lk = $ovv->lock_for_scope; $lei->out(${$lvt->eml_to_text($smsg, $eml)}, "\n"); } } sub _v2_write_cb ($$) { my ($self, $lei) = @_; my $dedupe = $lei->{dedupe}; $dedupe->prepare_dedupe if $dedupe; sub { # for git_to_mail my ($bref, $smsg, $eml) = @_; $eml //= PublicInbox::Eml->new($bref); ++$lei->{-nr_seen}; return if $dedupe && $dedupe->is_dup($eml, $smsg); $lei->{v2w}->wq_do('add', $eml); # V2Writable->add ++$lei->{-nr_write}; } } sub write_cb { # returns a callback for git_to_mail my ($self, $lei) = @_; # _mbox_write_cb, _maildir_write_cb, _imap_write_cb, _v2_write_cb my $m = "_$self->{base_type}_write_cb"; $self->$m($lei); } sub new { my ($cls, $lei) = @_; my $fmt = $lei->{ovv}->{fmt}; my $dst = $lei->{ovv}->{dst}; my $self = bless {}, $cls; my @conflict; if ($fmt eq 'maildir') { require PublicInbox::MdirReader; $self->{base_type} = 'maildir'; -e $dst && !-d _ and die "$dst exists and is not a directory\n"; $lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/'; $lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q'; } elsif (substr($fmt, 0, 4) eq 'mbox') { require PublicInbox::MboxReader; $self->can("eml2$fmt") or die "bad mbox format: $fmt\n"; $self->{base_type} = 'mbox'; if ($lei->{cmd} eq 'q' && (($lei->path_to_fd($dst) // -1) < 0) && (-f $dst || !-e _)) { $lei->{opt}->{save} //= \1; } } elsif ($fmt =~ /\Aimaps?\z/) { require PublicInbox::NetWriter; require PublicInbox::URIimap; # {net} may exist from "lei up" for auth my $net = $lei->{net} // PublicInbox::NetWriter->new; $net->{quiet} = $lei->{opt}->{quiet}; my $uri = PublicInbox::URIimap->new($dst)->canonical; $net->add_url($$uri); my $err = $net->errors($lei); return $lei->fail($err) if $err; $uri->mailbox or return $lei->fail("No mailbox: $dst"); $self->{uri} = $uri; $dst = $lei->{ovv}->{dst} = $$uri; # canonicalized $lei->{net} = $net; $self->{base_type} = 'imap'; $lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q'; } elsif ($fmt eq 'text' || $fmt eq 'reply') { require PublicInbox::LeiViewText; $lei->{lvt} = PublicInbox::LeiViewText->new($lei, $fmt); $self->{base_type} = 'text'; $self->{-wq_nr_workers} = 1; # for pager @conflict = qw(mua save); } elsif ($fmt eq 'v2') { die "--dedupe=oid and v2 are incompatible\n" if ($lei->{opt}->{dedupe}//'') eq 'oid'; $self->{base_type} = 'v2'; $self->{-wq_nr_workers} = 1; # v2 has shards $lei->{opt}->{save} = \1; $dst = $lei->{ovv}->{dst} = $lei->abs_path($dst); @conflict = qw(mua sort); } else { die "bad mail --format=$fmt\n"; } if ($self->{base_type} =~ /\A(?:text|mbox)\z/) { (-d $dst || (-e _ && !-w _)) and die "$dst exists and is not a writable file\n"; } my @err = map { defined($lei->{opt}->{$_}) ? "--$_" : () } @conflict; die "@err incompatible with $fmt\n" if @err; $self->{dst} = $dst; $lei->{dedupe} = $lei->{lss} // do { my $dd_cls = 'PublicInbox::'. ($lei->{opt}->{save} ? 'LeiSavedSearch' : 'LeiDedupe'); eval "require $dd_cls"; die "$dd_cls: $@" if $@; my $dd = $dd_cls->new($lei); $lei->{lss} //= $dd if $dd && $dd->can('cfg_set'); $dd; }; $self; } sub _pre_augment_maildir { my ($self, $lei) = @_; my $dst = $lei->{ovv}->{dst}; for my $x (qw(tmp new cur)) { my $d = $dst.$x; next if -d $d; require File::Path; File::Path::mkpath($d); -d $d or die "$d is not a directory"; } # for utime, so no opendir open $self->{poke_dh}, '<', "${dst}cur" or die "open ${dst}cur: $!"; } sub clobber_dst_prepare ($;$) { my ($lei, $f) = @_; if (my $lms = defined($f) ? $lei->lms : undef) { $lms->lms_write_prepare; $lms->forget_folders($f); } my $dedupe = $lei->{dedupe} or return; $dedupe->reset_dedupe if $dedupe->can('reset_dedupe'); } sub _do_augment_maildir { my ($self, $lei) = @_; return if $lei->{cmd} eq 'up'; my $dst = $lei->{ovv}->{dst}; my $lse = $lei->{opt}->{'import-before'} ? $lei->{lse} : undef; my $mdr = PublicInbox::MdirReader->new; if ($lei->{opt}->{augment}) { my $dedupe = $lei->{dedupe}; if ($dedupe && $dedupe->prepare_dedupe) { $mdr->{shard_info} = $self->{shard_info}; $mdr->maildir_each_eml($dst, \&_md_update, $lei, $lse); $dedupe->pause_dedupe; } } elsif ($lse) { clobber_dst_prepare($lei, "maildir:$dst"); $mdr->{shard_info} = $self->{shard_info}; $mdr->maildir_each_eml($dst, \&_md_update, $lei, $lse, 1); } else {# clobber existing Maildir clobber_dst_prepare($lei, "maildir:$dst"); $mdr->maildir_each_file($dst, \&_unlink); } } sub _imap_augment_or_delete { # PublicInbox::NetReader::imap_each cb my ($uri, $uid, $kw, $eml, $lei, $lse, $delete_mic) = @_; update_kw_maybe($lei, $lse, $eml, $kw); if ($delete_mic) { $lei->{net}->imap_delete_1($uri, $uid, $delete_mic); } else { _augment($eml, $lei); } } sub _do_augment_imap { my ($self, $lei) = @_; return if $lei->{cmd} eq 'up'; my $net = $lei->{net}; my $lse = $lei->{opt}->{'import-before'} ? $lei->{lse} : undef; if ($lei->{opt}->{augment}) { my $dedupe = $lei->{dedupe}; if ($dedupe && $dedupe->prepare_dedupe) { $net->imap_each($self->{uri}, \&_imap_augment_or_delete, $lei, $lse); $dedupe->pause_dedupe; } } elsif ($lse) { my $delete_mic; clobber_dst_prepare($lei, "$self->{uri}"); $net->imap_each($self->{uri}, \&_imap_augment_or_delete, $lei, $lse, \$delete_mic); $delete_mic->expunge if $delete_mic; } elsif (!$self->{-wq_worker_nr}) { # undef or 0 # clobber existing IMAP folder clobber_dst_prepare($lei, "$self->{uri}"); $net->imap_delete_all($self->{uri}); } } sub _pre_augment_text { my ($self, $lei) = @_; my $dst = $lei->{ovv}->{dst}; my $out; my $devfd = $lei->path_to_fd($dst) // die "bad $dst"; if ($devfd >= 0) { $out = $lei->{$devfd}; } else { # normal-looking path if (-p $dst) { open $out, '>', $dst or die "open($dst): $!"; } elsif (-f _ || !-e _) { # text allows augment, HTML/Atom won't my $mode = $lei->{opt}->{augment} ? '>>' : '>'; open $out, $mode, $dst or die "open($mode, $dst): $!"; } else { die "$dst is not a file or FIFO\n"; } } $lei->{ovv}->ovv_out_lk_init if !$lei->{ovv}->{lock_path}; $lei->{1} = $out; undef; } sub _pre_augment_mbox { my ($self, $lei) = @_; my $dst = $lei->{ovv}->{dst}; my $out; my $devfd = $lei->path_to_fd($dst) // die "bad $dst"; if ($devfd >= 0) { $out = $lei->{$devfd}; } else { # normal-looking path if (-p $dst) { open $out, '>', $dst or die "open($dst): $!"; } elsif (-f _ || !-e _) { require PublicInbox::MboxLock; my $m = $lei->{opt}->{'lock'} // PublicInbox::MboxLock->defaults; $self->{mbl} = PublicInbox::MboxLock->acq($dst, 1, $m); $out = $self->{mbl}->{fh}; } else { die "$dst is not a file or FIFO\n"; } $lei->{old_1} = $lei->{1}; # keep for spawning MUA } # Perl does SEEK_END even with O_APPEND :< $self->{seekable} = seek($out, 0, SEEK_SET); if (!$self->{seekable} && !$!{ESPIPE} && !defined($devfd)) { die "seek($dst): $!\n"; } if (!$self->{seekable}) { my $imp_before = $lei->{opt}->{'import-before'}; die "--import-before specified but $dst is not seekable\n" if $imp_before && !ref($imp_before); die "--augment specified but $dst is not seekable\n" if $lei->{opt}->{augment}; die "cannot --save with unseekable $dst\n" if $lei->{dedupe} && $lei->{dedupe}->can('reset_dedupe'); } if ($self->{zsfx} = PublicInbox::MboxReader::zsfx($dst)) { pipe(my ($r, $w)) or die "pipe: $!"; $lei->{zpipe} = [ $r, $w ]; $lei->{ovv}->{lock_path} and die 'BUG: unexpected {ovv}->{lock_path}'; $lei->{ovv}->ovv_out_lk_init; } elsif (!$self->{seekable} && !$lei->{ovv}->{lock_path}) { $lei->{ovv}->ovv_out_lk_init; } $lei->{1} = $out; undef; } sub _do_augment_mbox { my ($self, $lei) = @_; return unless $self->{seekable}; my $opt = $lei->{opt}; return if $lei->{cmd} eq 'up'; my $out = $lei->{1}; my ($fmt, $dst) = @{$lei->{ovv}}{qw(fmt dst)}; return clobber_dst_prepare($lei) unless -s $out; unless ($opt->{augment} || $opt->{'import-before'}) { truncate($out, 0) or die "truncate($dst): $!"; return; } my $rd; if (my $zsfx = $self->{zsfx}) { $rd = PublicInbox::MboxReader::zsfxcat($out, $zsfx, $lei); } else { open($rd, '+>>&', $out) or die "dup: $!"; } my $dedupe; if ($opt->{augment}) { $dedupe = $lei->{dedupe}; $dedupe->prepare_dedupe if $dedupe; } else { clobber_dst_prepare($lei); } if ($opt->{'import-before'}) { # the default my $lse = $lei->{lse}; PublicInbox::MboxReader->$fmt($rd, \&_mbox_augment_kw_maybe, $lei, $lse, $opt->{augment}); if (!$opt->{augment} and !truncate($out, 0)) { die "truncate($dst): $!"; } } else { # --augment --no-import-before PublicInbox::MboxReader->$fmt($rd, \&_augment, $lei); } # maybe some systems don't honor O_APPEND, Perl does this: seek($out, 0, SEEK_END) or die "seek $dst: $!"; $dedupe->pause_dedupe if $dedupe; } sub v2w_done_wait { # dwaitpid callback my ($arg, $pid) = @_; my ($v2w, $lei) = @$arg; $lei->child_error($?, "error for $v2w->{ibx}->{inboxdir}") if $?; } sub _pre_augment_v2 { my ($self, $lei) = @_; my $dir = $self->{dst}; require PublicInbox::InboxWritable; my ($ibx, @creat); if (-d $dir) { my $opt = { -min_inbox_version => 2 }; require PublicInbox::Admin; my @ibx = PublicInbox::Admin::resolve_inboxes([ $dir ], $opt); $ibx = $ibx[0] or die "$dir is not a v2 inbox\n"; } else { $creat[0] = {}; $ibx = PublicInbox::Inbox->new({ name => 'lei-result', # XXX configurable inboxdir => $dir, version => 2, address => [ 'lei@example.com' ], }); } PublicInbox::InboxWritable->new($ibx, @creat); $ibx->init_inbox if @creat; my $v2w = $ibx->importer; $v2w->wq_workers_start("lei/v2w $dir", 1, $lei->oldset, {lei => $lei}); $v2w->wq_wait_async(\&v2w_done_wait, $lei); $lei->{v2w} = $v2w; return if !$lei->{opt}->{shared}; my $d = "$lei->{ale}->{git}->{git_dir}/objects"; my $al = "$dir/git/0.git/objects/info/alternates"; open my $fh, '+>>', $al or die "open($al): $!"; seek($fh, 0, SEEK_SET) or die "seek($al): $!"; grep(/\A\Q$d\E\n/, <$fh>) and return; print $fh "$d\n" or die "print($al): $!"; close $fh or die "close($al): $!"; } sub pre_augment { # fast (1 disk seek), runs in same process as post_augment my ($self, $lei) = @_; # _pre_augment_maildir, _pre_augment_mbox, _pre_augment_v2 my $m = $self->can("_pre_augment_$self->{base_type}") or return; $m->($self, $lei); } sub do_augment { # slow, runs in wq worker my ($self, $lei) = @_; # _do_augment_maildir, _do_augment_mbox, or _do_augment_imap my $m = $self->can("_do_augment_$self->{base_type}") or return; $m->($self, $lei); } # fast (spawn compressor or mkdir), runs in same process as pre_augment sub post_augment { my ($self, $lei, @args) = @_; $self->{-au_noted}++ and $lei->qerr("# writing to $self->{dst} ..."); my $wait = $lei->{opt}->{'import-before'} ? $lei->{sto}->wq_do('checkpoint', 1) : 0; # _post_augment_mbox my $m = $self->can("_post_augment_$self->{base_type}") or return; $m->($self, $lei, @args); } # called by every single l2m worker process sub do_post_auth { my ($self) = @_; my $lei = $self->{lei}; # lei_xsearch can start as soon as all l2m workers get here $lei->{pkt_op_p}->pkt_do('incr_start_query') or die "incr_start_query: $!"; my $aug; if (lock_free($self)) { # all workers do_augment my $mod = $self->{-wq_nr_workers}; my $shard = $self->{-wq_worker_nr}; if (my $net = $lei->{net}) { $net->{shard_info} = [ $mod, $shard ]; } else { # Maildir $self->{shard_info} = [ $mod, $shard ]; } $aug = 'incr_post_augment'; } elsif ($self->{-wq_worker_nr} == 0) { # 1st worker do_augment $aug = 'do_post_augment'; } if ($aug) { local $0 = 'do_augment'; eval { do_augment($self, $lei) }; $lei->fail($@) if $@; $lei->{pkt_op_p}->pkt_do($aug) or die "pkt_do($aug): $!"; } # done augmenting, connect the compressor pipe for each worker if (my $zpipe = delete $lei->{zpipe}) { $lei->{1} = $zpipe->[1]; close $zpipe->[0]; } my $au_peers = delete $self->{au_peers}; if ($au_peers) { # wait for peer l2m to finish augmenting: $au_peers->[1] = undef; sysread($au_peers->[0], my $barrier1, 1); } $self->{wcb} = $self->write_cb($lei); if ($au_peers) { # wait for peer l2m to set write_cb $au_peers->[3] = undef; sysread($au_peers->[2], my $barrier2, 1); } } sub ipc_atfork_child { my ($self) = @_; my $lei = $self->{lei}; $lei->_lei_atfork_child; $lei->{auth}->do_auth_atfork($self) if $lei->{auth}; $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb(); $self->{git} = $self->{lei}->{ale}->git; $SIG{TERM} = sub { # avoid ->DESTROY ordering problems my $git = delete $self->{git}; $git->async_wait_all if $git; exit(15 + 128); }; $self->SUPER::ipc_atfork_child; } sub lock_free { $_[0]->{base_type} =~ /\A(?:maildir|imap|jmap)\z/ ? 1 : 0; } # wakes up the MUA when complete so it can refresh messages list sub poke_dst { my ($self) = @_; if ($self->{base_type} eq 'maildir') { my $t = time + 1; utime($t, $t, $self->{poke_dh}) or warn "futimes: $!"; } } sub write_mail { # via ->wq_io_do my ($self, $smsg, $eml) = @_; return $self->{wcb}->(undef, $smsg, $eml) if $eml; $smsg->{-lms_rw} = $self->{-lms_rw}; $self->{git}->cat_async($smsg->{blob}, \&git_to_mail, [$self->{wcb}, $smsg]); } sub wq_atexit_child { my ($self) = @_; local $PublicInbox::DS::in_loop = 0; # waitpid synchronously my $lei = $self->{lei}; delete $self->{wcb}; $lei->{ale}->git->async_wait_all; my ($nr_w, $nr_s) = delete(@$lei{qw(-nr_write -nr_seen)}); $nr_s or return; return if $lei->{early_mua} || !$lei->{-progress} || !$lei->{pkt_op_p}; $lei->{pkt_op_p}->pkt_do('l2m_progress', $nr_w, $nr_s); } # runs on a 1s timer in lei-daemon sub augment_inprogress { my ($err, $opt, $dst, $au_noted) = @_; eval { return if $$au_noted++ || !$err || !defined(fileno($err)); print $err '# '.($opt->{'import-before'} ? "importing non-external contents of $dst" : ( ($opt->{dedupe} // 'content') ne 'none') ? "scanning old contents of $dst for dedupe" : "removing old contents of $dst")." ...\n"; }; warn "E: $@ ($dst)" if $@; } # called in top-level lei-daemon when LeiAuth is done sub net_merge_all_done { my ($self, $lei) = @_; if ($PublicInbox::DS::in_loop && $self->can("_do_augment_$self->{base_type}") && !$lei->{opt}->{quiet}) { $self->{-au_noted} = 0; PublicInbox::DS::add_timer(1, \&augment_inprogress, $lei->{2}, $lei->{opt}, $self->{dst}, \$self->{-au_noted}); } $self->wq_broadcast('do_post_auth'); $self->wq_close; } 1; public-inbox-1.9.0/lib/PublicInbox/LeiUp.pm000066400000000000000000000170711430031475700204660ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # "lei up" - updates the result of "lei q --save" package PublicInbox::LeiUp; use strict; use v5.10.1; # n.b. we use LeiInput to setup IMAP auth use parent qw(PublicInbox::IPC PublicInbox::LeiInput); use PublicInbox::LeiSavedSearch; use PublicInbox::DS; use PublicInbox::PktOp; use PublicInbox::LeiFinmsg; my $REMOTE_RE = qr!\A(?:imap|http)s?://!i; # http(s) will be for JMAP sub up1 ($$) { my ($lei, $out) = @_; # precedence note for CLI switches between lei q and up: # `lei q --only' > `lei q --no-(remote|local|external)' # `lei up --no-(remote|local|external)' > `lei.q.only' in saved search my %no = map { my $v = $lei->{opt}->{$_}; # set by CLI (defined($v) && !$v) ? ($_ => 1) : (); } qw(remote local external); my $cli_exclude = delete $lei->{opt}->{exclude}; my $lss = PublicInbox::LeiSavedSearch->up($lei, $out) or return; my $f = $lss->{'-f'}; my $mset_opt = $lei->{mset_opt} = { relevance => -2 }; my $q = $lss->{-cfg}->get_all('lei.q') // die("lei.q unset in $f (out=$out)\n"); my $lse = $lei->{lse} // die 'BUG: {lse} missing'; my $rawstr = $lss->{-cfg}->{'lei.internal.rawstr'} // (scalar(@$q) == 1 && substr($q->[0], -1) eq "\n"); if ($rawstr) { scalar(@$q) > 1 and die "$f: lei.q has multiple values (@$q) (out=$out)\n"; $lse->query_approxidate($lse->git, $mset_opt->{qstr} = $q->[0]); } else { $mset_opt->{qstr} = $lse->query_argv_to_string($lse->git, $q); } # n.b. only a few CLI args are accepted for "up", so //= usually sets for my $k ($lss->ARRAY_FIELDS) { my $v = $lss->{-cfg}->get_all("lei.q.$k") // next; $lei->{opt}->{$k} //= $v; } # --no-(local|remote) CLI flags overrided saved `lei.q.only' my $only = $lei->{opt}->{only}; @$only = map { $lei->get_externals($_) } @$only if $only; if (scalar keys %no && $only) { @$only = grep(!m!\Ahttps?://!i, @$only) if $no{remote}; @$only = grep(m!\Ahttps?://!i, @$only) if $no{'local'}; } if ($cli_exclude) { my $ex = $lei->canonicalize_excludes($cli_exclude); @$only = grep { !$ex->{$_} } @$only if $only; push @{$lei->{opt}->{exclude}}, @$cli_exclude; } delete $lei->{opt}->{only} if $no{external} || ($only && !@$only); for my $k ($lss->BOOL_FIELDS, $lss->SINGLE_FIELDS) { my $v = $lss->{-cfg}->get_1("lei.q.$k") // next; $lei->{opt}->{$k} //= $v; } my $o = $lei->{opt}->{output} // ''; return die("lei.q.output unset in $f (out=$out)\n") if $o eq ''; $lss->translate_dedupe($lei) or return; $lei->{lss} = $lss; # for LeiOverview->new and query_remote_mboxrd my $lxs = $lei->lxs_prepare or return; $lei->ale->refresh_externals($lxs, $lei); $lei->_start_query; } sub redispatch_all ($$) { my ($self, $lei) = @_; my $upq = [ (@{$self->{o_local} // []}, @{$self->{o_remote} // []}) ]; return up1($lei, $upq->[0]) if @$upq == 1; # just one, may start MUA # FIXME: this is also used per-query, see lei->_start_query my $j = $lei->{opt}->{jobs} || do { my $n = $self->detect_nproc // 1; $n > 4 ? 4 : $n; }; $j = ($j =~ /\A([0-9]+)/) ? $1 + 0 : 1; # may be --jobs=$x,$m on CLI # re-dispatch into our event loop w/o creating an extra fork-level # $upq will be drained via DESTROY as each query finishes $lei->{fmsg} = PublicInbox::LeiFinmsg->new($lei); my ($op_c, $op_p) = PublicInbox::PktOp->pair; # call lei->dclose when upq is done processing: $op_c->{ops} = { '' => [ $lei->can('dclose'), $lei ] }; my @first_batch = splice(@$upq, 0, $j); # initial parallelism $lei->{-upq} = $upq; $lei->{daemon_pid} = $$; $lei->event_step_init; # wait for client disconnects for my $out (@first_batch) { PublicInbox::DS::requeue( PublicInbox::LeiUp1::nxt($lei, $out, $op_p)); } } sub filter_lss { my ($self, $lei, $all) = @_; my @outs = PublicInbox::LeiSavedSearch::list($lei); if ($all eq 'local') { $self->{o_local} = [ grep(!/$REMOTE_RE/, @outs) ]; } elsif ($all eq 'remote') { $self->{o_remote} = [ grep(/$REMOTE_RE/, @outs) ]; } elsif ($all eq '') { $self->{o_remote} = [ grep(/$REMOTE_RE/, @outs) ]; $self->{o_local} = [ grep(!/$REMOTE_RE/, @outs) ]; } else { undef; } } sub lei_up { my ($lei, @outs) = @_; my $opt = $lei->{opt}; my $self = bless { -mail_sync => 1 }, __PACKAGE__; if (defined(my $all = $opt->{all})) { return $lei->fail("--all and @outs incompatible") if @outs; defined($opt->{mua}) and return $lei->fail('--all and --mua= are incompatible'); filter_lss($self, $lei, $all) // return $lei->fail("only --all=$all not understood"); } elsif ($lei->{lse}) { # redispatched scalar(@outs) == 1 or die "BUG: lse set w/ >1 out[@outs]"; return up1($lei, $outs[0]); } else { $self->{o_remote} = [ grep(/$REMOTE_RE/, @outs) ]; $self->{o_local} = [ grep(!/$REMOTE_RE/, @outs) ]; } $lei->{lse} = $lei->_lei_store(1)->write_prepare($lei)->search; ((@{$self->{o_local} // []} + @{$self->{o_remote} // []}) > 1 && defined($opt->{mua})) and return $lei->fail(<{o_remote}) { # setup lei->{auth} $self->prepare_inputs($lei, $self->{o_remote}) or return; } if ($lei->{auth}) { # start auth worker require PublicInbox::NetWriter; bless $lei->{net}, 'PublicInbox::NetWriter'; $lei->wq1_start($self); # net_merge_all_done will fire when auth is done } else { redispatch_all($self, $lei); # see below } } # called in top-level lei-daemon when LeiAuth is done sub net_merge_all_done { my ($self, $lei) = @_; $lei->{net} = delete($self->{-net_new}) if $self->{-net_new}; $self->wq_close; eval { redispatch_all($self, $lei) }; $lei->child_error(0, "E: $@") if $@; } sub _complete_up { # lei__complete hook my ($lei, @argv) = @_; my $match_cb = $lei->complete_url_prepare(\@argv); map { $match_cb->($_) } PublicInbox::LeiSavedSearch::list($lei); } sub _wq_done_wait { # dwaitpid callback my ($arg, $pid) = @_; my ($wq, $lei) = @$arg; $lei->child_error($?, 'auth failure') if $? } no warnings 'once'; *ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child; package PublicInbox::LeiUp1; # for redispatch_all use strict; use v5.10.1; sub nxt ($$$) { my ($lei, $out, $op_p) = @_; bless { lei => $lei, out => $out, op_p => $op_p }, __PACKAGE__; } sub event_step { # runs via PublicInbox::DS::requeue my ($self) = @_; my $lei = $self->{lei}; # the original, from lei_up my $l = bless { %$lei }, ref($lei); # per-output copy delete($l->{sock}) or return; # client disconnected if {sock} is gone $l->{opt} = { %{$l->{opt}} }; # deep copy delete $l->{opt}->{all}; $l->qerr("# updating $self->{out}"); my $o = " (output: $self->{out})"; # add to all warnings my $cb = $SIG{__WARN__} // \&CORE::warn; local $SIG{__WARN__} = sub { my @m = @_; push(@m, $o) if !@m || $m[-1] !~ s/\n\z/$o\n/; $cb->(@m); }; $l->{-up1} = $self; # for LeiUp1->DESTROY delete @$l{qw(-socks -event_init_done)}; my ($op_c, $op_p) = PublicInbox::PktOp->pair; $self->{unref_on_destroy} = $op_c->{sock}; # to cleanup $lei->{-socks} $lei->pkt_ops($op_c->{ops} //= {}); # errors from $l -> script/lei push @{$lei->{-socks}}, $op_c->{sock}; # script/lei signals to $l $l->{sock} = $op_p->{op_p}; # receive signals from op_c->{sock} $op_c = $op_p = undef; eval { $l->dispatch('up', $self->{out}) }; $lei->child_error(0, $@) if $@ || $l->{failed}; # lei->fail() } sub DESTROY { my ($self) = @_; my $lei = $self->{lei}; # the original, from lei_up return if $lei->{daemon_pid} != $$; my $sock = delete $self->{unref_on_destroy}; my $s = $lei->{-socks} // []; @$s = grep { $_ != $sock } @$s; my $out = shift(@{$lei->{-upq}}) or return; PublicInbox::DS::requeue(nxt($lei, $out, $self->{op_p})); } 1; public-inbox-1.9.0/lib/PublicInbox/LeiViewText.pm000066400000000000000000000210051430031475700216510ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # PublicInbox::Eml to (optionally colorized) text coverter for terminals # the non-HTML counterpart to PublicInbox::View package PublicInbox::LeiViewText; use strict; use v5.10.1; use PublicInbox::MsgIter qw(msg_part_text); use PublicInbox::MID qw(references); use PublicInbox::View; use PublicInbox::Hval; use PublicInbox::ViewDiff; use PublicInbox::Spawn qw(popen_rd); use Term::ANSIColor; use POSIX (); use PublicInbox::Address; sub _xs { # xhtml_map works since we don't search for HTML ([&<>'"]) $_[0] =~ s/([\x7f\x00-\x1f])/$PublicInbox::Hval::xhtml_map{$1}/sge; } my %DEFAULT_COLOR = ( # mutt names, loaded from ~/.config/lei/config quoted => 'blue', hdrdefault => 'cyan', status => 'bright_cyan', # smsg stuff attachment => 'bright_red', # git names and defaults, falls back to ~/.gitconfig new => 'green', old => 'red', meta => 'bold', frag => 'cyan', func => undef, context => undef, ); my $COLOR = qr/(?:bright)? (?:normal|black|red|green|yellow|blue|magenta|cyan|white)/x; sub my_colored { my ($self, $slot, $buf) = @_; my $val = $self->{"color.$slot"} //= $self->{-leicfg}->{"color.$slot"} // $self->{-gitcfg}->{"color.diff.$slot"} // $self->{-gitcfg}->{"diff.color.$slot"} // $DEFAULT_COLOR{$slot}; $val = $val->[-1] if ref($val) eq 'ARRAY'; if (defined $val) { $val = lc $val; # git doesn't use "_", Term::ANSIColor does $val =~ s/\Abright([^_])/bright_$1/ig; # git: "green black" => T::A: "green on_black" $val =~ s/($COLOR)(.+?)($COLOR)/$1$2on_$3/; # FIXME: convert git #XXXXXX to T::A-compatible colors # for 256-color terminals ${$self->{obuf}} .= colored($buf, $val); } else { ${$self->{obuf}} .= $buf; } } sub uncolored { ${$_[0]->{obuf}} .= $_[2] } sub new { my ($cls, $lei, $fmt) = @_; my $self = bless { %{$lei->{opt}}, -colored => \&uncolored }, $cls; $self->{-quote_reply} = 1 if $fmt eq 'reply'; return $self unless $self->{color} //= -t $lei->{1}; my $cmd = [ qw(git config -z --includes -l) ]; my ($r, $pid) = popen_rd($cmd, undef, { 2 => $lei->{2} }); my $cfg = PublicInbox::Config::config_fh_parse($r, "\0", "\n"); waitpid($pid, 0); if ($?) { warn "# git-config failed, no color (non-fatal)\n"; return $self; } $self->{-colored} = \&my_colored; $self->{-gitcfg} = $cfg; $self->{-leicfg} = $lei->{cfg}; $self; } sub quote_hdr_buf ($$) { my ($self, $eml) = @_; my $hbuf = ''; my $to = $eml->header_raw('Reply-To') // $eml->header_raw('From') // $eml->header_raw('Sender'); my $cc = ''; for my $f (qw(To Cc)) { for my $v ($eml->header_raw($f)) { next if $v !~ /\S/; $cc .= ", $v"; $to //= $v; } } substr($cc, 0, 2, ''); # s/^, //; PublicInbox::View::fold_addresses($to); PublicInbox::View::fold_addresses($cc); _xs($to); _xs($cc); $hbuf .= "To: $to\n" if defined $to && $to =~ /\S/; $hbuf .= "Cc: $cc\n" if $cc =~ /\S/; my $s = $eml->header_str('Subject') // 'your mail'; _xs($s); substr($s, 0, 0, 'Re: ') if $s !~ /\bRe:/i; $hbuf .= "Subject: $s\n"; if (defined(my $irt = $eml->header_raw('Message-ID'))) { _xs($irt); $hbuf .= "In-Reply-To: $irt\n"; } $self->{-colored}->($self, 'hdrdefault', $hbuf); my ($n) = PublicInbox::Address::names($eml->header_str('From') // $eml->header_str('Sender') // $eml->header_str('Reply-To') // 'unknown sender'); my $d = $eml->header_raw('Date') // 'some unknown date'; _xs($d); _xs($n); ${delete $self->{obuf}} . "\nOn $d, $n wrote:\n"; } sub hdr_buf ($$) { my ($self, $eml) = @_; my $hbuf = ''; for my $f (qw(From To Cc)) { for my $v ($eml->header($f)) { next if $v !~ /\S/; PublicInbox::View::fold_addresses($v); _xs($v); $hbuf .= "$f: $v\n"; } } for my $f (qw(Subject Date Newsgroups Message-ID X-Message-ID)) { for my $v ($eml->header($f)) { _xs($v); $hbuf .= "$f: $v\n"; } } if (my @irt = $eml->header_raw('In-Reply-To')) { for my $v (@irt) { _xs($v); $hbuf .= "In-Reply-To: $v\n"; } } else { my $refs = references($eml); if (defined(my $irt = pop @$refs)) { _xs($irt); $hbuf .= "In-Reply-To: <$irt>\n"; } if (@$refs) { my $max = $self->{-max_cols}; $hbuf .= 'References: ' . join("\n\t", map { '<'._xs($_).'>' } @$refs) . ">\n"; } } $self->{-colored}->($self, 'hdrdefault', $hbuf .= "\n"); } sub attach_note ($$$$;$) { my ($self, $ct, $p, $fn, $err) = @_; my ($part, $depth, $idx) = @$p; my $nl = $idx eq '1' ? '' : "\n"; # like join("\n", ...) my $abuf = $err ? <body); my $ts = "Type: $ct, Size: $size bytes"; my $d = $part->header('Content-Description') // $fn // ''; _xs($d); $abuf .= $d eq '' ? "$ts --]\n" : "$d --]\n[-- $ts --]\n"; if (my $blob = $self->{-smsg}->{blob}) { $abuf .= "[-- lei blob $blob:$idx --]\n"; } $self->{-colored}->($self, 'attachment', $abuf); hdr_buf($self, $part) if $part->{is_submsg}; } sub flush_text_diff ($$) { my ($self, $cur) = @_; my @top = split($PublicInbox::ViewDiff::EXTRACT_DIFFS, $$cur); undef $$cur; # free memory my $dctx; my $obuf = $self->{obuf}; my $colored = $self->{-colored}; while (defined(my $x = shift @top)) { if (scalar(@top) >= 4 && $top[1] =~ $PublicInbox::ViewDiff::IS_OID && $top[0] =~ $PublicInbox::ViewDiff::IS_OID) { splice(@top, 0, 4); $dctx = 1; $colored->($self, 'meta', $x); } elsif ($dctx) { # Quiet "Complex regular subexpression recursion limit" # warning. Perl will truncate matches upon hitting # that limit, giving us more (and shorter) scalars than # would be ideal, but otherwise it's harmless. # # We could replace the `+' metacharacter with `{1,100}' # to limit the matches ourselves to 100, but we can # let Perl do it for us, quietly. no warnings 'regexp'; for my $s (split(/((?:(?:^\+[^\n]*\n)+)| (?:(?:^-[^\n]*\n)+)| (?:^@@ [^\n]+\n))/xsm, $x)) { if (!defined($dctx)) { ${$self->{obuf}} .= $s; } elsif ($s =~ s/\A(@@ \S+ \S+ @@\s*)//) { $colored->($self, 'frag', $1); $colored->($self, 'func', $s); } elsif ($s =~ /\A\+/) { $colored->($self, 'new', $s); } elsif ($s =~ /\A-- $/sm) { # email sig starts $dctx = undef; ${$self->{obuf}} .= $s; } elsif ($s =~ /\A-/) { $colored->($self, 'old', $s); } else { $colored->($self, 'context', $s); } } } else { ${$self->{obuf}} .= $x; } } } sub add_text_buf { # callback for Eml->each_part my ($p, $self) = @_; my ($part, $depth, $idx) = @$p; my $ct = $part->content_type || 'text/plain'; my $fn = $part->filename; my ($s, $err) = msg_part_text($part, $ct); return attach_note($self, $ct, $p, $fn) unless defined $s; hdr_buf($self, $part) if $part->{is_submsg}; $s =~ s/\r+\n/\n/sg; _xs($s); my $diff = ($s =~ /^--- [^\n]+\n\+{3} [^\n]+\n@@ /ms); my @sections = PublicInbox::MsgIter::split_quotes($s); undef $s; # free memory if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) { # badly-encoded message with $err? tell the world about it! attach_note($self, $ct, $p, $fn, $err); ${$self->{obuf}} .= "\n"; } my $colored = $self->{-colored}; for my $cur (@sections) { if ($cur =~ /\A>/) { $colored->($self, 'quoted', $cur); } elsif ($diff) { flush_text_diff($self, \$cur); } else { ${$self->{obuf}} .= $cur; } undef $cur; # free memory } } # returns a stringref suitable for $lei->out or print sub eml_to_text { my ($self, $smsg, $eml) = @_; local $Term::ANSIColor::EACHLINE = "\n"; $self->{obuf} = \(my $obuf = ''); $self->{-smsg} = $smsg; $self->{-max_cols} = ($self->{columns} //= 80) - 8; # for header wrap my $h = []; if ($self->{-quote_reply}) { my $blob = $smsg->{blob} // 'unknown-blob'; my $pct = $smsg->{pct} // 'unknown'; my $t = POSIX::asctime(gmtime($smsg->{ts} // $smsg->{ds} // 0)); $h->[0] = "From $blob\@$pct $t"; } else { for my $f (qw(blob pct)) { push @$h, "$f:$smsg->{$f}" if defined $smsg->{$f}; } @$h = ("# @$h\n") if @$h; for my $f (qw(kw L)) { my $v = $smsg->{$f} or next; push @$h, "# $f:".join(',', @$v)."\n" if @$v; } } $h = join('', @$h); $self->{-colored}->($self, 'status', $h); my $quote_hdr; if ($self->{-quote_reply}) { $quote_hdr = ${delete $self->{obuf}}; $quote_hdr .= quote_hdr_buf($self, $eml); } else { hdr_buf($self, $eml); } $eml->each_part(\&add_text_buf, $self, 1); if (defined $quote_hdr) { ${$self->{obuf}} =~ s/^/> /sgm; substr(${$self->{obuf}}, 0, 0, $quote_hdr); } delete $self->{obuf}; } 1; public-inbox-1.9.0/lib/PublicInbox/LeiWatch.pm000066400000000000000000000005401430031475700211410ustar00rootroot00000000000000# Copyright all contributors # License: AGPL-3.0+ # represents a Maildir or IMAP "watch" item package PublicInbox::LeiWatch; use strict; use v5.10.1; use parent qw(PublicInbox::IPC); # "url" may be something like "maildir:/path/to/dir" sub new { bless { url => $_[1] }, $_[0] } 1; public-inbox-1.9.0/lib/PublicInbox/LeiXSearch.pm000066400000000000000000000505451430031475700214420ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # Combine any combination of PublicInbox::Search, # PublicInbox::ExtSearch, and PublicInbox::LeiSearch objects # into one Xapian DB package PublicInbox::LeiXSearch; use strict; use v5.10.1; use parent qw(PublicInbox::LeiSearch PublicInbox::IPC); use PublicInbox::DS qw(now); use File::Temp 0.19 (); # 0.19 for ->newdir use File::Spec (); use PublicInbox::Search qw(xap_terms); use PublicInbox::Spawn qw(popen_rd spawn which); use PublicInbox::MID qw(mids); use PublicInbox::Smsg; use PublicInbox::AutoReap; use PublicInbox::Eml; use PublicInbox::LEI; use Fcntl qw(SEEK_SET F_SETFL O_APPEND O_RDWR); use PublicInbox::ContentHash qw(git_sha); use POSIX qw(strftime); sub new { my ($class) = @_; PublicInbox::Search::load_xapian(); bless { qp_flags => $PublicInbox::Search::QP_FLAGS | PublicInbox::Search::FLAG_PURE_NOT(), }, $class } sub attach_external { my ($self, $ibxish) = @_; # ibxish = ExtSearch or Inbox my $desc = $ibxish->{inboxdir} // $ibxish->{topdir}; my $srch = $ibxish->search // return warn("$desc not indexed for Xapian ($@ $!)\n"); my @shards = $srch->xdb_shards_flat or return warn("$desc has no Xapian shards\n"); if (delete $self->{xdb}) { # XXX: do we need this? # clobber existing {xdb} if amending my $expect = delete $self->{nshard}; my $shards = delete $self->{shards_flat}; scalar(@$shards) == $expect or die "BUG: {nshard}$expect != shards=".scalar(@$shards); my $prev = {}; for my $old_ibxish (@{$self->{shard2ibx}}) { next if $prev == $old_ibxish; $prev = $old_ibxish; my @shards = $old_ibxish->search->xdb_shards_flat; push @{$self->{shards_flat}}, @shards; } my $nr = scalar(@{$self->{shards_flat}}); $nr == $expect or die "BUG: reloaded $nr shards, expected $expect" } push @{$self->{shards_flat}}, @shards; push(@{$self->{shard2ibx}}, $ibxish) for (@shards); } # returns a list of local inboxes (or count in scalar context) sub locals { @{$_[0]->{locals} // []} } sub remotes { @{$_[0]->{remotes} // []} } # called by PublicInbox::Search::xdb (usually via ->mset) sub xdb_shards_flat { @{$_[0]->{shards_flat} // []} } sub _mitem_kw { # retry_reopen callback my ($srch, $smsg, $mitem, $flagged) = @_; my $doc = $mitem->get_document; my $kw = xap_terms('K', $doc); $kw->{flagged} = 1 if $flagged; my @L = xap_terms('L', $doc); # we keep the empty {kw} array here to prevent expensive work in # ->xsmsg_vmd, _unbless_smsg will clobber it iff it's empty $smsg->{kw} = [ sort keys %$kw ]; $smsg->{L} = \@L if scalar(@L); } sub mitem_kw ($$$;$) { my ($srch, $smsg, $mitem, $flagged) = @_; $srch->retry_reopen(\&_mitem_kw, $smsg, $mitem, $flagged); } # like over->get_art sub smsg_for { my ($self, $mitem) = @_; # cf. https://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID my $nshard = $self->{nshard}; my $docid = $mitem->get_docid; my $shard = ($docid - 1) % $nshard; my $num = int(($docid - 1) / $nshard) + 1; my $ibx = $self->{shard2ibx}->[$shard]; my $smsg = $ibx->over->get_art($num); return if $smsg->{bytes} == 0; # external message if ($ibx->can('msg_keywords')) { mitem_kw($self, $smsg, $mitem); } $smsg; } sub recent { my ($self, $qstr, $opt) = @_; $opt //= {}; $opt->{relevance} //= -2; $self->mset($qstr //= 'z:1..', $opt); } sub over {} sub _check_mset_limit ($$$) { my ($lei, $desc, $mset) = @_; return if defined($lei->{opt}->{limit}); # user requested limit my $est = $mset->get_matches_estimated; my $tot = $lei->{mset_opt}->{total}; $est > $tot and $lei->qerr(<<""); # $desc estimated matches ($est) exceeds default --limit=$tot } sub _mset_more ($$) { my ($mset, $mo) = @_; my $size = $mset->size; $size >= $mo->{limit} && (($mo->{offset} += $size) < $mo->{total}); } # $startq will EOF when do_augment is done augmenting and allow # query_combined_mset and query_thread_mset to proceed. sub wait_startq ($) { my ($lei) = @_; my $startq = delete $lei->{startq} or return; while (1) { my $n = sysread($startq, my $do_augment_done, 1); if (defined $n) { return if $n == 0; # no MUA if ($do_augment_done eq 'q') { $lei->{opt}->{quiet} = 1; delete $lei->{opt}->{verbose}; delete $lei->{-progress}; } else { die "BUG: do_augment_done=`$do_augment_done'"; } return; } die "wait_startq: $!" unless $!{EINTR}; } } sub mset_progress { my $lei = shift; return if $lei->{early_mua} || !$lei->{-progress}; if ($lei->{pkt_op_p}) { $lei->{pkt_op_p}->pkt_do('mset_progress', @_); } else { # single lei-daemon consumer my ($desc, $mset_size, $mset_total_est) = @_; $lei->{-mset_total} += $mset_size if $mset_total_est ne '?'; $lei->qerr("# $desc $mset_size/$mset_total_est"); } } sub l2m_progress { my ($lei, $nr_write, $nr_seen) = @_; $lei->{-nr_write} += $nr_write; $lei->{-nr_seen} += $nr_seen; } sub query_one_mset { # for --threads and l2m w/o sort my ($self, $ibxish) = @_; local $0 = "$0 query_one_mset"; my $lei = $self->{lei}; my ($srch, $over) = ($ibxish->search, $ibxish->over); my $dir = $ibxish->{inboxdir} // $ibxish->{topdir}; return warn("$dir not indexed by Xapian\n") unless ($srch && $over); bless $srch, 'PublicInbox::LeiSearch'; # for ->qparse_new my $mo = { %{$lei->{mset_opt}} }; # copy my $mset; my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei); my $can_kw = !!$ibxish->can('msg_keywords'); my $threads = $lei->{opt}->{threads} // 0; my $fl = $threads > 1 ? 1 : undef; my $lss = $lei->{lss}; my $maxk = "external.$dir.maxuid"; my $stop_at = $lss ? $lss->{-cfg}->{$maxk} : undef; if (defined $stop_at) { ref($stop_at) and return warn("$maxk=$stop_at has multiple values\n"); ($stop_at =~ /[^0-9]/) and return warn("$maxk=$stop_at not numeric\n"); } my $first_ids; do { $mset = $srch->mset($mo->{qstr}, $mo); mset_progress($lei, $dir, $mo->{offset} + $mset->size, $mset->get_matches_estimated); wait_startq($lei); # wait for keyword updates my $ids = $srch->mset_to_artnums($mset, $mo); @$ids = grep { $_ > $stop_at } @$ids if defined($stop_at); my $i = 0; if ($threads) { # copy $ids if $lss since over->expand_thread # shifts @{$ctx->{ids}} $first_ids = [ @$ids ] if $lss; my $ctx = { ids => $ids }; my %n2item = map { ($ids->[$i++], $_) } $mset->items; while ($over->expand_thread($ctx)) { for my $n (@{$ctx->{xids}}) { my $smsg = $over->get_art($n) or next; my $mitem = delete $n2item{$n}; next if $smsg->{bytes} == 0; if ($mitem && $can_kw) { mitem_kw($srch, $smsg, $mitem, $fl); } elsif ($mitem && $fl) { # call ->xsmsg_vmd, later $smsg->{lei_q_tt_flagged} = 1; } $each_smsg->($smsg, $mitem); } @{$ctx->{xids}} = (); } } else { $first_ids = $ids; my @items = $mset->items; for my $n (@$ids) { my $mitem = $items[$i++]; my $smsg = $over->get_art($n) or next; next if $smsg->{bytes} == 0; mitem_kw($srch, $smsg, $mitem, $fl) if $can_kw; $each_smsg->($smsg, $mitem); } } } while (_mset_more($mset, $mo)); _check_mset_limit($lei, $dir, $mset); if ($lss && scalar(@$first_ids)) { undef $stop_at; my $max = $first_ids->[0]; $lss->cfg_set($maxk, $max); undef $lss; } undef $each_smsg; # may commit $lei->{ovv}->ovv_atexit_child($lei); } sub query_combined_mset { # non-parallel for non-"--threads" users my ($self) = @_; local $0 = "$0 query_combined_mset"; my $lei = $self->{lei}; my $mo = { %{$lei->{mset_opt}} }; my $mset; for my $loc (locals($self)) { attach_external($self, $loc); } my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei); do { $mset = $self->mset($mo->{qstr}, $mo); mset_progress($lei, 'xsearch', $mo->{offset} + $mset->size, $mset->get_matches_estimated); wait_startq($lei); # wait for keyword updates for my $mitem ($mset->items) { my $smsg = smsg_for($self, $mitem) or next; $each_smsg->($smsg, $mitem); } } while (_mset_more($mset, $mo)); _check_mset_limit($lei, 'xsearch', $mset); undef $each_smsg; # may commit $lei->{ovv}->ovv_atexit_child($lei); } sub _smsg_fill ($$) { my ($smsg, $eml) = @_; $smsg->populate($eml); $smsg->parse_references($eml, mids($eml)); $smsg->{$_} //= '' for qw(from to cc ds subject references mid); delete @$smsg{qw(From Subject -ds -ts)}; } sub each_remote_eml { # callback for MboxReader->mboxrd my ($eml, $self, $lei, $each_smsg) = @_; my $xoids = $lei->{ale}->xoids_for($eml, 1); my $smsg = bless {}, 'PublicInbox::Smsg'; if ($self->{import_sto} && !$xoids) { my ($res, $kw) = $self->{import_sto}->wq_do('add_eml', $eml); if (ref($res) eq ref($smsg)) { # totally new message $smsg = $res; $self->{-sto_imported} = 1; } $smsg->{kw} = $kw; # short-circuit xsmsg_vmd } $smsg->{blob} //= $xoids ? (keys(%$xoids))[0] : $lei->git_oid($eml)->hexdigest; _smsg_fill($smsg, $eml); wait_startq($lei); my $nr = ++$lei->{-nr_remote_eml}; # needed for lss->cfg_set if ($lei->{-progress}) { my $now = now(); my $next = $lei->{-next_progress} //= ($now + 1); if ($now > $next) { $lei->{-next_progress} = $now + 1; mset_progress($lei, $lei->{-current_url}, $nr, '?'); } } $each_smsg->($smsg, undef, $eml); } sub fudge_qstr_time ($$$) { my ($lei, $uri, $qstr) = @_; return ($qstr, undef) unless $lei->{lss}; my $cfg = $lei->{lss}->{-cfg} // die 'BUG: no lss->{-cfg}'; my $cfg_key = "external.$uri.lastresult"; my $lr = $cfg->{$cfg_key} or return ($qstr, $cfg_key); if ($lr !~ /\A\-?[0-9]+\z/) { $lei->child_error(0, "$cfg->{-f}: $cfg_key=$lr not an integer, ignoring"); return ($qstr, $cfg_key); } my $rft = $lei->{opt}->{'remote-fudge-time'}; if ($rft && $rft !~ /\A-?[0-9]+\z/) { my @t = $lei->{lss}->git->date_parse($rft); my $diff = time - $t[0]; $lei->qerr("# $rft => $diff seconds"); $rft = $diff; } $lr -= ($rft || (48 * 60 * 60)); $lei->qerr("# $uri limiting to ". strftime('%Y-%m-%d %k:%M %z', localtime($lr)). ' and newer'); # this should really be rt: (received-time), but no stable # public-inbox releases support it, yet. my $dt = 'dt:'.strftime('%Y%m%d%H%M%S', gmtime($lr)).'..'; if ($qstr =~ /\S/) { substr($qstr, 0, 0, '('); $qstr .= ') AND '; } ($qstr .= $dt, $cfg_key); } sub query_remote_mboxrd { my ($self, $uris) = @_; local $0 = "$0 query_remote_mboxrd"; local $SIG{TERM} = sub { exit(0) }; # for DESTROY (File::Temp, $reap) my $lei = $self->{lei}; my $opt = $lei->{opt}; chomp(my $qstr = $lei->{mset_opt}->{qstr}); $qstr =~ s/[ \n\t]+/ /sg; # make URLs less ugly my @qform = (x => 'm'); push(@qform, t => 1) if $opt->{threads}; my $verbose = $opt->{verbose}; my $reap_tail; my $cerr = File::Temp->new(TEMPLATE => 'curl.err-XXXX', TMPDIR => 1); fcntl($cerr, F_SETFL, O_APPEND|O_RDWR) or warn "set O_APPEND: $!"; my $rdr = { 2 => $cerr }; if ($verbose) { # spawn a process to force line-buffering, otherwise curl # will write 1 character at-a-time and parallel outputs # mmmaaayyy llloookkk llliiikkkeee ttthhhiiisss my $o = { 1 => $lei->{2}, 2 => $lei->{2} }; my $pid = spawn(['tail', '-f', $cerr->filename], undef, $o); $reap_tail = PublicInbox::AutoReap->new($pid); } my $curl = PublicInbox::LeiCurl->new($lei, $self->{curl}) or return; push @$curl, '-s', '-d', ''; my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei); $self->{import_sto} = $lei->{sto} if $lei->{opt}->{'import-remote'}; for my $uri (@$uris) { $lei->{-current_url} = $uri->as_string; $lei->{-nr_remote_eml} = 0; my $start = time; my ($q, $key) = fudge_qstr_time($lei, $uri, $qstr); $uri->query_form(@qform, q => $q); my $cmd = $curl->for_uri($lei, $uri); $lei->qerr("# $cmd"); my ($fh, $pid) = popen_rd($cmd, undef, $rdr); my $reap_curl = PublicInbox::AutoReap->new($pid); $fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1); PublicInbox::MboxReader->mboxrd($fh, \&each_remote_eml, $self, $lei, $each_smsg); if (delete($self->{-sto_imported})) { my $wait = $self->{import_sto}->wq_do('done'); } $reap_curl->join; if ($? == 0) { # don't update if no results, maybe MTA is down my $nr = $lei->{-nr_remote_eml}; $lei->{lss}->cfg_set($key, $start) if $key && $nr; mset_progress($lei, $lei->{-current_url}, $nr, $nr); next; } my $err; if (-s $cerr) { seek($cerr, 0, SEEK_SET) // warn "seek($cmd stderr): $!"; $err = do { local $/; <$cerr> } // warn "read($cmd stderr): $!"; truncate($cerr, 0) // warn "truncate($cmd stderr): $!"; } $err //= ''; next if (($? >> 8) == 22 && $err =~ /\b404\b/); $uri->query_form(q => $qstr); $lei->child_error($?, "E: <$uri> $err"); } undef $each_smsg; $lei->{ovv}->ovv_atexit_child($lei); } sub git { $_[0]->{git} // die 'BUG: git uninitialized' } sub xsearch_done_wait { # dwaitpid callback my ($arg, $pid) = @_; my ($wq, $lei) = @$arg; return if !$?; my $s = $? & 127; return $lei->child_error($?) if $s == 13 || $s == 15; $lei->child_error($?, 'non-fatal error from '.ref($wq)." \$?=$?"); } sub query_done { # EOF callback for main daemon my ($lei) = @_; local $PublicInbox::LEI::current_lei = $lei; eval { my $l2m = delete $lei->{l2m}; delete $lei->{lxs}; ($lei->{opt}->{'mail-sync'} && !$lei->{sto}) and warn "BUG: {sto} missing with --mail-sync"; $lei->sto_done_request if $lei->{sto}; if (my $v2w = delete $lei->{v2w}) { my $wait = $v2w->wq_do('done'); # may die $v2w->wq_close; } $lei->{ovv}->ovv_end($lei); if ($l2m) { # close() calls LeiToMail reap_compress if (my $out = delete $lei->{old_1}) { if (my $mbout = $lei->{1}) { close($mbout) or die <<""; Error closing $lei->{ovv}->{dst}: \$!=$! \$?=$? } $lei->{1} = $out; } if ($l2m->lock_free) { $l2m->poke_dst; $lei->poke_mua; } else { # mbox users delete $l2m->{mbl}; # drop dotlock } } if ($lei->{-progress}) { my $tot = $lei->{-mset_total} // 0; my $nr_w = $lei->{-nr_write} // 0; my $d = ($lei->{-nr_seen} // 0) - $nr_w; my $x = "$tot matches"; $x .= ", $d duplicates" if $d; if ($l2m) { my $m = "# $nr_w written to " . "$lei->{ovv}->{dst} ($x)"; $nr_w ? $lei->qfin($m) : $lei->qerr($m); } else { $lei->qerr("# $x"); } } $lei->start_mua if $l2m && !$l2m->lock_free; $lei->dclose; }; $lei->fail($@) if $@; } sub do_post_augment { my ($lei) = @_; local $PublicInbox::LEI::current_lei = $lei; my $l2m = $lei->{l2m} or return; # client disconnected eval { $lei->fchdir; $l2m->post_augment($lei); }; my $err = $@; if ($err) { if (my $lxs = delete $lei->{lxs}) { $lxs->wq_kill('-TERM'); $lxs->wq_close; } $lei->fail("$err"); } if (!$err && delete $lei->{early_mua}) { # non-augment case eval { $lei->start_mua }; $lei->fail($@) if $@; } close(delete $lei->{au_done}); # triggers wait_startq in lei_xsearch } sub incr_post_augment { # called whenever an l2m shard finishes augment my ($lei) = @_; my $l2m = $lei->{l2m} or return; # client disconnected return if ++$lei->{nr_post_augment} != $l2m->{-wq_nr_workers}; do_post_augment($lei); } my $MAX_PER_HOST = 4; sub concurrency { my ($self, $opt) = @_; my $nl = $opt->{threads} ? locals($self) : 1; my $nr = remotes($self); $nr = $MAX_PER_HOST if $nr > $MAX_PER_HOST; $nl + $nr; } sub start_query ($$) { # always runs in main (lei-daemon) process my ($self, $lei) = @_; local $PublicInbox::LEI::current_lei = $lei; if ($self->{opt_threads} || ($lei->{l2m} && !$self->{opt_sort})) { for my $ibxish (locals($self)) { $self->wq_io_do('query_one_mset', [], $ibxish); } } elsif (locals($self)) { $self->wq_io_do('query_combined_mset', []); } my $i = 0; my $q = []; for my $uri (remotes($self)) { push @{$q->[$i++ % $MAX_PER_HOST]}, $uri; } for my $uris (@$q) { $self->wq_io_do('query_remote_mboxrd', [], $uris); } if ($self->{-do_lcat}) { $self->wq_io_do('lcat_dump', []); } $self->wq_close; # lei_xsearch workers stop when done } sub incr_start_query { # called whenever an l2m shard starts do_post_auth my ($self, $lei) = @_; my $l2m = $lei->{l2m}; return if ++$self->{nr_start_query} != $l2m->{-wq_nr_workers}; start_query($self, $lei); } sub ipc_atfork_child { my ($self) = @_; $self->{lei}->_lei_atfork_child; $self->SUPER::ipc_atfork_child; } sub do_query { my ($self, $lei) = @_; my $l2m = $lei->{l2m}; my $ops = { 'sigpipe_handler' => [ $lei ], 'fail_handler' => [ $lei ], 'do_post_augment' => [ \&do_post_augment, $lei ], 'incr_post_augment' => [ \&incr_post_augment, $lei ], '' => [ \&query_done, $lei ], 'mset_progress' => [ \&mset_progress, $lei ], 'l2m_progress' => [ \&l2m_progress, $lei ], 'x_it' => [ $lei ], 'child_error' => [ $lei ], 'incr_start_query' => [ $self, $lei ], }; $lei->{auth}->op_merge($ops, $l2m, $lei) if $l2m && $lei->{auth}; my $end = $lei->pkt_op_pair; $lei->{1}->autoflush(1); $lei->start_pager if delete $lei->{need_pager}; $lei->{ovv}->ovv_begin($lei); die 'BUG: xdb|over open' if $lei->{lse}->{xdb} || $lei->{lse}->{over}; if ($l2m) { $l2m->pre_augment($lei); if ($lei->{opt}->{augment} && delete $lei->{early_mua}) { $lei->start_mua; } my $F_SETPIPE_SZ = $^O eq 'linux' ? 1031 : undef; if ($l2m->{-wq_nr_workers} > 1 && $l2m->{base_type} =~ /\A(?:maildir|mbox)\z/) { # setup two barriers to coordinate ->has_entries # between l2m workers pipe(my ($a_r, $a_w)) or die "pipe: $!"; fcntl($a_r, $F_SETPIPE_SZ, 4096) if $F_SETPIPE_SZ; pipe(my ($b_r, $b_w)) or die "pipe: $!"; fcntl($b_r, $F_SETPIPE_SZ, 4096) if $F_SETPIPE_SZ; $l2m->{au_peers} = [ $a_r, $a_w, $b_r, $b_w ]; } $l2m->wq_workers_start('lei2mail', undef, $lei->oldset, { lei => $lei }); $l2m->wq_wait_async(\&xsearch_done_wait, $lei); pipe($lei->{startq}, $lei->{au_done}) or die "pipe: $!"; fcntl($lei->{startq}, $F_SETPIPE_SZ, 4096) if $F_SETPIPE_SZ; delete $l2m->{au_peers}; } $self->wq_workers_start('lei_xsearch', undef, $lei->oldset, { lei => $lei }); $self->wq_wait_async(\&xsearch_done_wait, $lei); my $op_c = delete $lei->{pkt_op_c}; delete $lei->{pkt_op_p}; @$end = (); $self->{opt_threads} = $lei->{opt}->{threads}; $self->{opt_sort} = $lei->{opt}->{'sort'}; $self->{-do_lcat} = !!(delete $lei->{lcat_todo}); if ($l2m) { $l2m->net_merge_all_done($lei) unless $lei->{auth}; } else { start_query($self, $lei); } $lei->event_step_init; # wait for shutdowns $lei->wait_wq_events($op_c, $ops); } sub add_uri { my ($self, $uri) = @_; if (my $curl = $self->{curl} //= which('curl') // 0) { require PublicInbox::MboxReader; require IO::Uncompress::Gunzip; require PublicInbox::LeiCurl; push @{$self->{remotes}}, $uri; } else { warn "curl missing, ignoring $uri\n"; } } sub prepare_external { my ($self, $loc, $boost) = @_; # n.b. already ordered by boost if (ref $loc) { # already a URI, or PublicInbox::Inbox-like object return add_uri($self, $loc) if $loc->can('scheme'); } elsif ($loc =~ m!\Ahttps?://!) { require URI; return add_uri($self, URI->new($loc)); } elsif (-f "$loc/ei.lock") { require PublicInbox::ExtSearch; die "`\\n' not allowed in `$loc'\n" if index($loc, "\n") >= 0; $loc = PublicInbox::ExtSearch->new($loc); } elsif (-f "$loc/inbox.lock" || -d "$loc/public-inbox") { die "`\\n' not allowed in `$loc'\n" if index($loc, "\n") >= 0; require PublicInbox::Inbox; # v2, v1 $loc = bless { inboxdir => $loc }, 'PublicInbox::Inbox'; } elsif (!-e $loc) { warn "W: $loc gone, perhaps run: lei forget-external $loc\n"; return; } else { warn "W: $loc ignored, unable to determine external type\n"; return; } push @{$self->{locals}}, $loc; } sub _lcat_i { # LeiMailSync->each_src iterator callback my ($oidbin, $id, $each_smsg) = @_; $each_smsg->({blob => unpack('H*', $oidbin), pct => 100}); } sub _lcat2smsg { # git->cat_async callback my ($bref, $oid, $type, $size, $smsg) = @_; if ($bref) { my $eml = PublicInbox::Eml->new($bref); my $json_dump = delete $smsg->{-json_dump}; bless $smsg, 'PublicInbox::Smsg'; _smsg_fill($smsg, $eml); $json_dump->($smsg, undef, $eml); } } sub lcat_dump { # via wq_io_do my ($self) = @_; my $lei = $self->{lei}; my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei); my $git = $lei->{ale}->git; if (!$lei->{l2m}) { my $json_dump = $each_smsg; $each_smsg = sub { my ($smsg) = @_; $smsg->{-json_dump} = $json_dump; $git->cat_async($smsg->{blob}, \&_lcat2smsg, $smsg); }; } my $lms; for my $ent (@{$lei->{lcat_todo}}) { if (ref $ent eq 'HASH') { # { fid => $fid ,.. } $lms //= $lei->{lse}->lms; $lms->each_src($ent, \&_lcat_i, $each_smsg); } else { # oidhex $each_smsg->({ blob => $ent, pct => 100 }); } } $git->async_wait_all; undef $each_smsg; # may commit $lei->{ovv}->ovv_atexit_child($lei); } 1; public-inbox-1.9.0/lib/PublicInbox/Linkify.pm000066400000000000000000000061551430031475700210560ustar00rootroot00000000000000# Copyright (C) 2014-2021 all contributors # License: AGPL-3.0+ # two-step linkification. # intended usage is in the following order: # # linkify_1 # # linkify_2 # # Maybe this could be done more efficiently... package PublicInbox::Linkify; use strict; use warnings; use Digest::SHA qw/sha1_hex/; use PublicInbox::Hval qw(ascii_html mid_href); use PublicInbox::MID qw($MID_EXTRACT); my $SALT = rand; my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher):// [\@:\w\.-]+(?:/ (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*) (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)? (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)? )? )}xi; sub new { bless {}, $_[0] } # try to distinguish paired punctuation chars from the URL itself # Maybe other languages/formats can be supported here, too... my %pairs = ( "(" => qr/(\)[\.,;\+]?)\z/, # Markdown (,), Ruby (+) (, for arrays) "'" => qr/('[\.,;\+]?)\z/, # Perl / Ruby "!" => qr/(![\.,;\+]?)\z/, # Perl / Ruby ); sub linkify_1 { $_[1] =~ s^$LINK_RE^ my $beg = $1 || ''; my $url = $2; my $end = ''; # it's fairly common to end URLs in messages with # '.', ',' or ';' to denote the end of a statement; # assume the intent was to end the statement/sentence # in English if (defined(my $re = $pairs{$beg})) { if ($url =~ s/$re//) { $end = $1; } } elsif ($url =~ s/(\))?([\.,;])\z//) { $end = $2; # require ')' to be paired with '(' if (defined $1) { # ')' if (index($url, '(') < 0) { $end = ")$end"; } else { $url .= ')'; } } } elsif ($url !~ /\(/ && $url =~ s/\)\z//) { $end = ')'; } $url = ascii_html($url); # for IDN # salt this, as this could be exploited to show # links in the HTML which don't show up in the raw mail. my $key = sha1_hex($url . $SALT); $_[0]->{$key} = $url; $beg . 'PI-LINK-'. $key . $end; ^geo; $_[1]; } sub linkify_2 { # Added "PI-LINK-" prefix to avoid false-positives on git commits $_[1] =~ s!\bPI-LINK-([a-f0-9]{40})\b! my $key = $1; my $url = $_[0]->{$key}; if (defined $url) { "$url"; } else { # false positive or somebody tried to mess with us $key; } !ge; $_[1]; } # single pass linkification of within $str # with $pfx being the URL prefix sub linkify_mids { my ($self, $pfx, $str, $raw) = @_; $$str =~ s!$MID_EXTRACT! my $mid = $1; my $html = ascii_html($mid); my $href = mid_href($mid); # salt this, as this could be exploited to show # links in the HTML which don't show up in the raw mail. my $key = sha1_hex($html . $SALT); my $repl = qq(<$html>); $repl .= qq{ (raw)} if $raw; $self->{$key} = $repl; 'PI-LINK-'. $key; !ge; $$str = ascii_html($$str); $$str =~ s!\bPI-LINK-([a-f0-9]{40})\b! my $key = $1; my $repl = $_[0]->{$key}; if (defined $repl) { $repl; } else { # false positive or somebody tried to mess with us $key; } !ge; } sub to_html { linkify_2($_[0], ascii_html(linkify_1(@_))) } 1; public-inbox-1.9.0/lib/PublicInbox/Listener.pm000066400000000000000000000032241430031475700212300ustar00rootroot00000000000000# Copyright (C) 2015-2021 all contributors # License: AGPL-3.0+ # # Used by -nntpd for listen sockets package PublicInbox::Listener; use strict; use parent 'PublicInbox::DS'; use Socket qw(SOL_SOCKET SO_KEEPALIVE IPPROTO_TCP TCP_NODELAY); use IO::Handle; use PublicInbox::Syscall qw(EPOLLIN EPOLLEXCLUSIVE); use Errno qw(EAGAIN ECONNABORTED); # Warn on transient errors, mostly resource limitations. # EINTR would indicate the failure to set NonBlocking in systemd or similar my %ERR_WARN = map {; eval("Errno::$_()") => $_ } qw(EMFILE ENFILE ENOBUFS ENOMEM EINTR); sub new ($$$) { my ($class, $s, $cb) = @_; setsockopt($s, SOL_SOCKET, SO_KEEPALIVE, 1); setsockopt($s, IPPROTO_TCP, TCP_NODELAY, 1); # ignore errors on non-TCP listen($s, 2**31 - 1); # kernel will clamp my $self = bless { post_accept => $cb }, $class; $self->SUPER::new($s, EPOLLIN|EPOLLEXCLUSIVE); } sub event_step { my ($self) = @_; my $sock = $self->{sock} or return; # no loop here, we want to fairly distribute clients # between multiple processes sharing the same socket # XXX our event loop needs better granularity for # a single accept() here to be, umm..., acceptable # on high-traffic sites. if (my $addr = accept(my $c, $sock)) { IO::Handle::blocking($c, 0); # no accept4 :< eval { $self->{post_accept}->($c, $addr, $sock) }; warn "E: $@\n" if $@; } elsif ($! == EAGAIN || $! == ECONNABORTED) { # EAGAIN is common and likely # ECONNABORTED is common with bad connections return; } elsif (my $sym = $ERR_WARN{int($!)}) { warn "W: accept(): $! ($sym)\n"; } else { warn "BUG?: accept(): $!\n"; } } 1; public-inbox-1.9.0/lib/PublicInbox/Lock.pm000066400000000000000000000034221430031475700203330ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ # Base class for per-inbox locking package PublicInbox::Lock; use strict; use v5.10.1; use Fcntl qw(:flock :DEFAULT); use Carp qw(croak); use PublicInbox::OnDestroy; # we only acquire the flock if creating or reindexing; # PublicInbox::Import already has the lock on its own. sub lock_acquire { my ($self) = @_; my $lock_path = $self->{lock_path}; croak 'already locked '.($lock_path // '(undef)') if $self->{lockfh}; return unless defined($lock_path); sysopen(my $lockfh, $lock_path, O_RDWR|O_CREAT) or croak "failed to open $lock_path: $!\n"; flock($lockfh, LOCK_EX) or croak "lock $lock_path failed: $!\n"; $self->{lockfh} = $lockfh; } sub lock_release { my ($self, $wake) = @_; defined(my $lock_path = $self->{lock_path}) or return; my $lockfh = delete $self->{lockfh} or croak "not locked: $lock_path"; syswrite($lockfh, '.') if $wake; flock($lockfh, LOCK_UN) or croak "unlock $lock_path failed: $!\n"; close $lockfh or croak "close $lock_path failed: $!\n"; } # caller must use return value sub lock_for_scope { my ($self, @single_pid) = @_; lock_acquire($self) or return; # lock_path not set PublicInbox::OnDestroy->new(@single_pid, \&lock_release, $self); } sub lock_acquire_fast { $_[0]->{lockfh} or return lock_acquire($_[0]); flock($_[0]->{lockfh}, LOCK_EX) or croak "lock (fast) failed: $!"; } sub lock_release_fast { flock($_[0]->{lockfh} // return, LOCK_UN) or croak "unlock (fast) $_[0]->{lock_path}: $!"; } # caller must use return value sub lock_for_scope_fast { my ($self, @single_pid) = @_; lock_acquire_fast($self) or return; # lock_path not set PublicInbox::OnDestroy->new(@single_pid, \&lock_release_fast, $self); } 1; public-inbox-1.9.0/lib/PublicInbox/MDA.pm000066400000000000000000000054101430031475700200430ustar00rootroot00000000000000# Copyright (C) 2013-2021 all contributors # License: AGPL-3.0+ # # For the -mda script (mail delivery agent) package PublicInbox::MDA; use strict; use warnings; use PublicInbox::MsgTime; use PublicInbox::Address; use constant MAX_SIZE => 1024 * 500; # same as spamc default, should be tunable use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian our @BAD_HEADERS = ( # postfix qw(delivered-to x-original-to), # prevent training loops # The rest are taken from Mailman 2.1.15: # could contain passwords: qw(approved approve x-approved x-approve urgent), # could be used phishing: qw(return-receipt-to disposition-notification-to x-confirm-reading-to), # Pegasus mail: qw(x-pmrqc) ); # drop plus addressing for matching sub __drop_plus { my ($str_addr) = @_; $str_addr =~ s/\+.*\@/\@/; $str_addr; } # do not allow Bcc, only Cc and To if recipient is set sub precheck { my ($klass, $simple, $address) = @_; my @mid = $simple->header('Message-ID'); return 0 if scalar(@mid) != 1; my $mid = $mid[0]; return 0 if (length($mid) > MAX_MID_SIZE); return 0 unless usable_str(length(''), $mid) && $mid =~ /\@/; return 0 unless usable_str(length('u@h'), $simple->header("From")); return 0 unless usable_str(length(':o'), $simple->header("Subject")); return 0 unless usable_date($simple->header("Date")); return 0 if length($simple->as_string) > MAX_SIZE; alias_specified($simple, $address); } sub usable_str { my ($len, $str) = @_; defined($str) && length($str) >= $len; } sub usable_date { defined(eval { PublicInbox::MsgTime::str2date_zone($_[0]) }); } sub alias_specified { my ($simple, $address) = @_; my @address = ref($address) eq 'ARRAY' ? @$address : ($address); my %ok = map { lc(__drop_plus($_)) => 1; } @address; foreach my $line ($simple->header('Cc'), $simple->header('To')) { my @addrs = PublicInbox::Address::emails($line); foreach my $addr (@addrs) { if ($ok{lc(__drop_plus($addr))}) { return 1; } } } return 0; } sub set_list_headers { my ($class, $simple, $dst) = @_; unless (defined $simple->header('List-Id')) { my $pa = $dst->{-primary_address}; $pa =~ tr/@/./; # RFC2919 $simple->header_set("List-Id", "<$pa>"); } } sub inboxes_for_list_id ($$) { my ($klass, $pi_cfg, $simple) = @_; # newer Email::Simple allows header_raw, as does Email::MIME: my @list_ids = $simple->can('header_raw') ? $simple->header_raw('List-Id') : $simple->header('List-Id'); my @dests; for my $list_id (@list_ids) { $list_id =~ /<[ \t]*(.+)?[ \t]*>/ or next; if (my $ibx = $pi_cfg->lookup_list_id($1)) { push @dests, $ibx; } } if (scalar(@list_ids) > 1) { warn "W: multiple List-IDs in message:\n"; warn "W: List-ID: $_\n" for @list_ids } \@dests; } 1; public-inbox-1.9.0/lib/PublicInbox/MID.pm000066400000000000000000000063111430031475700200540ustar00rootroot00000000000000# Copyright (C) 2015-2021 all contributors # License: AGPL-3.0+ # # Various Message-ID-related functions. package PublicInbox::MID; use strict; use warnings; use base qw/Exporter/; our @EXPORT_OK = qw(mid_clean id_compress mid2path mid_escape MID_ESC mids references mids_for_index mids_in $MID_EXTRACT); use URI::Escape qw(uri_escape_utf8); use Digest::SHA qw/sha1_hex/; require PublicInbox::Address; use constant { ID_MAX => 40, # SHA-1 hex length for HTML id anchors MAX_MID_SIZE => 244, # max term size (Xapian limitation) - length('Q') }; our $MID_EXTRACT = qr/<([^>]+)>/s; sub mid_clean { my ($mid) = @_; defined($mid) or die "no Message-ID"; # MDA->precheck did more checking for us if ($mid =~ $MID_EXTRACT) { $mid = $1; } $mid; } # this is idempotent, used for HTML anchor/ids and such sub id_compress { my ($id, $force) = @_; if ($force || $id =~ /[^a-zA-Z0-9_\-]/ || length($id) > ID_MAX) { utf8::encode($id); return sha1_hex($id); } $id; } sub mid2path { my ($mid) = @_; my ($x2, $x38) = ($mid =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/); unless (defined $x38) { # compatibility with old links (or short Message-IDs :) $mid = mid_clean($mid); utf8::encode($mid); $mid = sha1_hex($mid); ($x2, $x38) = ($mid =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/); } "$x2/$x38"; } # only intended for Message-ID and X-Alt-Message-ID sub extract_mids { my @mids; for my $v (@_) { my @cur = ($v =~ /$MID_EXTRACT/g); if (@cur) { push(@mids, @cur); } else { push(@mids, $v); } } \@mids; } sub mids ($) { my ($hdr) = @_; my @mids = $hdr->header_raw('Message-ID'); uniq_mids(extract_mids(@mids)); } # for Resent-Message-ID and maybe others sub mids_in ($@) { my ($eml, @headers) = @_; uniq_mids(extract_mids(map { ($eml->header_raw($_)) } @headers)); } # we allow searching on X-Alt-Message-ID since PublicInbox::NNTP uses them # to placate some clients, and we want to ensure NNTP-only clients can # import and index without relying on HTTP endpoints sub mids_for_index ($) { mids_in($_[0], qw(Message-ID X-Alt-Message-ID)); } # last References should be IRT, but some mail clients do things # out of order, so trust IRT over References iff IRT exists sub references ($) { my ($hdr) = @_; my @mids; foreach my $f (qw(References In-Reply-To)) { my @v = $hdr->header_raw($f); foreach my $v (@v) { push(@mids, ($v =~ /$MID_EXTRACT/g)); } } # old versions of git-send-email would prompt users for # In-Reply-To and users' muscle memory would use 'y' or 'n' # as responses: my %addr = ( y => 1, n => 1 ); foreach my $f (qw(To From Cc)) { my @v = $hdr->header_raw($f); foreach my $v (@v) { $addr{$_} = 1 for (PublicInbox::Address::emails($v)); } } uniq_mids(\@mids, \%addr); } sub uniq_mids ($;$) { my ($mids, $seen) = @_; my @ret; $seen ||= {}; foreach my $mid (@$mids) { $mid =~ tr/\n\t\r//d; if (length($mid) > MAX_MID_SIZE) { warn "Message-ID: <$mid> too long, truncating\n"; $mid = substr($mid, 0, MAX_MID_SIZE); } $seen->{$mid} //= push(@ret, $mid); } \@ret; } # RFC3986, section 3.3: sub MID_ESC () { '^A-Za-z0-9\-\._~!\$\&\'\(\)\*\+,;=:@' } sub mid_escape ($) { uri_escape_utf8($_[0], MID_ESC) } 1; public-inbox-1.9.0/lib/PublicInbox/MIME.pm000066400000000000000000000071531430031475700201770ustar00rootroot00000000000000# This library is free software; you can redistribute it and/or modify # it under the same terms as Perl itself. # # The license for this file differs from the rest of public-inbox. # # We no longer load this in any of our code outside of maintainer # tests for compatibility. PublicInbox::Eml is favored throughout # our codebase for performance and safety reasons, though we maintain # Email::MIME-compatibility in mail injection and indexing code paths. # # It monkey patches the "parts_multipart" subroutine with patches # from Matthew Horsfall at: # # git clone --mirror https://github.com/rjbs/Email-MIME.git refs/pull/28/head # # commit fe0eb870ab732507aa39a1070a2fd9435c7e4877 # ("Make sure we don't modify the body of a message when injecting a header.") # commit 981d8201a7239b02114489529fd366c4c576a146 # ("GH #14 - Handle CRLF emails properly.") # commit 2338d93598b5e8432df24bda8dfdc231bdeb666e # ("GH #14 - Support multipart messages without content-type in subparts.") # # For Email::MIME >= 1.923 && < 1.935, # commit dcef9be66c49ae89c7a5027a789bbbac544499ce # ("removing all trailing newlines was too much") # is also included package PublicInbox::MIME; use strict; use warnings; use base qw(Email::MIME); use Email::MIME::ContentType; use PublicInbox::MsgIter (); $Email::MIME::ContentType::STRICT_PARAMS = 0; if ($Email::MIME::VERSION <= 1.937) { sub parts_multipart { my $self = shift; my $boundary = $self->{ct}->{attributes}->{boundary}; # Take a message, join all its lines together. Now try to Email::MIME->new # it with 1.861 or earlier. Death! It tries to recurse endlessly on the # body, because every time it splits on boundary it gets itself. Obviously # that means it's a bogus message, but a mangled result (or exception) is # better than endless recursion. -- rjbs, 2008-01-07 return $self->parts_single_part unless $boundary and $self->body_raw =~ /^--\Q$boundary\E\s*$/sm; $self->{body_raw} = Email::Simple::body($self); # rfc1521 7.2.1 my ($body, $epilogue) = split /^--\Q$boundary\E--\s*$/sm, $self->body_raw, 2; # Split on boundaries, but keep blank lines after them intact my @bits = split /^--\Q$boundary\E\s*?(?=$self->{mycrlf})/m, ($body || ''); Email::Simple::body_set($self, undef); # If there are no headers in the potential MIME part, it's just part of the # body. This is a horrible hack, although it's debatable whether it was # better or worse when it was $self->{body} = shift @bits ... -- rjbs, # 2006-11-27 Email::Simple::body_set($self, shift @bits) if ($bits[0] || '') !~ /.*:.*/; my $bits = @bits; my @parts; for my $bit (@bits) { # Parts don't need headers. If they don't have them, they look like this: # # --90e6ba6e8d06f1723604fc1b809a # # Part 2 # # Part 2a # # $bit will contain two new lines before Part 2. # # Anything with headers will only have one new line. # # RFC 1341 Section 7.2 says parts without headers are to be considered # plain US-ASCII text. -- alh # 2016-08-01 my $added_header; if ($bit =~ /^(?:$self->{mycrlf}){2}/) { $bit = "Content-type: text/plain; charset=us-ascii" . $bit; $added_header = 1; } $bit =~ s/\A[\n\r]+//smg; $bit =~ s/(?{mycrlf}\Z//sm; my $email = (ref $self)->new($bit); if ($added_header) { # Remove our changes so we don't change the raw email content $email->header_str_set('Content-Type'); } push @parts, $email; } $self->{parts} = \@parts; return @{ $self->{parts} }; } } no warnings 'once'; *each_part = \&PublicInbox::MsgIter::em_each_part; 1; public-inbox-1.9.0/lib/PublicInbox/ManifestJsGz.pm000066400000000000000000000064741430031475700220210ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # generates manifest.js.gz for grokmirror(1) package PublicInbox::ManifestJsGz; use strict; use v5.10.1; use parent qw(PublicInbox::WwwListing); use PublicInbox::Config; use IO::Compress::Gzip qw(gzip); use HTTP::Date qw(time2str); my $json = PublicInbox::Config::json(); sub url_filter { my ($ctx) = @_; # grokmirror uses relative paths, so it's domain-dependent # SUPER calls PublicInbox::WwwListing::url_filter $ctx->SUPER::url_filter('publicInbox.grokManifest', 'match=domain'); } sub inject_entry ($$$;$) { my ($ctx, $url_path, $ent, $git_dir) = @_; $ctx->{-abs2urlpath}->{$git_dir // delete $ent->{git_dir}} = $url_path; my $modified = $ent->{modified}; $ctx->{-mtime} = $modified if $modified > ($ctx->{-mtime} // 0); $ctx->{manifest}->{$url_path} = $ent; } sub manifest_add ($$;$$) { # slow path w/o extindex "all" (or per-inbox) my ($ctx, $ibx, $epoch, $default_desc) = @_; my $url_path = "/$ibx->{name}"; my $git; if (defined $epoch) { $url_path .= "/git/$epoch.git"; $git = $ibx->git_epoch($epoch) or return; } else { $git = $ibx->git; } my $ent = $git->manifest_entry($epoch, $default_desc) or return; inject_entry($ctx, $url_path, $ent, $git->{git_dir}); } sub slow_manifest_add ($$) { my ($ctx, $ibx) = @_; eval { if (defined(my $max = $ibx->max_git_epoch)) { my $desc = $ibx->description; for my $epoch (0..$max) { manifest_add($ctx, $ibx, $epoch, $desc); } } else { manifest_add($ctx, $ibx); } }; warn "E: $@" if $@; } sub eidx_manifest_add ($$$) { my ($ctx, $ALL, $ibx) = @_; if (my $data = $ALL->misc->inbox_data($ibx)) { $data = $json->decode($data); delete $data->{''}; # private while (my ($url_path, $ent) = each %$data) { inject_entry($ctx, $url_path, $ent); } } else { warn "E: `${\$ibx->eidx_key}' not indexed by $ALL->{topdir}\n"; # do not use slow path for global manifest since # it can become catastrophically slow. per-inbox manifest # is not too bad with dozens of epochs, so never fail that: slow_manifest_add($ctx, $ibx) if $ibx == $ctx->{ibx}; } } sub response { my ($class, $ctx) = @_; bless $ctx, $class; my ($re, undef) = $ctx->url_filter; $re // return psgi_triple($ctx); my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg}, $ctx->can('list_match_i'), $re, $ctx); sub { $ctx->{-wcb} = $_[0]; # HTTP server callback $ctx->{env}->{'pi-httpd.async'} ? $iter->event_step : $iter->each_section; } } sub ibx_entry { my ($ctx, $ibx) = @_; my $ALL = $ctx->{www}->{pi_cfg}->ALL; $ALL ? eidx_manifest_add($ctx, $ALL, $ibx) : slow_manifest_add($ctx, $ibx); } sub hide_key { 'manifest' } # for WwwListing->list_match_i sub psgi_triple { my ($ctx) = @_; my $abs2urlpath = delete($ctx->{-abs2urlpath}) // {}; my $manifest = delete($ctx->{manifest}) // {}; while (my ($url_path, $repo) = each %$manifest) { defined(my $abs = $repo->{reference}) or next; $repo->{reference} = $abs2urlpath->{$abs}; } $manifest = $json->encode($manifest); gzip(\$manifest => \(my $out)); [ 200, [ qw(Content-Type application/gzip), 'Last-Modified', time2str($ctx->{-mtime}), 'Content-Length', length($out) ], [ $out ] ] } sub per_inbox { my ($ctx) = @_; ibx_entry($ctx, $ctx->{ibx}); psgi_triple($ctx); } 1; public-inbox-1.9.0/lib/PublicInbox/Mbox.pm000066400000000000000000000162761430031475700203630ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # Streaming interface for mboxrd HTTP responses # See PublicInbox::GzipFilter for details. package PublicInbox::Mbox; use strict; use parent 'PublicInbox::GzipFilter'; use PublicInbox::MID qw/mid_escape/; use PublicInbox::Hval qw/to_filename/; use PublicInbox::Smsg; use PublicInbox::Eml; # called by PSGI server as body response # this gets called twice for every message, once to return the header, # once to retrieve the body sub getline { my ($ctx) = @_; # ctx my $smsg = $ctx->{smsg} or return; my $ibx = $ctx->{ibx}; my $eml = delete($ctx->{eml}) // $ibx->smsg_eml($smsg) // return; my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}}); $ctx->zmore(msg_hdr($ctx, $eml)); if ($n) { $ctx->translate(msg_body($eml)); } else { # last message $ctx->zflush(msg_body($eml)); } } # called by PublicInbox::DS::write after http->next_step sub async_next { my ($http) = @_; # PublicInbox::HTTP my $ctx = $http->{forward} or return; # client aborted eval { my $smsg = $ctx->{smsg} or return $ctx->close; $ctx->smsg_blob($smsg); }; warn "E: $@" if $@; } sub async_eml { # for async_blob_cb my ($ctx, $eml) = @_; my $smsg = delete $ctx->{smsg}; # next message $ctx->{smsg} = $ctx->{ibx}->over->next_by_mid(@{$ctx->{next_arg}}); local $ctx->{eml} = $eml; # for mbox_hdr $ctx->zmore(msg_hdr($ctx, $eml)); $ctx->write(msg_body($eml)); } sub mbox_hdr ($) { my ($ctx) = @_; my $eml = $ctx->{eml} //= $ctx->{ibx}->smsg_eml($ctx->{smsg}); my $fn = $eml->header_str('Subject') // ''; $fn =~ s/^re:\s+//i; $fn = to_filename($fn) // 'no-subject'; my @hdr = ('Content-Type'); if ($ctx->{ibx}->{obfuscate}) { # obfuscation is stupid, but maybe scrapers are, too... push @hdr, 'application/mbox'; $fn .= '.mbox'; } else { push @hdr, 'text/plain'; $fn .= '.txt'; } my $cs = $ctx->{eml}->ct->{attributes}->{charset} // 'UTF-8'; $cs = 'UTF-8' if $cs =~ /[^a-zA-Z0-9\-\_]/; # avoid header injection $hdr[-1] .= "; charset=$cs"; push @hdr, 'Content-Disposition', "inline; filename=$fn"; [ 200, \@hdr ]; } # for rare cases where v1 inboxes aren't indexed w/ ->over at all sub no_over_raw ($) { my ($ctx) = @_; my $mref = $ctx->{ibx}->msg_by_mid($ctx->{mid}) or return; my $eml = $ctx->{eml} = PublicInbox::Eml->new($mref); [ @{mbox_hdr($ctx)}, [ msg_hdr($ctx, $eml) . msg_body($eml) ] ] } # /$INBOX/$MESSAGE_ID/raw sub emit_raw { my ($ctx) = @_; my $over = $ctx->{ibx}->over or return no_over_raw($ctx); my ($id, $prev); my $mip = $ctx->{next_arg} = [ $ctx->{mid}, \$id, \$prev ]; my $smsg = $ctx->{smsg} = $over->next_by_mid(@$mip) or return; bless $ctx, __PACKAGE__; $ctx->psgi_response(\&mbox_hdr); } sub msg_hdr ($$) { my ($ctx, $eml) = @_; my $header_obj = $eml->header_obj; # drop potentially confusing headers, ssoma already should've dropped # Lines and Content-Length foreach my $d (qw(Lines Bytes Content-Length Status)) { $header_obj->header_set($d); } my $crlf = $header_obj->crlf; my $buf = $header_obj->as_string; # fixup old bug from import (pre-a0c07cba0e5d8b6a) $buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; "From mboxrd\@z Thu Jan 1 00:00:00 1970" . $crlf . $buf . $crlf; } sub msg_body ($) { my $bdy = $_[0]->{bdy} // return "\n"; # mboxrd quoting style # https://en.wikipedia.org/wiki/Mbox#Modified_mbox # https://www.loc.gov/preservation/digital/formats/fdd/fdd000385.shtml # https://web.archive.org/http://www.qmail.org/man/man5/mbox.html $$bdy =~ s/^(>*From )/>$1/gm; $$bdy .= "\n"; } sub thread_cb { my ($ctx) = @_; my $msgs = $ctx->{msgs}; while (1) { if (my $smsg = shift @$msgs) { return $smsg; } # refill result set my $over = $ctx->{ibx}->over or return $ctx->gone('over'); $ctx->{msgs} = $msgs = $over->get_thread($ctx->{mid}, $ctx->{prev}); return unless @$msgs; $ctx->{prev} = $msgs->[-1]; } } sub thread_mbox { my ($ctx, $over, $sfx) = @_; my $msgs = $ctx->{msgs} = $over->get_thread($ctx->{mid}, {}); return [404, [qw(Content-Type text/plain)], []] if !@$msgs; $ctx->{prev} = $msgs->[-1]; require PublicInbox::MboxGz; PublicInbox::MboxGz::mbox_gz($ctx, \&thread_cb, $msgs->[0]->{subject}); } sub emit_range { my ($ctx, $range) = @_; my $q; if ($range eq 'all') { # TODO: YYYY[-MM] $q = ''; } else { return [404, [qw(Content-Type text/plain)], []]; } mbox_all($ctx, { q => $q }); } sub all_ids_cb { my ($ctx) = @_; my $over = $ctx->{ibx}->over or return $ctx->gone('over'); my $ids = $ctx->{ids}; do { while ((my $num = shift @$ids)) { my $smsg = $over->get_art($num) or next; return $smsg; } $ctx->{ids} = $ids = $over->ids_after(\($ctx->{prev})); } while (@$ids); } sub mbox_all_ids { my ($ctx) = @_; my $prev = 0; my $over = $ctx->{ibx}->over or return PublicInbox::WWW::need($ctx, 'Overview'); my $ids = $over->ids_after(\$prev) or return [404, [qw(Content-Type text/plain)], ["No results found\n"]]; $ctx->{ids} = $ids; $ctx->{prev} = $prev; $ctx->{-low_prio} = 1; require PublicInbox::MboxGz; PublicInbox::MboxGz::mbox_gz($ctx, \&all_ids_cb, 'all'); } sub results_cb { my ($ctx) = @_; my $over = $ctx->{ibx}->over or return $ctx->gone('over'); while (1) { while (defined(my $num = shift(@{$ctx->{ids}}))) { my $smsg = $over->get_art($num) or next; return $smsg; } # refill result set, deprioritize since there's many results my $srch = $ctx->{ibx}->isrch or return $ctx->gone('search'); my $mset = $srch->mset($ctx->{query}, $ctx->{qopts}); my $size = $mset->size or return; $ctx->{qopts}->{offset} += $size; $ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts}); $ctx->{-low_prio} = 1; } } sub results_thread_cb { my ($ctx) = @_; my $over = $ctx->{ibx}->over or return $ctx->gone('over'); while (1) { while (defined(my $num = shift(@{$ctx->{xids}}))) { my $smsg = $over->get_art($num) or next; return $smsg; } # refills ctx->{xids} next if $over->expand_thread($ctx); # refill result set, deprioritize since there's many results my $srch = $ctx->{ibx}->isrch or return $ctx->gone('search'); my $mset = $srch->mset($ctx->{query}, $ctx->{qopts}); my $size = $mset->size or return; $ctx->{qopts}->{offset} += $size; $ctx->{ids} = $srch->mset_to_artnums($mset, $ctx->{qopts}); $ctx->{-low_prio} = 1; } } sub mbox_all { my ($ctx, $q) = @_; my $q_string = $q->{'q'}; return mbox_all_ids($ctx) if $q_string !~ /\S/; my $srch = $ctx->{ibx}->isrch or return PublicInbox::WWW::need($ctx, 'Search'); my $over = $ctx->{ibx}->over or return PublicInbox::WWW::need($ctx, 'Overview'); my $qopts = $ctx->{qopts} = { relevance => -2 }; # ORDER BY docid DESC $qopts->{threads} = 1 if $q->{t}; $srch->query_approxidate($ctx->{ibx}->git, $q_string); my $mset = $srch->mset($q_string, $qopts); $qopts->{offset} = $mset->size or return [404, [qw(Content-Type text/plain)], ["No results found\n"]]; $ctx->{query} = $q_string; $ctx->{ids} = $srch->mset_to_artnums($mset, $qopts); require PublicInbox::MboxGz; my $fn; if ($q->{t} && $srch->has_threadid) { $fn = 'results-thread-'.$q_string; PublicInbox::MboxGz::mbox_gz($ctx, \&results_thread_cb, $fn); } else { $fn = 'results-'.$q_string; PublicInbox::MboxGz::mbox_gz($ctx, \&results_cb, $fn); } } 1; public-inbox-1.9.0/lib/PublicInbox/MboxGz.pm000066400000000000000000000026741430031475700206610ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ package PublicInbox::MboxGz; use strict; use parent 'PublicInbox::GzipFilter'; use PublicInbox::Eml; use PublicInbox::Hval qw/to_filename/; use PublicInbox::Mbox; *msg_hdr = \&PublicInbox::Mbox::msg_hdr; *msg_body = \&PublicInbox::Mbox::msg_body; sub async_next ($) { my ($http) = @_; # PublicInbox::HTTP my $ctx = $http->{forward} or return; eval { $ctx->{smsg} = $ctx->{cb}->($ctx) or return $ctx->close; $ctx->smsg_blob($ctx->{smsg}); }; warn "E: $@" if $@; } sub mbox_gz { my ($self, $cb, $fn) = @_; $self->{cb} = $cb; $self->{gz} = PublicInbox::GzipFilter::gzip_or_die(); $fn = to_filename($fn // '') // 'no-subject'; # http://www.iana.org/assignments/media-types/application/gzip bless $self, __PACKAGE__; my $res_hdr = [ 'Content-Type' => 'application/gzip', 'Content-Disposition' => "inline; filename=$fn.mbox.gz" ]; $self->psgi_response(200, $res_hdr); } # called by Plack::Util::foreach or similar (generic PSGI) sub getline { my ($self) = @_; my $cb = $self->{cb} or return; while (my $smsg = $cb->($self)) { my $eml = $self->{ibx}->smsg_eml($smsg) or next; $self->zmore(msg_hdr($self, $eml)); return $self->translate(msg_body($eml)); } # signal that we're done and can return undef next call: delete $self->{cb}; $self->zflush; } no warnings 'once'; *async_eml = \&PublicInbox::Mbox::async_eml; 1; public-inbox-1.9.0/lib/PublicInbox/MboxLock.pm000066400000000000000000000067501430031475700211700ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # Various mbox locking methods package PublicInbox::MboxLock; use strict; use v5.10.1; use PublicInbox::OnDestroy; use Fcntl qw(:flock F_SETLK F_SETLKW F_RDLCK F_WRLCK O_CREAT O_EXCL O_WRONLY SEEK_SET); use Carp qw(croak); use PublicInbox::DS qw(now); # ugh... our $TMPL = do { if ($^O eq 'linux') { \'s @32' } elsif ($^O =~ /bsd/) { \'@20 s @256' } # n.b. @32 may be enough... else { eval { require File::FcntlLock; 1 } } }; # This order matches Debian policy on Linux systems. # See policy/ch-customized-programs.rst in # https://salsa.debian.org/dbnpolicy/policy.git sub defaults { [ qw(fcntl dotlock) ] } sub acq_fcntl { my ($self) = @_; my $op = $self->{nb} ? F_SETLK : F_SETLKW; my $t = $self->{rw} ? F_WRLCK : F_RDLCK; my $end = now + $self->{timeout}; $TMPL or die <{fh}, $op, pack($$TMPL, $t)); } else { my $fl = File::FcntlLock->new; $fl->l_type($t); $fl->l_whence(SEEK_SET); $fl->l_start(0); $fl->l_len(0); return if $fl->lock($self->{fh}, $op); } select(undef, undef, undef, $self->{delay}); } while (now < $end); die "fcntl lock timeout $self->{f}: $!\n"; } sub acq_dotlock { my ($self) = @_; my $dot_lock = "$self->{f}.lock"; my ($pfx, $base) = ($self->{f} =~ m!(\A.*?/)?([^/]+)\z!); $pfx //= ''; my $pid = $$; my $end = now + $self->{timeout}; do { my $tmp = "$pfx.$base-".sprintf('%x,%x,%x', rand(0xffffffff), $pid, time); if (sysopen(my $fh, $tmp, O_CREAT|O_EXCL|O_WRONLY)) { if (link($tmp, $dot_lock)) { unlink($tmp) or die "unlink($tmp): $!"; $self->{".lock$pid"} = $dot_lock; if (substr($dot_lock, 0, 1) ne '/') { opendir(my $dh, '.') or die "opendir . $!"; $self->{dh} = $dh; } return; } unlink($tmp) or die "unlink($tmp): $!"; select(undef, undef, undef, $self->{delay}); } else { croak "open $tmp (for $dot_lock): $!" if !$!{EXIST}; } } while (now < $end); die "dotlock timeout $dot_lock\n"; } sub acq_flock { my ($self) = @_; my $op = $self->{rw} ? LOCK_EX : LOCK_SH; $op |= LOCK_NB if $self->{nb}; my $end = now + $self->{timeout}; do { return if flock($self->{fh}, $op); select(undef, undef, undef, $self->{delay}); } while (now < $end); die "flock timeout $self->{f}: $!\n"; } sub acq { my ($cls, $f, $rw, $methods) = @_; my $fh; unless (open $fh, $rw ? '+>>' : '<', $f) { croak "open($f): $!" if $rw || !$!{ENOENT}; } my $self = bless { f => $f, fh => $fh, rw => $rw }, $cls; my $m = "@$methods"; if ($m ne 'none') { my @m = map { if (/\A(timeout|delay)=([0-9\.]+)s?\z/) { $self->{$1} = $2 + 0; (); } else { $cls->can("acq_$_") // $_ } } split(/[, ]/, $m); my @bad = grep { !ref } @m; croak "Unsupported lock methods: @bad\n" if @bad; croak "No lock methods supplied with $m\n" if !@m; $self->{nb} = $#m || defined($self->{timeout}); $self->{delay} //= 0.1; $self->{timeout} //= 5; $_->($self) for @m; } $self; } sub _fchdir { chdir($_[0]) } # OnDestroy callback sub DESTROY { my ($self) = @_; if (my $f = $self->{".lock$$"}) { my $x; if (my $dh = delete $self->{dh}) { opendir my $c, '.' or die "opendir . $!"; $x = PublicInbox::OnDestroy->new(\&_fchdir, $c); chdir($dh) or die "chdir (for $f): $!"; } unlink($f) or die "unlink($f): $! (lock stolen?)"; undef $x; } } 1; public-inbox-1.9.0/lib/PublicInbox/MboxReader.pm000066400000000000000000000137621430031475700215030ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # reader for mbox variants we support package PublicInbox::MboxReader; use strict; use v5.10.1; use Data::Dumper; $Data::Dumper::Useqq = 1; # should've been the default, for bad data my $from_strict = qr/^From \S+ +\S+ \S+ +\S+ [^\n:]+:[^\n:]+:[^\n:]+ [^\n:]+\n/sm; # cf: https://doc.dovecot.org/configuration_manual/mail_location/mbox/ my %status2kw = (F => 'flagged', A => 'answered', R => 'seen', T => 'draft'); # O (old/non-recent), and D (deleted) aren't in JMAP, # so probably won't be supported by us. sub mbox_keywords { my $eml = $_[-1]; my $s = "@{[$eml->header_raw('X-Status'),$eml->header_raw('Status')]}"; my %kw; $s =~ s/([FART])/$kw{$status2kw{$1}} = 1/sge; [ sort(keys %kw) ]; } sub _mbox_from { my ($mbfh, $from_re, $eml_cb, @arg) = @_; my $buf = ''; my @raw; while (defined(my $r = read($mbfh, $buf, 65536, length($buf)))) { if ($r == 0) { # close here to check for "curl --fail" close($mbfh) or die "error closing mbox: \$?=$? $!"; @raw = ($buf); } else { @raw = split(/$from_strict/mos, $buf, -1); next if scalar(@raw) == 0; $buf = pop(@raw); # last bit may be incomplete } @raw = grep /[^ \t\r\n]/s, @raw; # skip empty messages while (defined(my $raw = shift @raw)) { $raw =~ s/^\r?\n\z//ms; $raw =~ s/$from_re/$1/gms; my $eml = PublicInbox::Eml->new(\$raw); $eml_cb->($eml, @arg) if $eml->raw_size; } return if $r == 0; # EOF } die "error reading mboxo/mboxrd handle: $!"; } sub mboxrd { my (undef, $mbfh, $eml_cb, @arg) = @_; _mbox_from($mbfh, qr/^>(>*From )/ms, $eml_cb, @arg); } sub mboxo { my (undef, $mbfh, $eml_cb, @arg) = @_; _mbox_from($mbfh, qr/^>(From )/ms, $eml_cb, @arg); } sub _cl_body { my ($mbfh, $bref, $cl) = @_; my $body = substr($$bref, 0, $cl, ''); my $need = $cl - length($body); if ($need > 0) { $mbfh or die "E: needed $need bytes after EOF"; defined(my $r = read($mbfh, $body, $need, length($body))) or die "E: read error: $!\n"; $r == $need or die "E: read $r of $need bytes\n"; } \$body; } sub _extract_hdr { my ($ref) = @_; if (index($$ref, "\r\n") < 0 && (my $pos = index($$ref, "\n\n")) >= 0) { # likely on *nix \substr($$ref, 0, $pos + 2, ''); # sv_chop on $$ref } elsif ($$ref =~ /\r?\n\r?\n/s) { \substr($$ref, 0, $+[0], ''); # sv_chop on $$ref } else { undef } } sub _mbox_cl ($$$;@) { my ($mbfh, $uxs_from, $eml_cb, @arg) = @_; my $buf = ''; while (defined(my $r = read($mbfh, $buf, 65536, length($buf)))) { if ($r == 0) { # detect "curl --fail" close($mbfh) or die "error closing mboxcl/mboxcl2: \$?=$? $!"; undef $mbfh; } while (my $hdr = _extract_hdr(\$buf)) { $$hdr =~ s/\A[\r\n]*From [^\n]*\n//s or die "E: no 'From ' line in:\n", Dumper($hdr); my $eml = PublicInbox::Eml->new($hdr); next unless $eml->raw_size; my @cl = $eml->header_raw('Content-Length'); my $n = scalar(@cl); $n == 0 and die "E: Content-Length missing in:\n", Dumper($eml->as_string); $n == 1 or die "E: multiple ($n) Content-Length in:\n", Dumper($eml->as_string); $cl[0] =~ /\A[0-9]+\z/ or die "E: Content-Length `$cl[0]' invalid\n", Dumper($eml->as_string); if (($eml->{bdy} = _cl_body($mbfh, \$buf, $cl[0]))) { $uxs_from and ${$eml->{bdy}} =~ s/^>From /From /sgm; } $eml_cb->($eml, @arg); } if ($r == 0) { $buf =~ /[^ \r\n\t]/ and warn "W: leftover at end of mboxcl/mboxcl2:\n", Dumper(\$buf); return; } } die "error reading mboxcl/mboxcl2 handle: $!"; } sub mboxcl { my (undef, $mbfh, $eml_cb, @arg) = @_; _mbox_cl($mbfh, 1, $eml_cb, @arg); } sub mboxcl2 { my (undef, $mbfh, $eml_cb, @arg) = @_; _mbox_cl($mbfh, undef, $eml_cb, @arg); } sub new { bless \(my $x), __PACKAGE__ } sub reads { my $ifmt = $_[-1]; $ifmt =~ /\Ambox(?:rd|cl|cl2|o)\z/ ? __PACKAGE__->can($ifmt) : undef } # all of these support -c for stdout and -d for decompression, # mutt is commonly distributed with hooks for gz, bz2 and xz, at least # { foo => '' } means "--foo" is passed to the command-line, # otherwise { foo => '--bar' } passes "--bar" my %zsfx2cmd = ( gz => [ qw(GZIP pigz gzip) ], bz2 => [ 'bzip2', {} ], xz => [ 'xz', {} ], # don't add new entries here unless MUA support is widely available ); sub zsfx ($) { my ($pathname) = @_; my $allow = join('|', keys %zsfx2cmd); $pathname =~ /\.($allow)\z/ ? $1 : undef; } sub zsfx2cmd ($$$) { my ($zsfx, $decompress, $lei) = @_; my $x = $zsfx2cmd{$zsfx} // die "BUG: no support for suffix=.$zsfx"; my @info = @$x; my $cmd_opt = ref($info[-1]) ? pop(@info) : undef; my @cmd = (undef, $decompress ? qw(-dc) : qw(-c)); require PublicInbox::Spawn; for my $exe (@info) { # I think respecting client's ENV{GZIP} is OK, not sure # about ENV overrides for other, less-common compressors if ($exe eq uc($exe)) { $exe = $lei->{env}->{$exe} or next; } $cmd[0] = PublicInbox::Spawn::which($exe) and last; } $cmd[0] // die join(' or ', @info)." missing for .$zsfx"; # not all gzip support --rsyncable, FreeBSD gzip doesn't even exit # with an error code if (!$decompress && $cmd[0] =~ m!/gzip\z! && !defined($cmd_opt)) { pipe(my ($r, $w)) or die "pipe: $!"; open my $null, '+>', '/dev/null' or die "open: $!"; my $rdr = { 0 => $null, 1 => $null, 2 => $w }; my $tst = [ $cmd[0], '--rsyncable' ]; my $pid = PublicInbox::Spawn::spawn($tst, undef, $rdr); close $w; my $err = do { local $/; <$r> }; waitpid($pid, 0) == $pid or die "BUG: waitpid: $!"; $cmd_opt = $err ? {} : { rsyncable => '' }; push(@$x, $cmd_opt); } for my $bool (keys %$cmd_opt) { my $switch = $cmd_opt->{$bool} // next; push @cmd, '--'.($switch || $bool); } for my $key (qw(rsyncable)) { # support compression level? my $switch = $cmd_opt->{$key} // next; my $val = $lei->{opt}->{$key} // next; push @cmd, $switch, $val; } \@cmd; } sub zsfxcat ($$$) { my ($in, $zsfx, $lei) = @_; my $cmd = zsfx2cmd($zsfx, 1, $lei); PublicInbox::Spawn::popen_rd($cmd, undef, { 0 => $in, 2 => $lei->{2} }); } 1; public-inbox-1.9.0/lib/PublicInbox/MdirReader.pm000066400000000000000000000057311430031475700214660ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # Maildirs for now, MH eventually # ref: https://cr.yp.to/proto/maildir.html # https://wiki2.dovecot.org/MailboxFormat/Maildir package PublicInbox::MdirReader; use strict; use v5.10.1; use PublicInbox::InboxWritable qw(eml_from_path); use Digest::SHA qw(sha256_hex); # returns Maildir flags from a basename ('' for no flags, undef for invalid) sub maildir_basename_flags { my (@f) = split(/:/, $_[0], -1); return if (scalar(@f) > 2 || substr($f[0], 0, 1) eq '.'); $f[1] // return ''; # "new" $f[1] =~ /\A2,([A-Za-z]*)\z/ ? $1 : undef; # "cur" } # same as above, but for full path name sub maildir_path_flags { my ($f) = @_; my $i = rindex($f, '/'); $i >= 0 ? maildir_basename_flags(substr($f, $i + 1)) : undef; } sub shard_ok ($$$) { my ($bn, $mod, $shard) = @_; # can't get dirent.d_ino w/ pure Perl readdir, so we extract # the OID if it looks like one instead of doing stat(2) my $hex = $bn =~ m!\A([a-f0-9]{40,})! ? $1 : sha256_hex($bn); my $recno = hex(substr($hex, 0, 8)); ($recno % $mod) == $shard; } sub maildir_each_file { my ($self, $dir, $cb, @arg) = @_; $dir .= '/' unless substr($dir, -1) eq '/'; my ($mod, $shard) = @{$self->{shard_info} // []}; for my $d (qw(new/ cur/)) { my $pfx = $dir.$d; opendir my $dh, $pfx or next; while (defined(my $bn = readdir($dh))) { my $fl = maildir_basename_flags($bn) // next; next if defined($mod) && !shard_ok($bn, $mod, $shard); next if index($fl, 'T') >= 0; # no Trashed messages $cb->($pfx.$bn, $fl, @arg); } } } my %c2kw = ('D' => 'draft', F => 'flagged', P => 'forwarded', R => 'answered', S => 'seen'); sub maildir_each_eml { my ($self, $dir, $cb, @arg) = @_; $dir .= '/' unless substr($dir, -1) eq '/'; my ($mod, $shard) = @{$self->{shard_info} // []}; my $pfx = $dir . 'new/'; if (opendir(my $dh, $pfx)) { while (defined(my $bn = readdir($dh))) { next if substr($bn, 0, 1) eq '.'; my @f = split(/:/, $bn, -1); # mbsync and offlineimap both use "2," in "new/" next if ($f[1] // '2,') ne '2,' || defined($f[2]); next if defined($mod) && !shard_ok($bn, $mod, $shard); my $f = $pfx.$bn; my $eml = eml_from_path($f) or next; $cb->($f, [], $eml, @arg); } } $pfx = $dir . 'cur/'; opendir my $dh, $pfx or return; while (defined(my $bn = readdir($dh))) { my $fl = maildir_basename_flags($bn) // next; next if index($fl, 'T') >= 0; next if defined($mod) && !shard_ok($bn, $mod, $shard); my $f = $pfx.$bn; my $eml = eml_from_path($f) or next; my @kw = sort(map { $c2kw{$_} // () } split(//, $fl)); $cb->($f, \@kw, $eml, @arg); } } sub new { bless {}, __PACKAGE__ } sub flags2kw ($) { if (wantarray) { my @unknown; my %kw; for (split(//, $_[0])) { my $k = $c2kw{$_}; if (defined($k)) { $kw{$k} = 1; } else { push @unknown, $_; } } (\%kw, \@unknown); } else { [ sort(map { $c2kw{$_} // () } split(//, $_[0])) ]; } } 1; public-inbox-1.9.0/lib/PublicInbox/MiscIdx.pm000066400000000000000000000110771430031475700210100ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # like PublicInbox::SearchIdx, but for searching for non-mail messages. # Things indexed include: # * inboxes themselves # * epoch information # * (maybe) git code repository information # Expect ~100K-1M documents with no parallelism opportunities, # so no sharding, here. # # See MiscSearch for read-only counterpart package PublicInbox::MiscIdx; use strict; use v5.10.1; use PublicInbox::InboxWritable; use PublicInbox::Search; # for SWIG Xapian and Search::Xapian compat use PublicInbox::SearchIdx qw(index_text term_generator add_val); use Carp qw(croak); use File::Path (); use PublicInbox::MiscSearch; use PublicInbox::Config; use PublicInbox::Syscall; my $json; sub new { my ($class, $eidx) = @_; PublicInbox::SearchIdx::load_xapian_writable(); my $mi_dir = "$eidx->{xpfx}/misc"; File::Path::mkpath($mi_dir); PublicInbox::Syscall::nodatacow_dir($mi_dir); my $flags = $PublicInbox::SearchIdx::DB_CREATE_OR_OPEN; $flags |= $PublicInbox::SearchIdx::DB_NO_SYNC if $eidx->{-no_fsync}; $flags |= $PublicInbox::SearchIdx::DB_DANGEROUS if $eidx->{-dangerous}; $json //= PublicInbox::Config::json(); bless { mi_dir => $mi_dir, flags => $flags, indexlevel => 'full', # small DB, no point in medium? }, $class; } sub _begin_txn ($) { my ($self) = @_; my $wdb = $PublicInbox::Search::X{WritableDatabase}; my $xdb = eval { $wdb->new($self->{mi_dir}, $self->{flags}) }; croak "Failed opening $self->{mi_dir}: $@" if $@; $xdb->begin_transaction; $xdb; } sub commit_txn { my ($self) = @_; my $xdb = delete $self->{xdb} or return; $xdb->commit_transaction; } sub create_xdb { my ($self) = @_; $self->{xdb} //= _begin_txn($self); commit_txn($self); } sub remove_eidx_key { my ($self, $eidx_key) = @_; my $xdb = $self->{xdb} //= _begin_txn($self); my $head = $xdb->postlist_begin('Q'.$eidx_key); my $tail = $xdb->postlist_end('Q'.$eidx_key); my @docids; # only one, unless we had bugs for (; $head != $tail; $head++) { push @docids, $head->get_docid; } for my $docid (@docids) { $xdb->delete_document($docid); warn "I: remove inbox docid #$docid ($eidx_key)\n"; } } # adds or updates according to $eidx_key sub index_ibx { my ($self, $ibx) = @_; my $eidx_key = $ibx->eidx_key; my $xdb = $self->{xdb} //= _begin_txn($self); # Q = uniQue in Xapian terminology my $head = $xdb->postlist_begin('Q'.$eidx_key); my $tail = $xdb->postlist_end('Q'.$eidx_key); my ($docid, @drop); for (; $head != $tail; $head++) { if (defined $docid) { my $i = $head->get_docid; push @drop, $i; warn <get_docid; } } $xdb->delete_document($_) for @drop; # just in case my $doc = $PublicInbox::Search::X{Document}->new; term_generator($self)->set_document($doc); # allow sorting by modified and uidvalidity (created at) add_val($doc, $PublicInbox::MiscSearch::MODIFIED, $ibx->modified); add_val($doc, $PublicInbox::MiscSearch::UIDVALIDITY, $ibx->uidvalidity); $doc->add_boolean_term('Q'.$eidx_key); # uniQue id $doc->add_boolean_term('T'.'inbox'); # Type # force reread from disk, {description} could be loaded from {misc} delete @$ibx{qw(-art_min -art_max description)}; if (defined($ibx->{newsgroup}) && $ibx->nntp_usable) { $doc->add_boolean_term('T'.'newsgroup'); # additional Type my $n = $ibx->art_min; add_val($doc, $PublicInbox::MiscSearch::ART_MIN, $n) if $n; $n = $ibx->art_max; add_val($doc, $PublicInbox::MiscSearch::ART_MAX, $n) if $n; } my $desc = $ibx->description; # description = S/Subject (or title) # address = A/Author index_text($self, $desc, 1, 'S'); index_text($self, $ibx->{name}, 1, 'XNAME'); my %map = ( address => 'A', listid => 'XLISTID', infourl => 'XINFOURL', url => 'XURL' ); while (my ($f, $pfx) = each %map) { for my $v (@{$ibx->{$f} // []}) { index_text($self, $v, 1, $pfx); } } my $data = {}; if (defined(my $max = $ibx->max_git_epoch)) { # v2 my $pfx = "/$ibx->{name}/git/"; for my $epoch (0..$max) { my $git = $ibx->git_epoch($epoch) or return; if (my $ent = $git->manifest_entry($epoch, $desc)) { $data->{"$pfx$epoch.git"} = $ent; $ent->{git_dir} = $git->{git_dir}; } $git->cleanup; # ->modified starts cat-file --batch } } elsif (my $ent = $ibx->git->manifest_entry) { # v1 $ent->{git_dir} = $ibx->{inboxdir}; $data->{"/$ibx->{name}"} = $ent; } $doc->set_data($json->encode($data)); if (defined $docid) { $xdb->replace_document($docid, $doc); } else { $xdb->add_document($doc); } } 1; public-inbox-1.9.0/lib/PublicInbox/MiscSearch.pm000066400000000000000000000105731430031475700214710ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # read-only counterpart to MiscIdx package PublicInbox::MiscSearch; use strict; use v5.10.1; use PublicInbox::Search qw(retry_reopen int_val xap_terms); my $json; # Xapian value columns: our $MODIFIED = 0; our $UIDVALIDITY = 1; # (created time) our $ART_MIN = 2; # NNTP article number our $ART_MAX = 3; # NNTP article number # avoid conflicting with message Search::prob_prefix for UI/UX reasons my %PROB_PREFIX = ( description => 'S', # $INBOX_DIR/description address => 'A', listid => 'XLISTID', url => 'XURL', infourl => 'XINFOURL', name => 'XNAME', '' => 'S A XLISTID XNAME XURL XINFOURL' ); sub new { my ($class, $dir) = @_; PublicInbox::Search::load_xapian(); $json //= PublicInbox::Config::json(); bless { xdb => $PublicInbox::Search::X{Database}->new($dir) }, $class; } # read-only sub mi_qp_new ($) { my ($self) = @_; my $xdb = $self->{xdb}; my $qp = $PublicInbox::Search::X{QueryParser}->new; $qp->set_default_op(PublicInbox::Search::OP_AND()); $qp->set_database($xdb); $qp->set_stemmer(PublicInbox::Search::stemmer($self)); $qp->set_stemming_strategy(PublicInbox::Search::STEM_SOME()); my $cb = $qp->can('set_max_wildcard_expansion') // $qp->can('set_max_expansion'); # Xapian 1.5.0+ $cb->($qp, 100); $cb = $qp->can('add_valuerangeprocessor') // $qp->can('add_rangeprocessor'); # Xapian 1.5.0+ while (my ($name, $prefix) = each %PROB_PREFIX) { $qp->add_prefix($name, $_) for split(/ /, $prefix); } $qp->add_boolean_prefix('type', 'T'); $qp; } sub misc_enquire_once { # retry_reopen callback my ($self, $qr, $opt) = @_; my $eq = $PublicInbox::Search::X{Enquire}->new($self->{xdb}); $eq->set_query($qr); my $desc = !$opt->{asc}; my $rel = $opt->{relevance} // 0; if ($rel == -1) { # ORDER BY docid $eq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING); $eq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new); } elsif ($rel) { $eq->set_sort_by_relevance_then_value($MODIFIED, $desc); } else { $eq->set_sort_by_value_then_relevance($MODIFIED, $desc); } $eq->get_mset($opt->{offset} || 0, $opt->{limit} || 200); } sub mset { my ($self, $qs, $opt) = @_; $opt ||= {}; reopen($self); my $qp = $self->{qp} //= mi_qp_new($self); $qs = 'type:inbox' if $qs eq ''; my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS); $opt->{relevance} = 1 unless exists $opt->{relevance}; retry_reopen($self, \&misc_enquire_once, $qr, $opt); } sub ibx_data_once { my ($self, $ibx) = @_; my $xdb = $self->{xdb}; my $term = 'Q'.$ibx->eidx_key; # may be {inboxdir}, so private my $head = $xdb->postlist_begin($term); my $tail = $xdb->postlist_end($term); return if $head == $tail; my $doc = $xdb->get_document($head->get_docid); $ibx->{uidvalidity} //= int_val($doc, $UIDVALIDITY); $ibx->{-modified} = int_val($doc, $MODIFIED); $ibx->{-art_min} = int_val($doc, $ART_MIN); $ibx->{-art_max} = int_val($doc, $ART_MAX); $doc->get_data; } sub doc2ibx_cache_ent { # @_ == ($self, $doc) OR ($doc) my ($doc) = $_[-1]; my $d; my $data = $json->decode($doc->get_data); for (values %$data) { $d = $_->{description} // next; $d =~ s/ \[epoch [0-9]+\]\z// or next; last; } { uidvalidity => int_val($doc, $UIDVALIDITY), -modified => int_val($doc, $MODIFIED), -art_min => int_val($doc, $ART_MIN), # may be undef -art_max => int_val($doc, $ART_MAX), # may be undef # extract description from manifest.js.gz epoch description description => $d }; } sub inbox_data { my ($self, $ibx) = @_; retry_reopen($self, \&ibx_data_once, $ibx); } sub ibx_cache_load { my ($doc, $cache) = @_; my ($eidx_key) = xap_terms('Q', $doc); return unless defined($eidx_key); # expired $cache->{$eidx_key} = doc2ibx_cache_ent($doc); } sub _nntpd_cache_load { # retry_reopen callback my ($self) = @_; my $opt = { limit => $self->{xdb}->get_doccount * 10, relevance => -1 }; my $mset = mset($self, 'type:newsgroup type:inbox', $opt); my $cache = {}; for my $it ($mset->items) { ibx_cache_load($it->get_document, $cache); } $cache } # returns { newsgroup => $cache_entry } mapping, $cache_entry contains # anything which may trigger seeks at startup, currently: description, # -modified, and uidvalidity. sub nntpd_cache_load { my ($self) = @_; retry_reopen($self, \&_nntpd_cache_load); } no warnings 'once'; *reopen = \&PublicInbox::Search::reopen; 1; public-inbox-1.9.0/lib/PublicInbox/MsgIter.pm000066400000000000000000000067621430031475700210270ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # read-only utilities for Email::MIME package PublicInbox::MsgIter; use strict; use warnings; use base qw(Exporter); our @EXPORT = qw(msg_iter msg_part_text); # This becomes PublicInbox::MIME->each_part: # Like Email::MIME::walk_parts, but this is: # * non-recursive # * passes depth and indices to the iterator callback sub em_each_part ($$;$$) { my ($mime, $cb, $cb_arg, $do_undef) = @_; my @parts = $mime->subparts; if (@parts) { $mime = $_[0] = undef if $do_undef; # saves some memory my $i = 0; @parts = map { [ $_, 1, ++$i ] } @parts; while (my $p = shift @parts) { my ($part, $depth, $idx) = @$p; my @sub = $part->subparts; if (@sub) { $depth++; $i = 0; @sub = map { [ $_, $depth, "$idx.".(++$i) ] } @sub; @parts = (@sub, @parts); } else { $cb->($p, $cb_arg); } } } else { $cb->([$mime, 0, 1], $cb_arg); } } # Use this when we may accept Email::MIME from user scripts # (not just PublicInbox::MIME) sub msg_iter ($$;$$) { # $_[0] = PublicInbox::MIME/Email::MIME-like obj my (undef, $cb, $cb_arg, $once) = @_; if (my $ep = $_[0]->can('each_part')) { # PublicInbox::{MIME,*} $ep->($_[0], $cb, $cb_arg, $once); } else { # for compatibility with existing Email::MIME users: em_each_part($_[0], $cb, $cb_arg, $once); } } sub msg_part_text ($$) { my ($part, $ct) = @_; # TODO: we may offer a separate sub for people who need to index # HTML-only mail, but the majority of HTML mail is multipart/alternative # with a text part which we don't have to waste cycles decoding return if $ct =~ m!\btext/x?html\b!; my $s = eval { $part->body_str }; my $err = $@; # text/plain is the default, multipart/mixed happened a few # times when it should not have been: # <87llgalspt.fsf@free.fr> # <200308111450.h7BEoOu20077@mail.osdl.org> # But also do not try this with ->{is_submsg} (message/rfc822), # since a broken multipart/mixed inside a message/rfc822 part # has not been seen in the wild, yet... if ($err && ($ct =~ m!\btext/\b!i || (!$part->{is_submsg} && $ct =~ m!\bmultipart/mixed\b!i) ) ) { my $cte = $part->header_raw('Content-Transfer-Encoding'); if (defined($cte) && $cte =~ /\b7bit\b/i) { $s = $part->body; $err = undef if $s =~ /\A[[:ascii:]]+\z/s; } else { # Try to assume UTF-8 because Alpine seems to # do wacky things and set charset=X-UNKNOWN $part->charset_set('UTF-8'); $s = eval { $part->body_str }; } # If forcing charset=UTF-8 failed, # caller will warn further down... $s = $part->body if $@; } elsif ($err && $ct =~ m!\bapplication/octet-stream\b!i) { # Some unconfigured/poorly-configured MUAs will set # application/octet-stream even for all text attachments. # Try to see if it's printable text that we can index # and display: $s = $part->body; utf8::decode($s); undef($s =~ /[^\p{XPosixPrint}\s]/s ? $s : $err); } ($s, $err); } # returns an array of quoted or unquoted sections sub split_quotes { # some editors don't put trailing newlines at the end, # make sure split_quotes can work: $_[0] .= "\n" if substr($_[0], -1) ne "\n"; # Quiet "Complex regular subexpression recursion limit" warning # in case an inconsiderate sender quotes 32K of text at once. # The warning from Perl is harmless for us since our callers can # tolerate less-than-ideal matches which work within Perl limits. no warnings 'regexp'; split(/((?:^>[^\n]*\n)+)/sm, $_[0]); } 1; public-inbox-1.9.0/lib/PublicInbox/MsgTime.pm000066400000000000000000000123701430031475700210120ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ # Various date/time-related functions package PublicInbox::MsgTime; use strict; use warnings; use base qw(Exporter); our @EXPORT_OK = qw(msg_timestamp msg_datestamp); use Time::Local qw(timegm); my @MoY = qw(january february march april may june july august september october november december); my %MoY; @MoY{@MoY} = (0..11); @MoY{map { substr($_, 0, 3) } @MoY} = (0..11); my %OBSOLETE_TZ = ( # RFC2822 4.3 (Obsolete Date and Time) EST => '-0500', EDT => '-0400', CST => '-0600', CDT => '-0500', MST => '-0700', MDT => '-0600', PST => '-0800', PDT => '-0700', UT => '+0000', GMT => '+0000', Z => '+0000', # RFC2822 states: # The 1 character military time zones were defined in a non-standard # way in [RFC822] and are therefore unpredictable in their meaning. ); my $OBSOLETE_TZ = join('|', keys %OBSOLETE_TZ); sub str2date_zone ($) { my ($date) = @_; my ($ts, $zone); # RFC822 is most likely for email, but we can tolerate an extra comma # or punctuation as long as all the data is there. # We'll use '\s' since Unicode spaces won't affect our parsing. # SpamAssassin ignores commas and redundant spaces, too. if ($date =~ /(?:[A-Za-z]+,?\s+)? # day-of-week ([0-9]+),?\s+ # dd ([A-Za-z]+)\s+ # mon ([0-9]{2,4})\s+ # YYYY or YY (or YYY :P) ([0-9]+)[:\.] # HH: ((?:[0-9]{2})|(?:\s?[0-9])) # MM (?:[:\.]((?:[0-9]{2})|(?:\s?[0-9])))? # :SS \s+ # a TZ offset is required: ([\+\-])? # TZ sign [\+\-]* # I've seen extra "-" e.g. "--500" ([0-9]+|$OBSOLETE_TZ)(?:\s|$) # TZ offset /xo) { my ($dd, $m, $yyyy, $hh, $mm, $ss, $sign, $tz) = ($1, $2, $3, $4, $5, $6, $7, $8); # don't accept non-English months defined(my $mon = $MoY{lc($m)}) or return; if (defined(my $off = $OBSOLETE_TZ{$tz})) { $sign = substr($off, 0, 1); $tz = substr($off, 1); } # Y2K problems: 3-digit years, follow RFC2822 if (length($yyyy) <= 3) { $yyyy += 1900; # and 2-digit years from '09 (2009) (0..49) $yyyy += 100 if $yyyy < 1950; } $ts = timegm($ss // 0, $mm, $hh, $dd, $mon, $yyyy); # 4-digit dates in non-spam from 1900s and 1910s exist in # lore archives return if $ts < 0; # Compute the time offset from [+-]HHMM $tz //= 0; my ($tz_hh, $tz_mm); if (length($tz) == 1) { $tz_hh = $tz; $tz_mm = 0; } elsif (length($tz) == 2) { $tz_hh = 0; $tz_mm = $tz; } else { $tz_hh = $tz; $tz_hh =~ s/([0-9]{2})\z//; $tz_mm = $1; } while ($tz_mm >= 60) { $tz_mm -= 60; $tz_hh += 1; } $sign //= '+'; my $off = $sign . ($tz_mm * 60 + ($tz_hh * 60 * 60)); $ts -= $off; $sign = '+' if $off == 0; $zone = sprintf('%s%02d%02d', $sign, $tz_hh, $tz_mm); # Time::Zone and Date::Parse are part of the same distribution, # and we need Time::Zone to deal with tz names like "EDT" } elsif (eval { require Date::Parse }) { $ts = Date::Parse::str2time($date); return undef unless(defined $ts); # off is the time zone offset in seconds from GMT my ($ss,$mm,$hh,$day,$month,$year,$off) = Date::Parse::strptime($date); return unless defined($year); $off //= 0; # Compute the time zone from offset my $sign = ($off < 0) ? '-' : '+'; my $hour = abs(int($off / 3600)); my $min = ($off / 60) % 60; # deal with weird offsets like '-0420' properly $min = 60 - $min if ($min && $off < 0); $zone = sprintf('%s%02d%02d', $sign, $hour, $min); } else { warn "Date::Parse missing for non-RFC822 date: $date\n"; return undef; } # Note: we've already applied the offset to $ts at this point, # but we want to keep "git fsck" happy. # "-1200" is the furthest westermost zone offset, # but git fast-import is liberal so we use "-1400" if ($zone >= 1400 || $zone <= -1400) { warn "bogus TZ offset: $zone, ignoring and assuming +0000\n"; $zone = '+0000'; } [$ts, $zone]; } sub time_response ($) { my ($ret) = @_; wantarray ? @$ret : $ret->[0]; } sub msg_received_at ($) { my ($hdr) = @_; # PublicInbox::Eml my @recvd = $hdr->header_raw('Received'); my ($ts); foreach my $r (@recvd) { $r =~ /\s*([0-9]+\s+[a-zA-Z]+\s+[0-9]{2,4}\s+ [0-9]+[^0-9][0-9]+(?:[^0-9][0-9]+) \s+([\+\-][0-9]+))/sx or next; $ts = eval { str2date_zone($1) } and return $ts; my $mid = $hdr->header_raw('Message-ID'); warn "no date in $mid Received: $r\n"; } undef; } sub msg_date_only ($) { my ($hdr) = @_; # PublicInbox::Eml my @date = $hdr->header_raw('Date'); my ($ts); foreach my $d (@date) { $ts = eval { str2date_zone($d) } and return $ts; if ($@) { my $mid = $hdr->header_raw('Message-ID'); warn "bad Date: $d in $mid: $@\n"; } } undef; } # Favors Received header for sorting globally sub msg_timestamp ($;$) { my ($hdr, $fallback) = @_; # PublicInbox::Eml my $ret; $ret = msg_received_at($hdr) and return time_response($ret); $ret = msg_date_only($hdr) and return time_response($ret); time_response([ $fallback // time, '+0000' ]); } # Favors the Date: header for display and sorting within a thread sub msg_datestamp ($;$) { my ($hdr, $fallback) = @_; # PublicInbox::Eml my $ret; $ret = msg_date_only($hdr) and return time_response($ret); $ret = msg_received_at($hdr) and return time_response($ret); time_response([ $fallback // time, '+0000' ]); } 1; public-inbox-1.9.0/lib/PublicInbox/Msgmap.pm000066400000000000000000000155251430031475700206760ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # bidirectional Message-ID <-> Article Number mapping for the NNTP # and web interfaces. This is required for implementing stable article # numbers for NNTP and allows prefix lookups for partial Message-IDs # in case URLs get truncated from copy-n-paste errors by users. # # This is maintained by ::SearchIdx (v1) and ::V2Writable (v2) package PublicInbox::Msgmap; use strict; use v5.10.1; use DBI; use DBD::SQLite; use PublicInbox::Over; use Scalar::Util qw(blessed); sub new_file { my ($class, $ibx, $rw) = @_; my $f; if (blessed($ibx)) { $f = $ibx->mm_file; $rw = 2 if $rw && $ibx->{-no_fsync}; } else { $f = $ibx; } return if !$rw && !-r $f; my $self = bless { filename => $f }, $class; my $dbh = $self->{dbh} = PublicInbox::Over::dbh_new($self, $rw); if ($rw) { $dbh->begin_work; create_tables($dbh); unless ($self->created_at) { my $t; if (blessed($ibx) && -f "$ibx->{inboxdir}/inbox.config.example") { $t = (stat(_))[9]; # mtime set by "curl -R" } $self->created_at($t // time); } $self->num_highwater(max($self)); $dbh->commit; } $self; } # used to keep track of used numeric mappings for v2 reindex sub tmp_clone { my ($self, $dir) = @_; require File::Temp; my $tmp = "mm_tmp-$$-XXXX"; my ($fh, $fn) = File::Temp::tempfile($tmp, EXLOCK => 0, DIR => $dir); require PublicInbox::Syscall; PublicInbox::Syscall::nodatacow_fh($fh); $self->{dbh}->sqlite_backup_to_file($fn); $tmp = ref($self)->new_file($fn, 2); $tmp->{dbh}->do('PRAGMA journal_mode = MEMORY'); $tmp->{pid} = $$; $tmp; } # n.b. invoked directly by scripts/xhdr-num2mid sub meta_accessor { my ($self, $key, $value) = @_; my $sql = 'SELECT val FROM meta WHERE key = ? LIMIT 1'; my $prev = $self->{dbh}->selectrow_array($sql, undef, $key); $value // return $prev; if (defined $prev) { $sql = 'UPDATE meta SET val = ? WHERE key = ?'; $self->{dbh}->do($sql, undef, $value, $key); } else { $sql = 'INSERT INTO meta (key,val) VALUES (?,?)'; $self->{dbh}->do($sql, undef, $key, $value); } $prev; } sub last_commit { my ($self, $commit) = @_; $self->meta_accessor('last_commit', $commit); } # v2 uses this to keep track of how up-to-date Xapian is # old versions may be automatically GC'ed away in the future, # but it's a trivial amount of storage. sub last_commit_xap { my ($self, $version, $i, $commit) = @_; $self->meta_accessor("last_xap$version-$i", $commit); } # this is the UIDVALIDITY for IMAP (cf. RFC 3501 sec 2.3.1.1. item 3) sub created_at { my ($self, $second) = @_; $self->meta_accessor('created_at', $second); } sub num_highwater { my ($self, $num) = @_; my $high = $self->meta_accessor('num_highwater'); if (defined($num) && (!defined($high) || ($num > $high))) { $high = $num; $self->meta_accessor('num_highwater', $num); } $high } sub mid_insert { my ($self, $mid) = @_; my $sth = $self->{dbh}->prepare_cached(<<''); INSERT INTO msgmap (mid) VALUES (?) return unless eval { $sth->execute($mid) }; my $num = $self->{dbh}->last_insert_id(undef, undef, 'msgmap', 'num'); $self->num_highwater($num) if defined($num); $num; } sub mid_for { my ($self, $num) = @_; my $sth = $self->{dbh}->prepare_cached(<<"", undef, 1); SELECT mid FROM msgmap WHERE num = ? LIMIT 1 $sth->execute($num); $sth->fetchrow_array; } sub num_for { my ($self, $mid) = @_; my $sth = $self->{dbh}->prepare_cached(<<"", undef, 1); SELECT num FROM msgmap WHERE mid = ? LIMIT 1 $sth->execute($mid); $sth->fetchrow_array; } sub max { my $sth = $_[0]->{dbh}->prepare_cached('SELECT MAX(num) FROM msgmap', undef, 1); $sth->execute; $sth->fetchrow_array // 0; } sub min { my $sth = $_[0]->{dbh}->prepare_cached('SELECT MIN(num) FROM msgmap', undef, 1); $sth->execute; $sth->fetchrow_array // 0; } sub minmax { # breaking MIN and MAX into separate queries speeds up from 250ms # to around 700us with 2.7million messages. (min($_[0]), max($_[0])); } sub mid_delete { my ($self, $mid) = @_; $self->{dbh}->do('DELETE FROM msgmap WHERE mid = ?', undef, $mid); } sub num_delete { my ($self, $num) = @_; $self->{dbh}->do('DELETE FROM msgmap WHERE num = ?', undef, $num); } sub create_tables { my ($dbh) = @_; $dbh->do(<<''); CREATE TABLE IF NOT EXISTS msgmap ( num INTEGER PRIMARY KEY AUTOINCREMENT, mid VARCHAR(1000) NOT NULL, UNIQUE (mid) ) $dbh->do(<<''); CREATE TABLE IF NOT EXISTS meta ( key VARCHAR(32) PRIMARY KEY, val VARCHAR(255) NOT NULL ) } sub msg_range { my ($self, $beg, $end, $cols) = @_; $cols //= 'num,mid'; my $attr = { Columns => [] }; my $mids = $self->{dbh}->selectall_arrayref(<<"", $attr, $$beg, $end); SELECT $cols FROM msgmap WHERE num >= ? AND num <= ? ORDER BY num ASC LIMIT 1000 $$beg = $mids->[-1]->[0] + 1 if @$mids; $mids } # only used for mapping external serial numbers (e.g. articles from gmane) # see scripts/xhdr-num2mid or PublicInbox::Filter::RubyLang for usage sub mid_set { my ($self, $num, $mid) = @_; my $sth = $self->{dbh}->prepare_cached(<<""); INSERT OR IGNORE INTO msgmap (num,mid) VALUES (?,?) my $result = $sth->execute($num, $mid); $self->num_highwater($num) if (defined($result) && $result == 1); $result; } sub DESTROY { my ($self) = @_; my $dbh = $self->{dbh} or return; if (($self->{pid} // 0) == $$) { my $f = $dbh->sqlite_db_filename; unlink $f or warn "failed to unlink $f: $!\n"; } } sub atfork_parent { my ($self) = @_; $self->{pid} or die 'BUG: not a temporary clone'; $self->{dbh} and die 'BUG: tmp_clone dbh not prepared for parent'; defined($self->{filename}) or die 'BUG: {filename} not defined'; $self->{dbh} = PublicInbox::Over::dbh_new($self, 2); $self->{dbh}->do('PRAGMA journal_mode = MEMORY'); } sub atfork_prepare { my ($self) = @_; my $pid = $self->{pid} or die 'BUG: not a temporary clone'; $pid == $$ or die "BUG: atfork_prepare not called by $pid"; my $dbh = $self->{dbh} or die 'BUG: temporary clone not open'; # must clobber prepared statements %$self = (filename => $dbh->sqlite_db_filename, pid => $pid); } sub skip_artnum { my ($self, $skip_artnum) = @_; return meta_accessor($self, 'skip_artnum') if !defined($skip_artnum); my $cur = num_highwater($self) // 0; if ($skip_artnum < $cur) { die "E: current article number $cur ", "exceeds --skip-artnum=$skip_artnum\n"; } else { my $ok; for (1..10) { my $mid = 'skip'.rand.'@'.rand.'.example.com'; $ok = mid_set($self, $skip_artnum, $mid); if ($ok) { mid_delete($self, $mid); last; } } $ok or die '--skip-artnum failed'; # in the future, the indexer may use this value for # new messages in old epochs meta_accessor($self, 'skip_artnum', $skip_artnum); } } sub check_inodes { my ($self) = @_; $self->{dbh} // return; my $rw = !$self->{dbh}->{ReadOnly}; PublicInbox::Over::check_inodes($self); $self->{dbh} //= PublicInbox::Over::dbh_new($self, !$rw); } 1; public-inbox-1.9.0/lib/PublicInbox/MultiGit.pm000066400000000000000000000073171430031475700212100ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # common git alternates + all.git||ALL.git management code package PublicInbox::MultiGit; use strict; use v5.10.1; use PublicInbox::Spawn qw(run_die); use PublicInbox::Import; use File::Temp 0.19; use List::Util qw(max); sub new { my ($cls, $topdir, $all, $epfx) = @_; bless { topdir => $topdir, # inboxdir || extindex.*.topdir all => $all, # all.git or ALL.git epfx => $epfx, # "git" (inbox) or "local" (lei/store) }, $cls; } sub read_alternates { my ($self, $moderef, $prune) = @_; my $objpfx = "$self->{topdir}/$self->{all}/objects/"; my $f = "${objpfx}info/alternates"; my %alt; # line => score my %seen; # $st_dev\0$st_ino => count my $other = 0; if (open(my $fh, '<', $f)) { my $is_edir = defined($self->{epfx}) ? qr!\A\Q../../$self->{epfx}\E/([0-9]+)\.git/objects\z! : undef; $$moderef = (stat($fh))[2] & 07777; for my $rel (split(/^/m, do { local $/; <$fh> })) { chomp(my $dir = $rel); my $score; if (defined($is_edir) && $dir =~ $is_edir) { $score = $1 + 0; substr($dir, 0, 0) = $objpfx; } else { # absolute paths, if any (extindex) $score = --$other; } if (my @st = stat($dir)) { next if $seen{"$st[0]\0$st[1]"}++; $alt{$rel} = $score; } else { warn "W: stat($dir) failed: $! ($f)"; if ($prune) { ++$$prune; } else { $alt{$rel} = $score; } } } } elsif (!$!{ENOENT}) { die "E: open($f): $!"; } (\%alt, \%seen); } sub epoch_dir { "$_[0]->{topdir}/$_[0]->{epfx}" } sub write_alternates { my ($self, $mode, $alt, @new) = @_; my $all_dir = "$self->{topdir}/$self->{all}"; PublicInbox::Import::init_bare($all_dir); my $out = join('', sort { $alt->{$b} <=> $alt->{$a} } keys %$alt); my $info_dir = "$all_dir/objects/info"; my $fh = File::Temp->new(TEMPLATE => 'alt-XXXX', DIR => $info_dir); my $f = $fh->filename; print $fh $out, @new or die "print($f): $!"; chmod($mode, $fh) or die "fchmod($f): $!"; close $fh or die "close($f): $!"; my $fn = "$info_dir/alternates"; rename($f, $fn) or die "rename($f, $fn): $!"; $fh->unlink_on_destroy(0); } # returns true if new epochs exist sub merge_epochs { my ($self, $alt, $seen) = @_; my $epoch_dir = epoch_dir($self); if (opendir my $dh, $epoch_dir) { my $has_new; for my $bn (grep(/\A[0-9]+\.git\z/, readdir($dh))) { my $rel = "../../$self->{epfx}/$bn/objects\n"; next if exists($alt->{$rel}); if (my @st = stat("$epoch_dir/$bn/objects")) { next if $seen->{"$st[0]\0$st[1]"}++; $alt->{$rel} = substr($bn, 0, -4) + 0; $has_new = 1; } else { warn "E: stat($epoch_dir/$bn/objects): $!"; } } $has_new; } else { $!{ENOENT} ? undef : die "opendir($epoch_dir): $!"; } } sub fill_alternates { my ($self) = @_; my ($alt, $seen) = read_alternates($self, \(my $mode = 0644)); merge_epochs($self, $alt, $seen) and write_alternates($self, $mode, $alt); } sub epoch_cfg_set { my ($self, $epoch_nr) = @_; run_die([qw(git config -f), epoch_dir($self)."/$epoch_nr.git/config", 'include.path', "../../$self->{all}/config" ]); } sub add_epoch { my ($self, $epoch_nr) = @_; my $git_dir = epoch_dir($self)."/$epoch_nr.git"; my $f = "$git_dir/config"; my $existing = -f $f; PublicInbox::Import::init_bare($git_dir); epoch_cfg_set($self, $epoch_nr) unless $existing; fill_alternates($self); $git_dir; } sub git_epochs { my ($self) = @_; if (opendir(my $dh, epoch_dir($self))) { my @epochs = map { substr($_, 0, -4) + 0; # drop ".git" suffix } grep(/\A[0-9]+\.git\z/, readdir($dh)); wantarray ? sort { $b <=> $a } @epochs : (max(@epochs) // 0); } elsif ($!{ENOENT}) { wantarray ? () : 0; } else { die(epoch_dir($self).": $!"); } } 1; public-inbox-1.9.0/lib/PublicInbox/NNTP.pm000066400000000000000000000656041430031475700202340ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # # Each instance of this represents a NNTP client socket # fields: # nntpd: PublicInbox::NNTPD ref # article: per-session current article number # ibx: PublicInbox::Inbox ref # long_cb: long_response private data package PublicInbox::NNTP; use strict; use v5.10.1; use parent qw(PublicInbox::DS); use PublicInbox::MID qw(mid_escape $MID_EXTRACT); use PublicInbox::Eml; use POSIX qw(strftime); use PublicInbox::DS qw(now); use Digest::SHA qw(sha1_hex); use Time::Local qw(timegm timelocal); use PublicInbox::GitAsyncCat; use PublicInbox::Address; use constant { LINE_MAX => 512, # RFC 977 section 2.3 r501 => "501 command syntax error\r\n", r502 => "502 Command unavailable\r\n", r221 => "221 Header follows\r\n", r225 => "225 Headers follow (multi-line)\r\n", r430 => "430 No article with that message-id\r\n", }; use Errno qw(EAGAIN); my $ONE_MSGID = qr/\A$MID_EXTRACT\z/; my @OVERVIEW = qw(Subject From Date Message-ID References); my $OVERVIEW_FMT = join(":\r\n", @OVERVIEW, qw(Bytes Lines), '') . "Xref:full\r\n.\r\n"; my $LIST_HEADERS = join("\r\n", @OVERVIEW, qw(:bytes :lines Xref To Cc)) . "\r\n.\r\n"; my $CAPABILITIES = <<""; 101 Capability list:\r VERSION 2\r READER\r NEWNEWS\r LIST ACTIVE ACTIVE.TIMES NEWSGROUPS OVERVIEW.FMT\r HDR\r OVER\r COMPRESS DEFLATE\r sub do_greet ($) { $_[0]->write($_[0]->{nntpd}->{greet}) }; sub new { my ($cls, $sock, $nntpd) = @_; (bless { nntpd => $nntpd }, $cls)->greet($sock) } sub args_ok ($$) { my ($cb, $argc) = @_; my $tot = prototype $cb; my ($nreq, undef) = split(/;/, $tot); $nreq = ($nreq =~ tr/$//) - 1; $tot = ($tot =~ tr/$//) - 1; ($argc <= $tot && $argc >= $nreq); } # returns 1 if we can continue, 0 if not due to buffered writes or disconnect sub process_line ($$) { my ($self, $l) = @_; my ($req, @args) = split(/[ \t]+/, $l); return 1 unless defined($req); # skip blank line $req = $self->can('cmd_'.lc($req)) // return $self->write(\"500 command not recognized\r\n"); return $self->write(\r501) unless args_ok($req, scalar @args); my $res = eval { $req->($self, @args) }; my $err = $@; if ($err && $self->{sock}) { $l =~ s/\r?\n//s; warn("error from: $l ($err)\n"); $res = \"503 program fault - command not performed\r\n"; } defined($res) ? $self->write($res) : 0; } # The keyword argument is not used (rfc3977 5.2.2) sub cmd_capabilities ($;$) { my ($self, undef) = @_; my $res = $CAPABILITIES; if (!$self->{sock}->can('accept_SSL') && $self->{nntpd}->{ssl_ctx_opt}) { $res .= "STARTTLS\r\n"; } $res .= ".\r\n"; } sub cmd_mode ($$) { my ($self, $arg) = @_; uc($arg) eq 'READER' ? \"201 Posting prohibited\r\n" : \r501; } sub cmd_slave ($) { \"202 slave status noted\r\n" } sub cmd_xgtitle ($;$) { my ($self, $wildmat) = @_; $self->msg_more("282 list of groups and descriptions follows\r\n"); list_newsgroups($self, $wildmat); } sub list_overview_fmt ($) { $OVERVIEW_FMT } sub list_headers ($;$) { $LIST_HEADERS } sub names2ibx ($;$) { my ($self, $names) = @_; my $groups = $self->{nntpd}->{pi_cfg}->{-by_newsgroup}; if ($names) { # modify arrayref in-place $_ = $groups->{$_} for @$names; $names; # now an arrayref of ibx } else { my @ret = map { $groups->{$_} } @{$self->{nntpd}->{groupnames}}; \@ret; } } sub list_active_i { # "LIST ACTIVE" and also just "LIST" (no args) my ($self, $ibxs) = @_; my @window = splice(@$ibxs, 0, 1000); emit_group_lines($self, \@window); scalar @$ibxs; # continue if there's more } sub list_active ($;$) { # called by cmd_list my ($self, $wildmat) = @_; wildmat2re($wildmat); my @names = grep(/$wildmat/, @{$self->{nntpd}->{groupnames}}); $self->long_response(\&list_active_i, names2ibx($self, \@names)); } sub list_active_times_i { my ($self, $ibxs) = @_; my @window = splice(@$ibxs, 0, 1000); $self->msg_more(join('', map { my $c = eval { $_->uidvalidity } // time; "$_->{newsgroup} $c <$_->{-primary_address}>\r\n"; } @window)); scalar @$ibxs; # continue if there's more } sub list_active_times ($;$) { # called by cmd_list my ($self, $wildmat) = @_; wildmat2re($wildmat); my @names = grep(/$wildmat/, @{$self->{nntpd}->{groupnames}}); $self->long_response(\&list_active_times_i, names2ibx($self, \@names)); } sub list_newsgroups_i { my ($self, $ibxs) = @_; my @window = splice(@$ibxs, 0, 1000); $self->msg_more(join('', map { "$_->{newsgroup} ".$_->description."\r\n" } @window)); scalar @$ibxs; # continue if there's more } sub list_newsgroups ($;$) { # called by cmd_list my ($self, $wildmat) = @_; wildmat2re($wildmat); my @names = grep(/$wildmat/, @{$self->{nntpd}->{groupnames}}); $self->long_response(\&list_newsgroups_i, names2ibx($self, \@names)); } # LIST SUBSCRIPTIONS, DISTRIB.PATS are not supported sub cmd_list ($;$$) { my ($self, @args) = @_; if (scalar @args) { my $arg = shift @args; $arg =~ tr/A-Z./a-z_/; my $ret = $arg eq 'active'; $arg = "list_$arg"; $arg = $self->can($arg); return r501 unless $arg && args_ok($arg, scalar @args); $self->msg_more("215 information follows\r\n"); $arg->($self, @args); } else { $self->msg_more("215 list of newsgroups follows\r\n"); $self->long_response(\&list_active_i, names2ibx($self)); } } sub listgroup_range_i { my ($self, $beg, $end) = @_; my $r = $self->{ibx}->mm(1)->msg_range($beg, $end, 'num'); scalar(@$r) or return; $self->msg_more(join("\r\n", @$r, '')); 1; } sub listgroup_all_i { my ($self, $num) = @_; my $ary = $self->{ibx}->over(1)->ids_after($num); scalar(@$ary) or return; $self->msg_more(join("\r\n", @$ary, '')); 1; } sub cmd_listgroup ($;$$) { my ($self, $group, $range) = @_; if (defined $group) { my $res = cmd_group($self, $group); return $res if ref($res); # error if const strref $self->msg_more($res); } $self->{ibx} or return \"412 no newsgroup selected\r\n"; if (defined $range) { my $r = get_range($self, $range); return $r unless ref $r; $self->long_response(\&listgroup_range_i, @$r); } else { # grab every article number $self->long_response(\&listgroup_all_i, \(my $num = 0)); } } sub parse_time ($$;$) { my ($date, $time, $gmt) = @_; my ($hh, $mm, $ss) = unpack('A2A2A2', $time); if (defined $gmt) { $gmt =~ /\A(?:UTC|GMT)\z/i or die "GM invalid: $gmt"; $gmt = 1; } my ($YYYY, $MM, $DD); if (length($date) == 8) { # RFC 3977 allows YYYYMMDD ($YYYY, $MM, $DD) = unpack('A4A2A2', $date); } else { # legacy clients send YYMMDD my $YY; ($YY, $MM, $DD) = unpack('A2A2A2', $date); my @now = $gmt ? gmtime : localtime; my $cur_year = $now[5] + 1900; my $cur_cent = int($cur_year / 100) * 100; $YYYY = (($YY + $cur_cent) > $cur_year) ? ($YY + 1900) : ($YY + $cur_cent); } if ($gmt) { timegm($ss, $mm, $hh, $DD, $MM - 1, $YYYY); } else { timelocal($ss, $mm, $hh, $DD, $MM - 1, $YYYY); } } sub emit_group_lines { my ($self, $ibxs) = @_; my ($min, $max); my $ALL = $self->{nntpd}->{pi_cfg}->ALL; my $misc = $ALL->misc if $ALL; my $buf = ''; for my $ibx (@$ibxs) { $misc ? $misc->inbox_data($ibx) : delete(@$ibx{qw(-art_min -art_max)}); ($min, $max) = ($ibx->art_min, $ibx->art_max); $buf .= "$ibx->{newsgroup} $max $min n\r\n"; } $self->msg_more($buf); } sub newgroups_i { my ($self, $ts, $ibxs) = @_; my @window = splice(@$ibxs, 0, 1000); @window = grep { (eval { $_->uidvalidity } // 0) > $ts } @window; emit_group_lines($self, \@window); scalar @$ibxs; # any more? } sub cmd_newgroups ($$$;$$) { my ($self, $date, $time, $gmt, $dists) = @_; my $ts = eval { parse_time($date, $time, $gmt) }; return r501 if $@; # TODO dists $self->msg_more("231 list of new newsgroups follows\r\n"); $self->long_response(\&newgroups_i, $ts, names2ibx($self)); } sub wildmat2re (;$) { return $_[0] = qr/.*/ if (!defined $_[0] || $_[0] eq '*'); my %keep; my $salt = rand; my $tmp = $_[0]; $tmp =~ s#(? '.*', '?' => '.' ); $tmp =~ s#(? '.*', ',' => '|'); $_[0] =~ s!(.)!$map{$1} || "\Q$1"!ge; $_[0] = qr/\A(?:$_[0])\z/; } sub newnews_i { my ($self, $ibxs, $ts, $prev) = @_; if (my $over = $ibxs->[0]->over) { my $msgs = $over->query_ts($ts, $$prev); if (scalar @$msgs) { $self->msg_more(join('', map { "<$_->{mid}>\r\n"; } @$msgs)); $$prev = $msgs->[-1]->{num}; return 1; # continue on current group } } shift @$ibxs; if (@$ibxs) { # continue onto next newsgroup $$prev = 0; 1; } else { # all done, break out of the long_response undef; } } sub cmd_newnews ($$$$;$$) { my ($self, $newsgroups, $date, $time, $gmt, $dists) = @_; my $ts = eval { parse_time($date, $time, $gmt) }; return r501 if $@; $self->msg_more("230 list of new articles by message-id follows\r\n"); my ($keep, $skip) = split(/!/, $newsgroups, 2); ngpat2re($keep); ngpat2re($skip); my @names = grep(/$keep/, @{$self->{nntpd}->{groupnames}}); @names = grep(!/$skip/, @names); return \".\r\n" unless scalar(@names); my $prev = 0; $self->long_response(\&newnews_i, names2ibx($self, \@names), $ts, \$prev); } sub cmd_group ($$) { my ($self, $group) = @_; my $nntpd = $self->{nntpd}; my $ibx = $nntpd->{pi_cfg}->{-by_newsgroup}->{$group} or return \"411 no such news group\r\n"; $nntpd->idler_start; $self->{ibx} = $ibx; my ($min, $max) = $ibx->mm(1)->minmax; $self->{article} = $min; my $est_size = $max - $min; "211 $est_size $min $max $group\r\n"; } sub article_adj ($$) { my ($self, $off) = @_; my $ibx = $self->{ibx} // return \"412 no newsgroup selected\r\n"; my $n = $self->{article} // return \"420 no current article has been selected\r\n"; $n += $off; my $mid = $ibx->mm(1)->mid_for($n) // do { $n = $off > 0 ? 'next' : 'previous'; return "421 no $n article in this group\r\n"; }; $self->{article} = $n; "223 $n <$mid> article retrieved - request text separately\r\n"; } sub cmd_next ($) { article_adj($_[0], 1) } sub cmd_last ($) { article_adj($_[0], -1) } # We want to encourage using email and CC-ing everybody involved to avoid # the single-point-of-failure a single server provides. sub cmd_post ($) { my ($self) = @_; my $ibx = $self->{ibx}; $ibx ? "440 mailto:$ibx->{-primary_address} to post\r\n" : \"440 posting not allowed\r\n" } sub cmd_quit ($) { my ($self) = @_; $self->write(\"205 closing connection - goodbye!\r\n"); $self->shutdn; undef; } sub xref_by_tc ($$$) { my ($xref, $pi_cfg, $smsg) = @_; my $by_addr = $pi_cfg->{-by_addr}; my $mid = $smsg->{mid}; for my $f (qw(to cc)) { my @ibxs = map { $by_addr->{lc($_)} // () } (PublicInbox::Address::emails($smsg->{$f} // '')); for my $ibx (@ibxs) { $xref->{$ibx->{newsgroup}} //= $ibx->mm(1)->num_for($mid); } } } sub xref ($$$) { my ($self, $cur_ibx, $smsg) = @_; my $nntpd = $self->{nntpd}; my $cur_ng = $cur_ibx->{newsgroup}; my $xref; if (my $ALL = $nntpd->{pi_cfg}->ALL) { $xref = $ALL->nntp_xref_for($cur_ibx, $smsg); xref_by_tc($xref, $nntpd->{pi_cfg}, $smsg); } else { # slow path $xref = { $cur_ng => $smsg->{num} }; my $mid = $smsg->{mid}; for my $ibx (values %{$nntpd->{pi_cfg}->{-by_newsgroup}}) { $xref->{$ibx->{newsgroup}} //= $ibx->mm(1)->num_for($mid); } } my $ret = "$nntpd->{servername} $cur_ng:".delete($xref->{$cur_ng}); for my $ng (sort keys %$xref) { my $num = $xref->{$ng} // next; $ret .= " $ng:$num"; } $ret; } sub set_nntp_headers ($$) { my ($hdr, $smsg) = @_; my ($mid) = $smsg->{mid}; # leafnode (and maybe other NNTP clients) have trouble dealing # with v2 messages which have multiple Message-IDs (either due # to our own content-based dedupe or buggy git-send-email versions). my @mids = $hdr->header_raw('Message-ID'); if (scalar(@mids) > 1) { my $mid0 = "<$mid>"; $hdr->header_set('Message-ID', $mid0); my @alt = $hdr->header_raw('X-Alt-Message-ID'); my %seen = map { $_ => 1 } (@alt, $mid0); push(@alt, grep { !$seen{$_}++ } @mids); $hdr->header_set('X-Alt-Message-ID', @alt); } # clobber some existing headers my $ibx = $smsg->{-ibx}; my $xref = xref($smsg->{nntp}, $ibx, $smsg); $hdr->header_set('Xref', $xref); # RFC 5536 3.1.4 my ($server_name, $newsgroups) = split(/ /, $xref, 2); $newsgroups =~ s/:[0-9]+\b//g; # drop NNTP article numbers $newsgroups =~ tr/ /,/; $hdr->header_set('Newsgroups', $newsgroups); # *something* here is required for leafnode, try to follow # RFC 5536 3.1.5... $hdr->header_set('Path', $server_name . '!not-for-mail'); } sub art_lookup ($$$) { my ($self, $art, $code) = @_; my ($ibx, $n); my $err; if (defined $art) { if ($art =~ /\A[0-9]+\z/) { $err = \"423 no such article number in this group\r\n"; $n = int($art); goto find_ibx; } elsif ($art =~ $ONE_MSGID) { ($ibx, $n) = mid_lookup($self, $1); goto found if $ibx; return \r430; } else { return \r501; } } else { $err = \"420 no current article has been selected\r\n"; $n = $self->{article} // return $err; find_ibx: $ibx = $self->{ibx} or return \"412 no newsgroup has been selected\r\n"; } found: my $smsg = $ibx->over(1)->get_art($n) or return $err; $smsg->{-ibx} = $ibx; if ($code == 223) { # STAT set_art($self, $n); "223 $n <$smsg->{mid}> article retrieved - " . "request text separately\r\n"; } else { # HEAD | BODY | ARTICLE $smsg->{nntp} = $self; $smsg->{nntp_code} = $code; set_art($self, $art); # this dereferences to `undef' ${ibx_async_cat($ibx, $smsg->{blob}, \&blob_cb, $smsg)}; } } sub msg_body_write ($$) { my ($self, $msg) = @_; # these can momentarily double the memory consumption :< $$msg =~ s/^\./../smg; $$msg =~ s/(?msg_more($$msg); } sub set_art { my ($self, $art) = @_; $self->{article} = $art if defined $art && $art =~ /\A[0-9]+\z/; } sub msg_hdr_write ($$) { my ($eml, $smsg) = @_; set_nntp_headers($eml, $smsg); my $hdr = $eml->{hdr} // \(my $x = ''); # fixup old bug from import (pre-a0c07cba0e5d8b6a) $$hdr =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; $$hdr =~ s/(?{nntp}->msg_more($$hdr); } sub blob_cb { # called by git->cat_async via ibx_async_cat my ($bref, $oid, $type, $size, $smsg) = @_; my $self = $smsg->{nntp}; my $code = $smsg->{nntp_code}; if (!defined($oid)) { # it's possible to have TOCTOU if an admin runs # public-inbox-(edit|purge), just move onto the next message warn "E: $smsg->{blob} missing in $smsg->{-ibx}->{inboxdir}\n"; return $self->requeue; } elsif ($smsg->{blob} ne $oid) { $self->close; die "BUG: $smsg->{blob} != $oid"; } my $r = "$code $smsg->{num} <$smsg->{mid}> article retrieved - "; my $eml = PublicInbox::Eml->new($bref); if ($code == 220) { $self->msg_more($r .= "head and body follow\r\n"); msg_hdr_write($eml, $smsg); $self->msg_more("\r\n"); msg_body_write($self, $bref); } elsif ($code == 221) { $self->msg_more($r .= "head follows\r\n"); msg_hdr_write($eml, $smsg); } elsif ($code == 222) { $self->msg_more($r .= "body follows\r\n"); msg_body_write($self, $bref); } else { $self->close; die "BUG: bad code: $r"; } $self->write(\".\r\n"); # flushes (includes ->dflush) $self->requeue; } sub cmd_article ($;$) { my ($self, $art) = @_; art_lookup($self, $art, 220); } sub cmd_head ($;$) { my ($self, $art) = @_; art_lookup($self, $art, 221); } sub cmd_body ($;$) { my ($self, $art) = @_; art_lookup($self, $art, 222); } sub cmd_stat ($;$) { my ($self, $art) = @_; art_lookup($self, $art, 223); # art may be msgid } sub cmd_ihave ($) { \"435 article not wanted - do not send it\r\n" } sub cmd_date ($) { '111 '.strftime('%Y%m%d%H%M%S', gmtime(time))."\r\n" } sub cmd_help ($) { \"100 help text follows\r\n.\r\n" } # returns a ref on success sub get_range ($$) { my ($self, $range) = @_; my $ibx = $self->{ibx} // return "412 no news group has been selected\r\n"; $range // return "420 No article(s) selected\r\n"; my ($beg, $end); my ($min, $max) = $ibx->mm(1)->minmax; if ($range =~ /\A([0-9]+)\z/) { $beg = $end = $1; } elsif ($range =~ /\A([0-9]+)-\z/) { ($beg, $end) = ($1, $max); } elsif ($range =~ /\A([0-9]+)-([0-9]+)\z/) { ($beg, $end) = ($1, $2); } else { return r501; } $beg = $min if ($beg < $min); $end = $max if ($end > $max); $beg > $end ? "420 No article(s) selected\r\n" : [ \$beg, $end ]; } sub long_response_done { $_[0]->write(\".\r\n") } # overrides superclass sub hdr_msgid_range_i { my ($self, $beg, $end) = @_; my $r = $self->{ibx}->mm(1)->msg_range($beg, $end); @$r or return; $self->msg_more(join('', map { "$_->[0] <$_->[1]>\r\n" } @$r)); 1; } sub hdr_message_id ($$$) { # optimize XHDR Message-ID [range] for slrnpull. my ($self, $xhdr, $range) = @_; if (defined $range && $range =~ $ONE_MSGID) { my ($ibx, $n) = mid_lookup($self, $1); return r430 unless $n; hdr_mid_response($self, $xhdr, $ibx, $n, $range, $range); } else { # numeric range $range = $self->{article} unless defined $range; my $r = get_range($self, $range); return $r unless ref $r; $self->msg_more($xhdr ? r221 : r225); $self->long_response(\&hdr_msgid_range_i, @$r); } } sub mid_lookup ($$) { my ($self, $mid) = @_; my $cur_ibx = $self->{ibx}; if ($cur_ibx) { my $n = $cur_ibx->mm(1)->num_for($mid); return ($cur_ibx, $n) if defined $n; } my $pi_cfg = $self->{nntpd}->{pi_cfg}; if (my $ALL = $pi_cfg->ALL) { my ($id, $prev); while (my $smsg = $ALL->over->next_by_mid($mid, \$id, \$prev)) { my $xr3 = $ALL->over->get_xref3($smsg->{num}); if (my @x = grep(/:$smsg->{blob}\z/, @$xr3)) { my ($ngname, $xnum) = split(/:/, $x[0]); my $ibx = $pi_cfg->{-by_newsgroup}->{$ngname}; return ($ibx, $xnum) if $ibx; # fall through to trying all xref3s } else { warn < ($smsg->{blob}) in $ALL->{topdir}, -extindex bug? EOF } # try all xref3s for my $x (@$xr3) { my ($ngname, $xnum) = split(/:/, $x); my $ibx = $pi_cfg->{-by_newsgroup}->{$ngname}; return ($ibx, $xnum) if $ibx; warn "W: `$ngname' does not exist for #$xnum\n"; } } # no warning here, $mid is just invalid } else { # slow path for non-ALL users for my $ibx (values %{$pi_cfg->{-by_newsgroup}}) { next if defined $cur_ibx && $ibx eq $cur_ibx; my $n = $ibx->mm(1)->num_for($mid); return ($ibx, $n) if defined $n; } } (undef, undef); } sub xref_range_i { my ($self, $beg, $end) = @_; my $ibx = $self->{ibx}; my $msgs = $ibx->over(1)->query_xover($$beg, $end); scalar(@$msgs) or return; $$beg = $msgs->[-1]->{num} + 1; $self->msg_more(join('', map { "$_->{num} ".xref($self, $ibx, $_) . "\r\n"; } @$msgs)); 1; } sub hdr_xref ($$$) { # optimize XHDR Xref [range] for rtin my ($self, $xhdr, $range) = @_; if (defined $range && $range =~ $ONE_MSGID) { my $mid = $1; my ($ibx, $n) = mid_lookup($self, $mid); return r430 unless $n; my $smsg = $ibx->over(1)->get_art($n) or return; hdr_mid_response($self, $xhdr, $ibx, $n, $range, xref($self, $ibx, $smsg)); } else { # numeric range $range = $self->{article} unless defined $range; my $r = get_range($self, $range); return $r unless ref $r; $self->msg_more($xhdr ? r221 : r225); $self->long_response(\&xref_range_i, @$r); } } sub over_header_for { my ($ibx, $num, $field) = @_; my $smsg = $ibx->over(1)->get_art($num) or return; return PublicInbox::Smsg::date($smsg) if $field eq 'date'; $smsg->{$field}; } sub smsg_range_i { my ($self, $beg, $end, $field) = @_; my $msgs = $self->{ibx}->over(1)->query_xover($$beg, $end); scalar(@$msgs) or return; my $tmp = ''; # ->{$field} is faster than ->$field invocations, so favor that. if ($field eq 'date') { for my $s (@$msgs) { $tmp .= "$s->{num} ".PublicInbox::Smsg::date($s)."\r\n" } } else { for my $s (@$msgs) { $tmp .= "$s->{num} $s->{$field}\r\n"; } } utf8::encode($tmp); $self->msg_more($tmp); $$beg = $msgs->[-1]->{num} + 1; } sub hdr_smsg ($$$$) { my ($self, $xhdr, $field, $range) = @_; if (defined $range && $range =~ $ONE_MSGID) { my ($ibx, $n) = mid_lookup($self, $1); return r430 unless defined $n; my $v = over_header_for($ibx, $n, $field); hdr_mid_response($self, $xhdr, $ibx, $n, $range, $v); } else { # numeric range $range = $self->{article} unless defined $range; my $r = get_range($self, $range); return $r unless ref $r; $self->msg_more($xhdr ? r221 : r225); $self->long_response(\&smsg_range_i, @$r, $field); } } sub do_hdr ($$$;$) { my ($self, $xhdr, $header, $range) = @_; my $sub = lc $header; if ($sub eq 'message-id') { hdr_message_id($self, $xhdr, $range); } elsif ($sub eq 'xref') { hdr_xref($self, $xhdr, $range); } elsif ($sub =~ /\A(?:subject|references|date|from|to|cc| bytes|lines)\z/x) { hdr_smsg($self, $xhdr, $sub, $range); } elsif ($sub =~ /\A:(bytes|lines)\z/) { hdr_smsg($self, $xhdr, $1, $range); } else { $xhdr ? (r221.".\r\n") : "503 HDR not permitted on $header\r\n"; } } # RFC 3977 sub cmd_hdr ($$;$) { my ($self, $header, $range) = @_; do_hdr($self, 0, $header, $range); } # RFC 2980 sub cmd_xhdr ($$;$) { my ($self, $header, $range) = @_; do_hdr($self, 1, $header, $range); } sub hdr_mid_prefix ($$$$$) { my ($self, $xhdr, $ibx, $n, $mid) = @_; return $mid if $xhdr; # HDR for RFC 3977 users if (my $cur_ibx = $self->{ibx}) { ($cur_ibx eq $ibx) ? $n : '0'; } else { '0'; } } sub hdr_mid_response ($$$$$$) { my ($self, $xhdr, $ibx, $n, $mid, $v) = @_; $self->write(($xhdr ? r221.$mid : r225.hdr_mid_prefix($self, $xhdr, $ibx, $n, $mid)) . " $v\r\n.\r\n"); undef; } sub xrover_i { my ($self, $beg, $end) = @_; my $h = over_header_for($self->{ibx}, $$beg, 'references'); $self->msg_more("$$beg $h\r\n") if defined($h); $$beg++ < $end; } sub cmd_xrover ($;$) { my ($self, $range) = @_; my $ibx = $self->{ibx} or return \"412 no newsgroup selected\r\n"; (defined $range && $range =~ /[<>]/) and return \"420 No article(s) selected\r\n"; # no message IDs $range = $self->{article} unless defined $range; my $r = get_range($self, $range); return $r unless ref $r; $self->msg_more("224 Overview information follows\r\n"); $self->long_response(\&xrover_i, @$r); } sub over_line ($$$) { my ($self, $ibx, $smsg) = @_; # n.b. field access and procedural calls can be # 10%-15% faster than OO method calls: my $s = join("\t", $smsg->{num}, $smsg->{subject}, $smsg->{from}, PublicInbox::Smsg::date($smsg), "<$smsg->{mid}>", $smsg->{references}, $smsg->{bytes}, $smsg->{lines}, "Xref: " . xref($self, $ibx, $smsg)); utf8::encode($s); $s .= "\r\n"; } sub cmd_over ($;$) { my ($self, $range) = @_; if ($range && $range =~ $ONE_MSGID) { my ($ibx, $n) = mid_lookup($self, $1); defined $n or return r430; my $smsg = $ibx->over(1)->get_art($n) or return r430; $self->msg_more( "224 Overview information follows (multi-line)\r\n"); # Only set article number column if it's the current group # (RFC 3977 8.3.2) my $cur_ibx = $self->{ibx}; if (!$cur_ibx || $cur_ibx ne $ibx) { # set {-orig_num} for nntp_xref_for $smsg->{-orig_num} = $smsg->{num}; $smsg->{num} = 0; } over_line($self, $ibx, $smsg).".\r\n"; } else { cmd_xover($self, $range); } } sub xover_i { my ($self, $beg, $end) = @_; my $ibx = $self->{ibx}; my $msgs = $ibx->over(1)->query_xover($$beg, $end); my $nr = scalar @$msgs or return; # OVERVIEW.FMT $self->msg_more(join('', map { over_line($self, $ibx, $_); } @$msgs)); $$beg = $msgs->[-1]->{num} + 1; } sub cmd_xover ($;$) { my ($self, $range) = @_; $range = $self->{article} unless defined $range; my $r = get_range($self, $range); return $r unless ref $r; my ($beg, $end) = @$r; $self->msg_more( "224 Overview information follows for $$beg to $end\r\n"); $self->long_response(\&xover_i, @$r); } sub cmd_starttls ($) { my ($self) = @_; # RFC 4642 2.2.1 (($self->{sock} // return)->can('stop_SSL') || $self->compressed) and return r502; $self->{nntpd}->{ssl_ctx_opt} or return \"580 can not initiate TLS negotiation\r\n"; $self->write(\"382 Continue with TLS negotiation\r\n"); PublicInbox::TLS::start($self->{sock}, $self->{nntpd}); $self->requeue if PublicInbox::DS::accept_tls_step($self); undef; } # RFC 8054 sub cmd_compress ($$) { my ($self, $alg) = @_; return "503 Only DEFLATE is supported\r\n" if uc($alg) ne 'DEFLATE'; return r502 if $self->compressed; PublicInbox::NNTPdeflate->enable($self) or return \"403 Unable to activate compression\r\n"; PublicInbox::DS::write($self, \"206 Compression active\r\n"); $self->requeue; undef } sub cmd_xpath ($$) { my ($self, $mid) = @_; return r501 unless $mid =~ $ONE_MSGID; $mid = $1; my @paths; my $pi_cfg = $self->{nntpd}->{pi_cfg}; my $groups = $pi_cfg->{-by_newsgroup}; if (my $ALL = $pi_cfg->ALL) { my ($id, $prev, %seen); while (my $smsg = $ALL->over->next_by_mid($mid, \$id, \$prev)) { my $xr3 = $ALL->over->get_xref3($smsg->{num}); for my $x (@$xr3) { my ($ngname, $n) = split(/:/, $x); $x = "$ngname/$n"; if ($groups->{$ngname} && !$seen{$x}++) { push(@paths, $x); } } } } else { # slow path, no point in using long_response for my $ibx (values %$groups) { my $n = $ibx->mm(1)->num_for($mid) // next; push @paths, "$ibx->{newsgroup}/$n"; } } return \"430 no such article on server\r\n" unless @paths; '223 '.join(' ', sort(@paths))."\r\n"; } sub out ($$;@) { my ($self, $fmt, @args) = @_; printf { $self->{nntpd}->{out} } $fmt."\n", @args; } # callback used by PublicInbox::DS for any (e)poll (in/out/hup/err) sub event_step { my ($self) = @_; local $SIG{__WARN__} = $self->{nntpd}->{warn_cb}; return unless $self->flush_write && $self->{sock} && !$self->{long_cb}; # only read more requests if we've drained the write buffer, # otherwise we can be buffering infinitely w/o backpressure my $rbuf = $self->{rbuf} // \(my $x = ''); my $line = index($$rbuf, "\n"); while ($line < 0) { return $self->close if length($$rbuf) >= LINE_MAX; $self->do_read($rbuf, LINE_MAX, length($$rbuf)) or return; $line = index($$rbuf, "\n"); } $line = substr($$rbuf, 0, $line + 1, ''); $line =~ s/\r?\n\z//s; return $self->close if $line =~ /[[:cntrl:]]/s; my $t0 = now(); my $fd = fileno($self->{sock}); my $r = eval { process_line($self, $line) }; my $pending = $self->{wbuf} ? ' pending' : ''; out($self, "[$fd] %s - %0.6f$pending", $line, now() - $t0); return $self->close if $r < 0; $self->rbuf_idle($rbuf); # maybe there's more pipelined data, or we'll have # to register it for socket-readiness notifications $self->requeue unless $pending; } sub busy { # for graceful shutdown in PublicInbox::Daemon: my ($self) = @_; defined($self->{rbuf}) || defined($self->{wbuf}) } package PublicInbox::NNTPdeflate; use PublicInbox::DSdeflate; our @ISA = qw(PublicInbox::DSdeflate PublicInbox::NNTP); 1; public-inbox-1.9.0/lib/PublicInbox/NNTPD.pm000066400000000000000000000034131430031475700203260ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # represents an NNTPD (currently a singleton), # see script/public-inbox-nntpd for how it is used package PublicInbox::NNTPD; use strict; use v5.10.1; use Sys::Hostname; use PublicInbox::Config; use PublicInbox::InboxIdle; use PublicInbox::NNTP; sub new { my ($class) = @_; bless { err => \*STDERR, out => \*STDOUT, # pi_cfg => $pi_cfg, # ssl_ctx_opt => { SSL_cert_file => ..., SSL_key_file => ... } # idler => PublicInbox::InboxIdle }, $class; } sub refresh_groups { my ($self, $sig) = @_; my $pi_cfg = PublicInbox::Config->new; my $name = $pi_cfg->{'publicinbox.nntpserver'}; if (!defined($name) or $name eq '') { $name = hostname; } elsif (ref($name) eq 'ARRAY') { $name = $name->[0]; } if ($name ne ($self->{servername} // '')) { $self->{servername} = $name; $self->{greet} = \"201 $name ready - post via email\r\n"; } my $groups = $pi_cfg->{-by_newsgroup}; # filled during each_inbox my $cache = eval { $pi_cfg->ALL->misc->nntpd_cache_load } // {}; $pi_cfg->each_inbox(sub { my ($ibx) = @_; my $ngname = $ibx->{newsgroup} // return; my $ce = $cache->{$ngname}; if (($ce and (%$ibx = (%$ibx, %$ce))) || $ibx->nntp_usable) { # only valid if msgmap and over works # preload to avoid fragmentation: $ibx->description; } else { delete $groups->{$ngname}; # Note: don't be tempted to delete more for memory # savings just yet: NNTP, IMAP, and WWW may all # run in the same process someday. } }); @{$self->{groupnames}} = sort(keys %$groups); # this will destroy old groups that got deleted $self->{pi_cfg} = $pi_cfg; } sub idler_start { $_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_cfg}); } 1; public-inbox-1.9.0/lib/PublicInbox/NetNNTPSocks.pm000066400000000000000000000016331430031475700216760ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # wrap Net::NNTP client with SOCKS support package PublicInbox::NetNNTPSocks; use strict; use v5.10.1; use Net::NNTP; our %OPT; our @ISA = qw(IO::Socket::Socks); my @SOCKS_KEYS = qw(ProxyAddr ProxyPort SocksVersion SocksDebug SocksResolve); # use this instead of Net::NNTP->new if using Proxy* sub new_socks { my (undef, %opt) = @_; require IO::Socket::Socks; local @Net::NNTP::ISA = (qw(Net::Cmd), __PACKAGE__); local %OPT = map {; defined($opt{$_}) ? ($_ => $opt{$_}) : () } @SOCKS_KEYS; Net::NNTP->new(%opt); # this calls our new() below: } # called by Net::NNTP->new sub new { my ($self, %opt) = @_; @OPT{qw(ConnectAddr ConnectPort)} = @opt{qw(PeerAddr PeerPort)}; my $ret = $self->SUPER::new(%OPT) or die 'SOCKS error: '.eval('$IO::Socket::Socks::SOCKS_ERROR'); $ret; } 1; public-inbox-1.9.0/lib/PublicInbox/NetReader.pm000066400000000000000000000620101430031475700213120ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # common reader code for IMAP and NNTP (and maybe JMAP) package PublicInbox::NetReader; use strict; use v5.10.1; use parent qw(Exporter PublicInbox::IPC); use PublicInbox::Eml; use PublicInbox::Config; our %IMAPflags2kw = map {; "\\\u$_" => $_ } qw(seen answered flagged draft); $IMAPflags2kw{'$Forwarded'} = 'forwarded'; # RFC 5550 our @EXPORT = qw(uri_section imap_uri nntp_uri); sub ndump { require Data::Dumper; Data::Dumper->new(\@_)->Useqq(1)->Terse(1)->Dump; } # returns the git config section name, e.g [imap "imaps://user@example.com"] # without the mailbox, so we can share connections between different inboxes sub uri_section ($) { my ($uri) = @_; $uri->scheme . '://' . $uri->authority; } sub socks_args ($) { my ($val) = @_; return if ($val // '') eq ''; if ($val =~ m!\Asocks5h:// (?: \[ ([^\]]+) \] | ([^:/]+) ) (?::([0-9]+))?/*\z!ix) { my ($h, $p) = ($1 // $2, $3 + 0); $h = '127.0.0.1' if $h eq '0'; eval { require IO::Socket::Socks } or die < $h, ProxyPort => $p }; } die "$val not understood (only socks5h:// is supported)\n"; } sub mic_new ($$$$) { my ($self, $mic_arg, $sec, $uri) = @_; my %mic_arg = (%$mic_arg, Keepalive => 1); my $sa = $self->{cfg_opt}->{$sec}->{-proxy_cfg} || $self->{-proxy_cli}; if ($sa) { # this `require' needed for worker[1..Inf], since socks_args # only got called in worker[0] require IO::Socket::Socks; my %opt = (%$sa, Keepalive => 1); $opt{SocksDebug} = 1 if $mic_arg{Debug}; $opt{ConnectAddr} = delete $mic_arg{Server}; $opt{ConnectPort} = delete $mic_arg{Port}; my $s = IO::Socket::Socks->new(%opt) or die "E: <$uri> ".eval('$IO::Socket::Socks::SOCKS_ERROR'); if ($mic_arg->{Ssl}) { # for imaps:// require IO::Socket::SSL; $s = IO::Socket::SSL->start_SSL($s) or die "E: <$uri> ".(IO::Socket::SSL->errstr // ''); } $mic_arg{Socket} = $s; } PublicInbox::IMAPClient->new(%mic_arg); } sub auth_anon_cb { '' }; # for Mail::IMAPClient::Authcallback sub onion_hint ($$) { my ($lei, $uri) = @_; $uri->host =~ /\.onion\z/i or return "\n"; my $t = $uri->isa('PublicInbox::URIimap') ? 'imap' : 'nntp'; my $url = PublicInbox::Config::squote_maybe(uri_section($uri)); my $set_cfg = 'lei config'; if (!$lei) { # public-inbox-watch my $f = PublicInbox::Config::squote_maybe( $ENV{PI_CONFIG} || '~/.public-inbox/config'); $set_cfg = "git config -f $f"; } my $dq = substr($url, 0, 1) eq "'" ? '"' : ''; < "$uri", protocol => $uri->scheme, host => $uri->host, username => $uri->user, password => $uri->password, }, 'PublicInbox::GitCredential'; my $sec = uri_section($uri); my $common = $mic_common->{$sec} // {}; # IMAPClient and Net::Netrc both mishandles `0', so we pass `127.0.0.1' my $host = $cred->{host}; $host = '127.0.0.1' if $host eq '0'; my $mic_arg = { Port => $uri->port, Server => $host, %$common, # may set Starttls, Compress, Debug .... }; $mic_arg->{Ssl} = 1 if $uri->scheme eq 'imaps'; require PublicInbox::IMAPClient; my $mic = mic_new($self, $mic_arg, $sec, $uri); ($mic && $mic->IsConnected) or die "E: <$uri> new: $@".onion_hint($lei, $uri); # default to using STARTTLS if it's available, but allow # it to be disabled since I usually connect to localhost if (!$mic_arg->{Ssl} && !defined($mic_arg->{Starttls}) && $mic->has_capability('STARTTLS') && try_starttls($host) && $mic->can('starttls')) { $mic->starttls or die "E: <$uri> STARTTLS: $@\n"; } # do we even need credentials? if (!defined($cred->{username}) && $mic->has_capability('AUTH=ANONYMOUS')) { $cred = undef; } if ($cred) { my $p = $cred->{password} // $cred->check_netrc($lei); $cred->fill($lei) unless defined($p); # may prompt user here $mic->User($mic_arg->{User} = $cred->{username}); $mic->Password($mic_arg->{Password} = $cred->{password}); } else { # AUTH=ANONYMOUS $mic->Authmechanism($mic_arg->{Authmechanism} = 'ANONYMOUS'); $mic_arg->{Authcallback} = 'auth_anon_cb'; $mic->Authcallback(\&auth_anon_cb); } my $err; if ($mic->login && $mic->IsAuthenticated) { # success! keep IMAPClient->new arg in case we get disconnected $self->{net_arg}->{$sec} = $mic_arg; if ($cred) { $uri->user($cred->{username}) if !defined($uri->user); } elsif ($mic_arg->{Authmechanism} eq 'ANONYMOUS') { $uri->auth('ANONYMOUS') if !defined($uri->auth); } } else { $err = "E: <$uri> LOGIN: $@\n"; if ($cred && defined($cred->{password})) { $err =~ s/\Q$cred->{password}\E/*******/g; } $mic = undef; } $cred->run($mic ? 'approve' : 'reject') if $cred && $cred->{filled}; if ($err) { $lei ? $lei->fail($err) : warn($err); } $mic; } sub nn_new ($$$) { my ($nn_arg, $nntp_cfg, $uri) = @_; my $nn; if (defined $nn_arg->{ProxyAddr}) { require PublicInbox::NetNNTPSocks; $nn_arg->{SocksDebug} = 1 if $nn_arg->{Debug}; eval { $nn = PublicInbox::NetNNTPSocks->new_socks(%$nn_arg) }; die "E: <$uri> $@\n" if $@; } else { $nn = Net::NNTP->new(%$nn_arg) or return; } setsockopt($nn, Socket::SOL_SOCKET(), Socket::SO_KEEPALIVE(), 1); # default to using STARTTLS if it's available, but allow # it to be disabled for localhost/VPN users if (!$nn_arg->{SSL} && $nn->can('starttls')) { if (!defined($nntp_cfg->{starttls}) && try_starttls($nn_arg->{Host})) { # soft fail by default $nn->starttls or warn <<""; W: <$uri> STARTTLS tried and failed (not requested) } elsif ($nntp_cfg->{starttls}) { # hard fail if explicitly configured $nn->starttls or die <<""; E: <$uri> STARTTLS requested and failed } } elsif ($nntp_cfg->{starttls}) { $nn->can('starttls') or die "E: <$uri> Net::NNTP too old for STARTTLS\n"; $nn->starttls or die <<""; E: <$uri> STARTTLS requested and failed } $nn; } sub nn_for ($$$$) { # nn = Net::NNTP my ($self, $uri, $nn_common, $lei) = @_; my $sec = uri_section($uri); my $nntp_cfg = $self->{cfg_opt}->{$sec} //= {}; my $host = $uri->host; # Net::NNTP and Net::Netrc both mishandle `0', so we pass `127.0.0.1' $host = '127.0.0.1' if $host eq '0'; my $cred; my ($u, $p); if (defined(my $ui = $uri->userinfo)) { require PublicInbox::GitCredential; $cred = bless { url => $sec, protocol => $uri->scheme, host => $host, }, 'PublicInbox::GitCredential'; ($u, $p) = split(/:/, $ui, 2); ($cred->{username}, $cred->{password}) = ($u, $p); $p //= $cred->check_netrc($lei); } my $common = $nn_common->{$sec} // {}; my $nn_arg = { Port => $uri->port, Host => $host, %$common, # may Debug .... }; $nn_arg->{SSL} = 1 if $uri->secure; # snews == nntps my $sa = $self->{-proxy_cli}; %$nn_arg = (%$nn_arg, %$sa) if $sa; my $nn = nn_new($nn_arg, $nntp_cfg, $uri) or die "E: <$uri> new: $@".onion_hint($lei, $uri); if ($cred) { $cred->fill($lei) unless defined($p); # may prompt user here if ($nn->authinfo($u, $p)) { push @{$nntp_cfg->{-postconn}}, [ 'authinfo', $u, $p ]; } else { warn "E: <$uri> AUTHINFO $u XXXX failed\n"; $nn = undef; } } if ($nntp_cfg->{compress}) { # https://rt.cpan.org/Ticket/Display.html?id=129967 if ($nn->can('compress')) { if ($nn->compress) { push @{$nntp_cfg->{-postconn}}, [ 'compress' ]; } else { warn "W: <$uri> COMPRESS failed\n"; } } else { delete $nntp_cfg->{compress}; warn <<""; W: <$uri> COMPRESS not supported by Net::NNTP W: see https://rt.cpan.org/Ticket/Display.html?id=129967 for updates } } $self->{net_arg}->{$sec} = $nn_arg; $cred->run($nn ? 'approve' : 'reject') if $cred && $cred->{filled}; $nn; } sub imap_uri { my ($url, $ls_ok) = @_; require PublicInbox::URIimap; my $uri = PublicInbox::URIimap->new($url); $uri && ($ls_ok || $uri->mailbox) ? $uri->canonical : undef; } my %IS_NNTP = (news => 1, snews => 1, nntp => 1, nntps => 1); sub nntp_uri { my ($url, $ls_ok) = @_; require PublicInbox::URInntps; my $uri = PublicInbox::URInntps->new($url); $uri && $IS_NNTP{$uri->scheme} && ($ls_ok || $uri->group) ? $uri->canonical : undef; } sub cfg_intvl ($$$) { my ($cfg, $key, $url) = @_; my $v = $cfg->urlmatch($key, $url) // return; $v =~ /\A[0-9]+(?:\.[0-9]+)?\z/s and return $v + 0; if (ref($v) eq 'ARRAY') { $v = join(', ', @$v); warn "W: $key has multiple values: $v\nW: $key ignored\n"; } else { warn "W: $key=$v is not a numeric value in seconds\n"; } } sub cfg_bool ($$$) { my ($cfg, $key, $url) = @_; my $orig = $cfg->urlmatch($key, $url) // return; my $bool = $cfg->git_bool($orig); warn "W: $key=$orig for $url is not boolean\n" unless defined($bool); $bool; } # flesh out common IMAP-specific data structures sub imap_common_init ($;$) { my ($self, $lei) = @_; return unless $self->{imap_order}; $self->{quiet} = 1 if $lei && $lei->{opt}->{quiet}; eval { require PublicInbox::IMAPClient } or die "Mail::IMAPClient is required for IMAP:\n$@\n"; ($lei || eval { require PublicInbox::IMAPTracker }) or die "DBD::SQLite is required for IMAP\n:$@\n"; require PublicInbox::URIimap; my $cfg = $self->{pi_cfg} // $lei->_lei_cfg; my $mic_common = {}; # scheme://authority => Mail:IMAPClient arg for my $uri (@{$self->{imap_order}}) { my $sec = uri_section($uri); # knobs directly for Mail::IMAPClient->new for my $k (qw(Starttls Debug Compress)) { my $bool = cfg_bool($cfg, "imap.$k", $$uri) // next; $mic_common->{$sec}->{$k} = $bool; } my $to = cfg_intvl($cfg, 'imap.timeout', $$uri); $mic_common->{$sec}->{Timeout} = $to if $to; # knobs we use ourselves: my $sa = socks_args($cfg->urlmatch('imap.Proxy', $$uri)); $self->{cfg_opt}->{$sec}->{-proxy_cfg} = $sa if $sa; for my $k (qw(pollInterval idleInterval)) { $to = cfg_intvl($cfg, "imap.$k", $$uri) // next; $self->{cfg_opt}->{$sec}->{$k} = $to; } my $k = 'imap.fetchBatchSize'; my $bs = $cfg->urlmatch($k, $$uri) // next; if ($bs =~ /\A([0-9]+)\z/ && $bs > 0) { $self->{cfg_opt}->{$sec}->{batch_size} = $bs; } else { warn "$k=$bs is not a positive integer\n"; } } # make sure we can connect and cache the credentials in memory my $mics = {}; # schema://authority => IMAPClient obj for my $orig_uri (@{$self->{imap_order}}) { my $sec = uri_section($orig_uri); my $uri = PublicInbox::URIimap->new("$sec/"); my $mic = $mics->{$sec} //= mic_for($self, $uri, $mic_common, $lei) // die "Unable to continue\n"; next unless $self->isa('PublicInbox::NetWriter'); next if $self->{-skip_creat}; my $dst = $orig_uri->mailbox // next; next if $mic->exists($dst); # already exists $mic->create($dst) or die "CREATE $dst failed <$orig_uri>: $@"; } $mics; } # flesh out common NNTP-specific data structures sub nntp_common_init ($;$) { my ($self, $lei) = @_; return unless $self->{nntp_order}; $self->{quiet} = 1 if $lei && $lei->{opt}->{quiet}; eval { require Net::NNTP } or die "Net::NNTP is required for NNTP:\n$@\n"; ($lei || eval { require PublicInbox::IMAPTracker }) or die "DBD::SQLite is required for NNTP\n:$@\n"; my $cfg = $self->{pi_cfg} // $lei->_lei_cfg; my $nn_common = {}; # scheme://authority => Net::NNTP->new arg for my $uri (@{$self->{nntp_order}}) { my $sec = uri_section($uri); my $args = $nn_common->{$sec} //= {}; # Debug and Timeout are passed to Net::NNTP->new my $v = cfg_bool($cfg, 'nntp.Debug', $$uri); $args->{Debug} = $v if defined $v; my $to = cfg_intvl($cfg, 'nntp.Timeout', $$uri); $args->{Timeout} = $to if $to; my $sa = socks_args($cfg->urlmatch('nntp.Proxy', $$uri)); %$args = (%$args, %$sa) if $sa; # Net::NNTP post-connect commands for my $k (qw(starttls compress)) { $v = cfg_bool($cfg, "nntp.$k", $$uri) // next; $self->{cfg_opt}->{$sec}->{$k} = $v; } # -watch internal option for my $k (qw(pollInterval)) { $to = cfg_intvl($cfg, "nntp.$k", $$uri) // next; $self->{cfg_opt}->{$sec}->{$k} = $to; } } # make sure we can connect and cache the credentials in memory my %nn; # schema://authority => Net::NNTP object for my $uri (@{$self->{nntp_order}}) { my $sec = uri_section($uri); $nn{$sec} //= nn_for($self, $uri, $nn_common, $lei); } \%nn; # for optional {nn_cached} } sub add_url { my ($self, $arg, $ls_ok) = @_; my $uri; if ($uri = imap_uri($arg, $ls_ok)) { $_[1] = $$uri; # canonicalized push @{$self->{imap_order}}, $uri; } elsif ($uri = nntp_uri($arg, $ls_ok)) { $_[1] = $$uri; # canonicalized push @{$self->{nntp_order}}, $uri; } else { push @{$self->{unsupported_url}}, $arg; } } sub errors { my ($self, $lei) = @_; if (my $u = $self->{unsupported_url}) { return "Unsupported URL(s): @$u"; } if ($self->{imap_order}) { eval { require PublicInbox::IMAPClient } or die "Mail::IMAPClient is required for IMAP:\n$@\n"; } if ($self->{nntp_order}) { eval { require Net::NNTP } or die "Net::NNTP is required for NNTP:\n$@\n"; } my $sa = socks_args($lei ? $lei->{opt}->{proxy} : undef); $self->{-proxy_cli} = $sa if $sa; undef; } sub flags2kw ($$$$) { my ($self, $uri, $uid, $flags) = @_; my $kw = []; for my $f (split(/ /, $flags)) { if (my $k = $IMAPflags2kw{$f}) { push @$kw, $k; } elsif ($f eq "\\Recent") { # not in JMAP } elsif ($f eq "\\Deleted") { # not in JMAP return; } elsif ($self->{verbose}) { warn "# unknown IMAP flag $f <$uri/;UID=$uid>\n"; } } @$kw = sort @$kw; # for LeiSearch->kw_changed and UI/UX purposes $kw; } sub _imap_do_msg ($$$$$) { my ($self, $uri, $uid, $raw, $flags) = @_; # our target audience expects LF-only, save storage $$raw =~ s/\r\n/\n/sg; my $kw = defined($flags) ? (flags2kw($self, $uri, $uid, $flags) // return) : undef; my ($eml_cb, @args) = @{$self->{eml_each}}; $eml_cb->($uri, $uid, $kw, PublicInbox::Eml->new($raw), @args); } sub run_commit_cb ($) { my ($self) = @_; my $cmt_cb_args = $self->{on_commit} or return; my ($cb, @args) = @$cmt_cb_args; $cb->(@args); } sub itrk_last ($$;$$) { my ($self, $uri, $r_uidval, $mic) = @_; return (undef, undef, $r_uidval) unless $self->{incremental}; my ($itrk, $l_uid, $l_uidval); if (defined(my $lms = $self->{-lms_rw})) { # LeiMailSync or 0 $uri->uidvalidity($r_uidval) if defined $r_uidval; if ($mic) { my $auth = $mic->Authmechanism // ''; $uri->auth($auth) if $auth eq 'ANONYMOUS'; my $user = $mic->User; $uri->user($user) if defined($user); } my $x; $l_uid = ($lms && ($x = $lms->location_stats($$uri))) ? $x->{'uid.max'} : undef; # itrk remains undef, lei/store worker writes to # mail_sync.sqlite3 } else { $itrk = PublicInbox::IMAPTracker->new($$uri); ($l_uidval, $l_uid) = $itrk->get_last($$uri); } ($itrk, $l_uid, $l_uidval //= $r_uidval); } # import flags of already-seen messages sub each_old_flags ($$$$) { my ($self, $mic, $uri, $l_uid) = @_; $l_uid ||= 1; my $sec = uri_section($uri); my $bs = ($self->{cfg_opt}->{$sec}->{batch_size} // 1) * 10000; my ($eml_cb, @args) = @{$self->{eml_each}}; $self->{quiet} or warn "# $uri syncing flags 1:$l_uid\n"; for (my $n = 1; $n <= $l_uid; $n += $bs) { my $end = $n + $bs; $end = $l_uid if $end > $l_uid; my $r = $mic->fetch_hash("$n:$end", 'FLAGS'); if (!$r) { return if $!{EINTR} && $self->{quit}; return "E: $uri UID FETCH $n:$end error: $!"; } while (my ($uid, $per_uid) = each %$r) { my $kw = flags2kw($self, $uri, $uid, $per_uid->{FLAGS}) // next; # LeiImport->input_net_cb $eml_cb->($uri, $uid, $kw, undef, @args); } } } # returns true if PERMANENTFLAGS indicates FLAGS of already imported # messages are meaningful sub perm_fl_ok ($) { my ($perm_fl) = @_; return if !defined($perm_fl); for my $f (split(/[ \t]+/, $perm_fl)) { return 1 if $IMAPflags2kw{$f}; } undef; } # may be overridden in NetWriter or Watch sub folder_select { $_[0]->{each_old} ? 'select' : 'examine' } sub _imap_fetch_bodies ($$$$) { my ($self, $mic, $uri, $uids) = @_; my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK'; my $key = $req; $key =~ s/\.PEEK//; my $sec = uri_section($uri); my $mbx = $uri->mailbox; my $bs = $self->{cfg_opt}->{$sec}->{batch_size} // 1; my ($last_uid, $err); my $use_fl = $self->{-use_fl}; while (scalar @$uids) { my @batch = splice(@$uids, 0, $bs); my $batch = join(',', @batch); local $0 = "UID:$batch $mbx $sec"; my $r = $mic->fetch_hash($batch, $req, 'FLAGS'); unless ($r) { # network error? last if $!{EINTR} && $self->{quit}; $err = "E: $uri UID FETCH $batch error: $!"; last; } for my $uid (@batch) { # messages get deleted, so holes appear my $per_uid = delete $r->{$uid} // next; my $raw = delete($per_uid->{$key}) // next; my $fl = $use_fl ? $per_uid->{FLAGS} : undef; _imap_do_msg($self, $uri, $uid, \$raw, $fl); $last_uid = $uid; last if $self->{quit}; } last if $self->{quit}; } ($last_uid, $err); } sub _imap_fetch_all ($$$) { my ($self, $mic, $orig_uri) = @_; my $sec = uri_section($orig_uri); my $mbx = $orig_uri->mailbox; $mic->Clear(1); # trim results history # we need to check for mailbox writability to see if we care about # FLAGS from already-imported messages. my $cmd = $self->folder_select; $mic->$cmd($mbx) or return "E: \U$cmd\E $mbx ($sec) failed: $!"; my ($r_uidval, $r_uidnext, $perm_fl); for ($mic->Results) { /^\* OK \[PERMANENTFLAGS \(([^\)]*)\)\].*/ and $perm_fl = $1; /^\* OK \[UIDVALIDITY ([0-9]+)\].*/ and $r_uidval = $1; /^\* OK \[UIDNEXT ([0-9]+)\].*/ and $r_uidnext = $1; } $r_uidval //= $mic->uidvalidity($mbx) // return "E: $orig_uri cannot get UIDVALIDITY"; $r_uidnext //= $mic->uidnext($mbx) // return "E: $orig_uri cannot get UIDNEXT"; my $expect = $orig_uri->uidvalidity // $r_uidval; return <clone; my $single_uid = $uri->uid; my ($itrk, $l_uid, $l_uidval) = itrk_last($self, $uri, $r_uidval, $mic); if (defined($single_uid)) { $itrk = $l_uid = undef; $uri->uid(undef); # for eml_cb } return <uidvalidity($r_uidval); $l_uid //= 0; my $r_uid = $r_uidnext - 1; return < $r_uid; E: $uri local UID exceeds remote ($l_uid > $r_uid) E: $uri strangely, UIDVALIDLITY matches ($l_uidval) EOF $mic->Uid(1); # the default, we hope my $err; my $use_fl = perm_fl_ok($perm_fl); local $self->{-use_fl} = $use_fl; if (!defined($single_uid) && $self->{each_old} && $use_fl) { $err = each_old_flags($self, $mic, $uri, $l_uid); return $err if $err; } return if $l_uid >= $r_uid; # nothing to do $l_uid ||= 1; my ($mod, $shard) = @{$self->{shard_info} // []}; unless ($self->{quiet}) { my $m = $mod ? " [(UID % $mod) == $shard]" : ''; warn "# $uri fetching UID $l_uid:$r_uid$m\n"; } my $fetch_cb = \&_imap_fetch_bodies; do { # I wish "UID FETCH $START:*" could work, but: # 1) servers do not need to return results in any order # 2) Mail::IMAPClient doesn't offer a streaming API my $uids; if (defined $single_uid) { $uids = [ $single_uid ]; } elsif (!($uids = $mic->search("UID $l_uid:*"))) { return if $!{EINTR} && $self->{quit}; return "E: $uri UID SEARCH $l_uid:* error: $!"; } return if scalar(@$uids) == 0; # RFC 3501 doesn't seem to indicate order of UID SEARCH # responses, so sort it ourselves. Order matters so # IMAPTracker can store the newest UID. @$uids = sort { $a <=> $b } @$uids; # Did we actually get new messages? return if $uids->[0] < $l_uid; $l_uid = $uids->[-1] + 1; # for next search @$uids = grep { ($_ % $mod) == $shard } @$uids if $mod; (my $last_uid, $err) = $fetch_cb->($self, $mic, $uri, $uids); run_commit_cb($self); $itrk->update_last($r_uidval, $last_uid) if $itrk; } until ($err || $self->{quit} || defined($single_uid)); $err; } # uses cached auth info prepared by mic_for sub mic_get { my ($self, $uri) = @_; my $sec = uri_section($uri); # see if caller saved result of imap_common_init my $cached = $self->{mics_cached}; if ($cached) { my $mic = $cached->{$sec}; return $mic if $mic && $mic->IsConnected; delete $cached->{$sec}; } my $mic_arg = $self->{net_arg}->{$sec} or die "BUG: no Mail::IMAPClient->new arg for $sec"; if (defined(my $cb_name = $mic_arg->{Authcallback})) { if (ref($cb_name) ne 'CODE') { $mic_arg->{Authcallback} = $self->can($cb_name); } } my $mic = mic_new($self, $mic_arg, $sec, $uri); $cached //= {}; # invalid placeholder if no cache enabled $mic && $mic->IsConnected ? ($cached->{$sec} = $mic) : undef; } sub imap_each { my ($self, $url, $eml_cb, @args) = @_; my $uri = ref($url) ? $url : PublicInbox::URIimap->new($url); my $sec = uri_section($uri); local $0 = $uri->mailbox." $sec"; my $mic = mic_get($self, $uri); my $err; if ($mic) { local $self->{eml_each} = [ $eml_cb, @args ]; $err = _imap_fetch_all($self, $mic, $uri); } else { $err = "E: <$uri> not connected: $!"; } die $err if $err && $self->{-can_die}; warn $err if $err; $mic; } # may used cached auth info prepared by nn_for once sub nn_get { my ($self, $uri) = @_; my $sec = uri_section($uri); # see if caller saved result of nntp_common_init my $cached = $self->{nn_cached} // {}; my $nn; $nn = delete($cached->{$sec}) and return $nn; my $nn_arg = $self->{net_arg}->{$sec} or die "BUG: no Net::NNTP->new arg for $sec"; my $nntp_cfg = $self->{cfg_opt}->{$sec}; $nn = nn_new($nn_arg, $nntp_cfg, $uri) or return; if (my $postconn = $nntp_cfg->{-postconn}) { for my $m_arg (@$postconn) { my ($method, @args) = @$m_arg; $nn->$method(@args) and next; die "E: <$uri> $method failed\n"; return; } } $nn; } sub _nntp_fetch_all ($$$) { my ($self, $nn, $uri) = @_; my ($group, $num_a, $num_b) = $uri->group; my $sec = uri_section($uri); my ($nr, $beg, $end) = $nn->group($group); unless (defined($nr)) { my $msg = ndump($nn->message); return "E: GROUP $group <$sec> $msg"; } (defined($num_a) && defined($num_b) && $num_a > $num_b) and return "E: $uri: backwards range: $num_a > $num_b"; if (defined($num_a)) { # no article numbers in mail_sync.sqlite3 $uri = $uri->clone; $uri->group($group); } # IMAPTracker is also used for tracking NNTP, UID == article number # LIST.ACTIVE can get the equivalent of UIDVALIDITY, but that's # expensive. So we assume newsgroups don't change: my ($itrk, $l_art) = itrk_last($self, $uri); if (defined($l_art) && !defined($num_a)) { return if $l_art >= $end; # nothing to do $beg = $l_art + 1; } # allow users to specify articles to refetch # cf. https://tools.ietf.org/id/draft-gilman-news-url-01.txt # nntp://example.com/inbox.foo/$num_a-$num_b $beg = $num_a if defined($num_a) && $num_a > $beg && $num_a <= $end; $end = $num_b if defined($num_b) && $num_b >= $beg && $num_b < $end; $end = $beg if defined($num_a) && !defined($num_b); my ($err, $art, $last_art, $kw); # kw stays undef, no keywords in NNTP unless ($self->{quiet}) { warn "# $uri fetching ARTICLE $beg..$end\n"; } my $n = $self->{max_batch}; for ($beg..$end) { last if $self->{quit}; $art = $_; if (--$n < 0) { run_commit_cb($self); $itrk->update_last(0, $last_art) if $itrk; $n = $self->{max_batch}; } my $raw = $nn->article($art); unless (defined($raw)) { my $msg = ndump($nn->message); if ($nn->code == 421) { # pseudo response from Net::Cmd $err = "E: $msg"; last; } else { # probably just a deleted message (spam) warn "W: $msg"; next; } } $raw = join('', @$raw); $raw =~ s/\r\n/\n/sg; my ($eml_cb, @args) = @{$self->{eml_each}}; $eml_cb->($uri, $art, $kw, PublicInbox::Eml->new(\$raw), @args); $last_art = $art; } run_commit_cb($self); $itrk->update_last(0, $last_art) if $itrk; $err; } sub nntp_each { my ($self, $url, $eml_cb, @args) = @_; my $uri = ref($url) ? $url : PublicInbox::URInntps->new($url); my $sec = uri_section($uri); local $0 = $uri->group ." $sec"; my $nn = nn_get($self, $uri); return if $self->{quit}; my $err; if ($nn) { local $self->{eml_each} = [ $eml_cb, @args ]; $err = _nntp_fetch_all($self, $nn, $uri); } else { $err = "E: <$uri> not connected: $!"; } die $err if $err && $self->{-can_die}; warn $err if $err; $nn; } sub new { bless {}, shift }; # updates $uri with UIDVALIDITY sub mic_for_folder { my ($self, $uri) = @_; my $mic = $self->mic_get($uri) or die "E: not connected: $@"; my $m = $self->isa('PublicInbox::NetWriter') ? 'select' : 'examine'; $mic->$m($uri->mailbox) or return; my $uidval; for ($mic->Results) { /^\* OK \[UIDVALIDITY ([0-9]+)\].*/ or next; $uidval = $1; last; } $uidval //= $mic->uidvalidity($uri->mailbox) or die "E: failed to get uidvalidity from <$uri>: $@"; $uri->uidvalidity($uidval); $mic; } 1; public-inbox-1.9.0/lib/PublicInbox/NetWriter.pm000066400000000000000000000035051430031475700213700ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # common writer code for IMAP (and later, JMAP) package PublicInbox::NetWriter; use strict; use v5.10.1; use parent qw(PublicInbox::NetReader); use PublicInbox::Smsg; use PublicInbox::MsgTime qw(msg_timestamp); my %IMAPkw2flags; @IMAPkw2flags{values %PublicInbox::NetReader::IMAPflags2kw} = keys %PublicInbox::NetReader::IMAPflags2kw; sub kw2flags ($) { join(' ', map { $IMAPkw2flags{$_} } @{$_[0]}) } sub imap_append { my ($mic, $folder, $bref, $smsg, $eml) = @_; $bref //= \($eml->as_string); $smsg //= bless {}, 'PublicInbox::Smsg'; bless($smsg, 'PublicInbox::Smsg') if ref($smsg) eq 'HASH'; $smsg->{ts} //= msg_timestamp($eml // PublicInbox::Eml->new($$bref)); my $f = kw2flags($smsg->{kw}); $mic->append_string($folder, $$bref, $f, $smsg->internaldate) or die "APPEND $folder: $@"; } sub folder_select { 'select' } # for PublicInbox::NetReader sub imap_delete_all { my ($self, $uri) = @_; my $mic = $self->mic_for_folder($uri) or return; my $sec = $self->can('uri_section')->($uri); local $0 = $uri->mailbox." $sec"; if ($mic->delete_message('1:*')) { $mic->expunge; } } sub imap_delete_1 { my ($self, $uri, $uid, $delete_mic) = @_; $$delete_mic //= $self->mic_for_folder($uri) or return; $$delete_mic->delete_message($uid); } sub imap_add_kw { my ($self, $mic, $uid, $kw) = @_; $mic->store($uid, '+FLAGS.SILENT', '('.kw2flags($kw).')'); $mic; # caller must ->expunge } sub imap_set_kw { my ($self, $mic, $uid, $kw) = @_; $mic->store($uid, 'FLAGS.SILENT', '('.kw2flags($kw).')'); $mic; # caller must ->expunge } sub can_store_flags { my ($self, $mic) = @_; for ($mic->Results) { /^\* OK \[PERMANENTFLAGS \(([^\)]*)\)\].*/ and return $self->can('perm_fl_ok')->($1); } undef; } 1; public-inbox-1.9.0/lib/PublicInbox/NewsWWW.pm000066400000000000000000000054371430031475700207740ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # # Plack app redirector for mapping /$NEWSGROUP requests to # the appropriate /$INBOX in PublicInbox::WWW because some # auto-linkifiers cannot handle nntp:// redirects properly. # This is also used directly by PublicInbox::WWW package PublicInbox::NewsWWW; use strict; use warnings; use PublicInbox::Config; use PublicInbox::MID qw(mid_escape); use PublicInbox::Hval qw(prurl); sub new { my ($class, $pi_cfg) = @_; bless { pi_cfg => $pi_cfg // PublicInbox::Config->new }, $class; } sub redirect ($$) { my ($code, $url) = @_; [ $code, [ Location => $url, 'Content-Type' => 'text/plain' ], [ "Redirecting to $url\n" ] ] } sub try_inbox { my ($ibx, $arg) = @_; return if scalar(@$arg) > 1; # do not pass $env since HTTP_HOST may differ my $url = $ibx->base_url or return; my ($mid) = @$arg; eval { $ibx->mm->num_for($mid) } or return; # 302 since the same message may show up on # multiple inboxes and inboxes can be added/reordered $arg->[1] = redirect(302, $url .= mid_escape($mid) . '/'); } sub call { my ($self, $env) = @_; # some links may have the article number in them: # /inbox.foo.bar/123456 my (undef, @parts) = split(m!/!, $env->{PATH_INFO}); @parts or return [ 404, [qw(Content-Type text/plain)], ["404 Not Found\n"] ]; my ($ng, $article) = @parts; my $pi_cfg = $self->{pi_cfg}; if (my $ibx = $pi_cfg->lookup_newsgroup($ng)) { my $url = prurl($env, $ibx->{url}); my $code = 301; if (defined $article && $article =~ /\A[0-9]+\z/) { my $mid = eval { $ibx->mm->mid_for($article) }; if (defined $mid) { # article IDs are not stable across clones, # do not encourage caching/bookmarking them $code = 302; $url .= mid_escape($mid) . '/'; } } return redirect($code, $url); } my @try = (join('/', @parts)); # trailing slash is in the rest of our WWW, so maybe some users # will assume it: if ($parts[-1] eq '') { pop @parts; push @try, join('/', @parts); } my $ALL = $pi_cfg->ALL; if (my $over = $ALL ? $ALL->over : undef) { my $by_eidx_key = $pi_cfg->{-by_eidx_key}; for my $mid (@try) { my ($id, $prev); while (my $x = $over->next_by_mid($mid, \$id, \$prev)) { my $xr3 = $over->get_xref3($x->{num}); for (@$xr3) { s/:[0-9]+:$x->{blob}\z// or next; my $ibx = $by_eidx_key->{$_} // next; my $url = $ALL->base_url($env) // $ibx->base_url // next; $url .= mid_escape($mid) . '/'; return redirect(302, $url); } } } } else { # slow path, scan every inbox for my $mid (@try) { my $arg = [ $mid ]; # [1] => result $pi_cfg->each_inbox(\&try_inbox, $arg); return $arg->[1] if $arg->[1]; } } [ 404, [qw(Content-Type text/plain)], ["404 Not Found\n"] ]; } 1; public-inbox-1.9.0/lib/PublicInbox/OnDestroy.pm000066400000000000000000000006171430031475700213740ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ package PublicInbox::OnDestroy; sub new { shift; # ($class, $cb, @args) bless [ @_ ], __PACKAGE__; } sub DESTROY { my ($cb, @args) = @{$_[0]}; if (!ref($cb) && $cb) { my $pid = $cb; return if $pid != $$; $cb = shift @args; } $cb->(@args) if $cb; } 1; public-inbox-1.9.0/lib/PublicInbox/Over.pm000066400000000000000000000240451430031475700203620ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # for XOVER, OVER in NNTP, and feeds/homepage/threads in PSGI # Unlike Msgmap, this is an _UNSTABLE_ database which can be # tweaked/updated over time and rebuilt. package PublicInbox::Over; use strict; use v5.10.1; use DBI qw(:sql_types); # SQL_BLOB use DBD::SQLite; use PublicInbox::Smsg; use Compress::Zlib qw(uncompress); use constant DEFAULT_LIMIT => 1000; sub dbh_new { my ($self, $rw) = @_; my $f = delete $self->{filename}; if (!-s $f) { # SQLite defaults mode to 0644, we want 0666 if ($rw) { require PublicInbox::Syscall; my ($dir) = ($f =~ m!(.+)/[^/]+\z!); PublicInbox::Syscall::nodatacow_dir($dir); open my $fh, '+>>', $f or die "failed to open $f: $!"; } else { $self->{filename} = $f; # die on stat() below: } } my (@st, $st, $dbh); my $tries = 0; do { @st = stat($f) or die "failed to stat $f: $!"; $st = pack('dd', $st[0], $st[1]); # 0: dev, 1: inode $dbh = DBI->connect("dbi:SQLite:dbname=$f",'','', { AutoCommit => 1, RaiseError => 1, PrintError => 0, ReadOnly => !$rw, sqlite_use_immediate_transaction => 1, }); $self->{st} = $st; @st = stat($f) or die "failed to stat $f: $!"; $st = pack('dd', $st[0], $st[1]); } while ($st ne $self->{st} && $tries++ < 3); warn "W: $f: .st_dev, .st_ino unstable\n" if $st ne $self->{st}; if ($rw) { # TRUNCATE reduces I/O compared to the default (DELETE). # # Do not use WAL by default since we expect the case # where any users may read via read-only daemons # (-httpd/-imapd/-nntpd); but only a single user has # write permissions for -watch/-mda. # # Read-only WAL support in SQLite 3.22.0 (2018-01-22) # doesn't do what we need: it is only intended for # immutable read-only media (e.g. CD-ROM) and not # usable for our use case described above. # # If an admin is willing to give read-only daemons R/W # permissions; they can enable WAL manually and we will # respect that by not clobbering it. my $jm = $dbh->selectrow_array('PRAGMA journal_mode'); $dbh->do('PRAGMA journal_mode = TRUNCATE') if $jm ne 'wal'; $dbh->do('PRAGMA synchronous = OFF') if $rw > 1; } $dbh; } sub new { my ($class, $f) = @_; bless { filename => $f }, $class; } sub dbh_close { my ($self) = @_; if (my $dbh = delete $self->{dbh}) { delete $self->{-get_art}; $self->{filename} = $dbh->sqlite_db_filename; } } sub dbh ($) { $_[0]->{dbh} //= $_[0]->dbh_new } # dbh_new may be subclassed sub load_from_row ($;$) { my ($smsg, $cull) = @_; bless $smsg, 'PublicInbox::Smsg'; if (defined(my $data = delete $smsg->{ddd})) { $data = uncompress($data); PublicInbox::Smsg::load_from_data($smsg, $data); # saves over 600K for 1000+ message threads PublicInbox::Smsg::psgi_cull($smsg) if $cull; } $smsg } sub do_get { my ($self, $sql, $opts, @args) = @_; my $lim = (($opts->{limit} || 0) + 0) || DEFAULT_LIMIT; $sql .= "LIMIT $lim"; my $msgs = dbh($self)->selectall_arrayref($sql, { Slice => {} }, @args); my $cull = $opts->{cull}; load_from_row($_, $cull) for @$msgs; $msgs } sub query_xover { my ($self, $beg, $end, $opt) = @_; do_get($self, <<'', $opt, $beg, $end); SELECT num,ts,ds,ddd FROM over WHERE num >= ? AND num <= ? ORDER BY num ASC } sub query_ts { my ($self, $ts, $prev) = @_; do_get($self, <<'', {}, $ts, $prev); SELECT num,ddd FROM over WHERE ts >= ? AND num > ? ORDER BY num ASC } sub get_all { my $self = shift; my $nr = scalar(@_) or return []; my $in = '?' . (',?' x ($nr - 1)); do_get($self, <<"", { cull => 1, limit => $nr }, @_); SELECT num,ts,ds,ddd FROM over WHERE num IN ($in) } sub nothing () { wantarray ? (0, []) : [] }; sub get_thread { my ($self, $mid, $prev) = @_; my $dbh = dbh($self); my $opts = { cull => 1 }; my $id = $dbh->selectrow_array(<<'', undef, $mid); SELECT id FROM msgid WHERE mid = ? LIMIT 1 defined $id or return nothing; my $num = $dbh->selectrow_array(<<'', undef, $id); SELECT num FROM id2num WHERE id = ? AND num > 0 ORDER BY num ASC LIMIT 1 defined $num or return nothing; my ($tid, $sid) = $dbh->selectrow_array(<<'', undef, $num); SELECT tid,sid FROM over WHERE num = ? LIMIT 1 defined $tid or return nothing; # $sid may be undef my $cond_all = '(tid = ? OR sid = ?) AND num > ?'; my $sort_col = 'ds'; $num = 0; if ($prev) { # mboxrd stream, only $num = $prev->{num} || 0; $sort_col = 'num'; } my $cols = 'num,ts,ds,ddd'; unless (wantarray) { return do_get($self, <<"", $opts, $tid, $sid, $num); SELECT $cols FROM over WHERE $cond_all ORDER BY $sort_col ASC } # HTML view always wants an array and never uses $prev, # but the mbox stream never wants an array and always has $prev die '$prev not supported with wantarray' if $prev; my $nr = $dbh->selectrow_array(<<"", undef, $tid, $sid, $num); SELECT COUNT(num) FROM over WHERE $cond_all # giant thread, prioritize strict (tid) matches and throw # in the loose (sid) matches at the end my $msgs = do_get($self, <<"", $opts, $tid, $num); SELECT $cols FROM over WHERE tid = ? AND num > ? ORDER BY $sort_col ASC # do we have room for loose matches? get the most recent ones, first: my $lim = DEFAULT_LIMIT - scalar(@$msgs); if ($lim > 0) { $opts->{limit} = $lim; my $loose = do_get($self, <<"", $opts, $tid, $sid, $num); SELECT $cols FROM over WHERE tid != ? AND sid = ? AND num > ? ORDER BY $sort_col DESC # TODO separate strict and loose matches here once --reindex # is fixed to preserve `tid' properly push @$msgs, @$loose; } ($nr, $msgs); } # strict `tid' matches, only, for thread-expanded mbox.gz search results # and future CLI interface # returns true if we have IDs, undef if not sub expand_thread { my ($self, $ctx) = @_; my $dbh = dbh($self); do { defined(my $num = $ctx->{ids}->[0]) or return; my ($tid) = $dbh->selectrow_array(<<'', undef, $num); SELECT tid FROM over WHERE num = ? if (defined($tid)) { my $sql = <<''; SELECT num FROM over WHERE tid = ? AND num > ? ORDER BY num ASC LIMIT 1000 my $xids = $dbh->selectcol_arrayref($sql, undef, $tid, $ctx->{prev} // 0); if (scalar(@$xids)) { $ctx->{prev} = $xids->[-1]; $ctx->{xids} = $xids; return 1; # success } } $ctx->{prev} = 0; shift @{$ctx->{ids}}; } while (1); } sub recent { my ($self, $opts, $after, $before) = @_; my ($s, @v); if (defined($before)) { if (defined($after)) { $s = '+num > 0 AND ts >= ? AND ts <= ? ORDER BY ts DESC'; @v = ($after, $before); } else { $s = '+num > 0 AND ts <= ? ORDER BY ts DESC'; @v = ($before); } } else { if (defined($after)) { $s = '+num > 0 AND ts >= ? ORDER BY ts ASC'; @v = ($after); } else { $s = '+num > 0 ORDER BY ts DESC'; } } do_get($self, <<"", $opts, @v); SELECT ts,ds,ddd FROM over WHERE $s } sub get_art { my ($self, $num) = @_; # caching $sth ourselves is faster than prepare_cached my $sth = $self->{-get_art} //= dbh($self)->prepare(<<''); SELECT num,tid,ds,ts,ddd FROM over WHERE num = ? LIMIT 1 $sth->execute($num); my $smsg = $sth->fetchrow_hashref; $smsg ? load_from_row($smsg) : undef; } sub get_xref3 { my ($self, $num, $raw) = @_; my $dbh = dbh($self); my $sth = $dbh->prepare_cached(<<'', undef, 1); SELECT ibx_id,xnum,oidbin FROM xref3 WHERE docid = ? ORDER BY ibx_id,xnum ASC $sth->execute($num); my $rows = $sth->fetchall_arrayref; return $rows if $raw; my $eidx_key_sth = $dbh->prepare_cached(<<'', undef, 1); SELECT eidx_key FROM inboxes WHERE ibx_id = ? for my $r (@$rows) { $eidx_key_sth->execute($r->[0]); my $eidx_key = $eidx_key_sth->fetchrow_array; $eidx_key //= "missing://ibx_id=$r->[0]"; $r = "$eidx_key:$r->[1]:".unpack('H*', $r->[2]); } $rows; } sub next_by_mid { my ($self, $mid, $id, $prev) = @_; my $dbh = dbh($self); unless (defined $$id) { my $sth = $dbh->prepare_cached(<<'', undef, 1); SELECT id FROM msgid WHERE mid = ? LIMIT 1 $sth->execute($mid); $$id = $sth->fetchrow_array; defined $$id or return; } my $sth = $dbh->prepare_cached(<<"", undef, 1); SELECT num FROM id2num WHERE id = ? AND num > ? ORDER BY num ASC LIMIT 1 $$prev ||= 0; $sth->execute($$id, $$prev); my $num = $sth->fetchrow_array or return; $$prev = $num; get_art($self, $num); } # IMAP search, this is limited by callers to UID_SLICE size (50K) sub uid_range { my ($self, $beg, $end, $sql) = @_; my $dbh = dbh($self); my $q = 'SELECT num FROM over WHERE num >= ? AND num <= ?'; # This is read-only, anyways; but caller should verify it's # only sending \A[0-9]+\z for ds and ts column ranges $q .= $$sql if $sql; $q .= ' ORDER BY num ASC'; $dbh->selectcol_arrayref($q, undef, $beg, $end); } sub max { my ($self) = @_; my $sth = dbh($self)->prepare_cached(<<'', undef, 1); SELECT MAX(num) FROM over WHERE num > 0 $sth->execute; $sth->fetchrow_array // 0; } sub imap_exists { my ($self, $uid_base, $uid_end) = @_; my $sth = dbh($self)->prepare_cached(<<'', undef, 1); SELECT COUNT(num) FROM over WHERE num > ? AND num <= ? $sth->execute($uid_base, $uid_end); $sth->fetchrow_array; } sub check_inodes { my ($self) = @_; my $dbh = $self->{dbh} or return; my $f = $dbh->sqlite_db_filename; if (my @st = stat($f)) { # did st_dev, st_ino change? my $st = pack('dd', $st[0], $st[1]); # don't actually reopen, just let {dbh} be recreated later dbh_close($self) if $st ne ($self->{st} // $st); } else { warn "W: stat $f: $!\n"; } } sub oidbin_exists { my ($self, $oidbin) = @_; if (wantarray) { my $sth = $self->dbh->prepare_cached(<<'', undef, 1); SELECT docid FROM xref3 WHERE oidbin = ? ORDER BY docid ASC $sth->bind_param(1, $oidbin, SQL_BLOB); $sth->execute; my $tmp = $sth->fetchall_arrayref; map { $_->[0] } @$tmp; } else { my $sth = $self->dbh->prepare_cached(<<'', undef, 1); SELECT COUNT(*) FROM xref3 WHERE oidbin = ? $sth->bind_param(1, $oidbin, SQL_BLOB); $sth->execute; $sth->fetchrow_array; } } sub blob_exists { oidbin_exists($_[0], pack('H*', $_[1])) } # used by NNTP.pm sub ids_after { my ($self, $num) = @_; my $ids = dbh($self)->selectcol_arrayref(<<'', undef, $$num); SELECT num FROM over WHERE num > ? ORDER BY num ASC LIMIT 1000 $$num = $ids->[-1] if @$ids; $ids; } 1; public-inbox-1.9.0/lib/PublicInbox/OverIdx.pm000066400000000000000000000423411430031475700210260ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ # for XOVER, OVER in NNTP, and feeds/homepage/threads in PSGI # Unlike Msgmap, this is an _UNSTABLE_ cache which can be # tweaked/updated over time and rebuilt. # # Ghost messages (messages which are only referenced in References/In-Reply-To) # are denoted by a negative NNTP article number. package PublicInbox::OverIdx; use strict; use v5.10.1; use parent qw(PublicInbox::Over); use IO::Handle; use DBI qw(:sql_types); # SQL_BLOB use PublicInbox::MID qw/id_compress mids_for_index references/; use PublicInbox::Smsg qw(subject_normalized); use Compress::Zlib qw(compress); use Carp qw(croak); sub dbh_new { my ($self) = @_; my $dbh = $self->SUPER::dbh_new($self->{-no_fsync} ? 2 : 1); # 80000 pages (80MiB on SQLite <3.12.0, 320MiB on 3.12.0+) # was found to be good in 2018 during the large LKML import # at the time. This ought to be configurable based on HW # and inbox size; I suspect it's overkill for many inboxes. $dbh->do('PRAGMA cache_size = 80000'); create_tables($dbh); $dbh; } sub new { my ($class, $f) = @_; my $self = $class->SUPER::new($f); $self->{min_tid} = 0; $self; } sub get_counter ($$) { my ($dbh, $key) = @_; my $sth = $dbh->prepare_cached(<<'', undef, 1); SELECT val FROM counter WHERE key = ? LIMIT 1 $sth->execute($key); $sth->fetchrow_array; } sub adj_counter ($$$) { my ($self, $key, $op) = @_; my $dbh = $self->{dbh}; my $sth = $dbh->prepare_cached(<<""); UPDATE counter SET val = val $op 1 WHERE key = ? $sth->execute($key); get_counter($dbh, $key); } sub next_tid { adj_counter($_[0], 'thread', '+') } sub next_ghost_num { adj_counter($_[0], 'ghost', '-') } sub id_for ($$$$$) { my ($self, $tbl, $id_col, $val_col, $val) = @_; my $dbh = $self->{dbh}; my $in = $dbh->prepare_cached(<<"")->execute($val); INSERT OR IGNORE INTO $tbl ($val_col) VALUES (?) if ($in == 0) { my $sth = $dbh->prepare_cached(<<"", undef, 1); SELECT $id_col FROM $tbl WHERE $val_col = ? LIMIT 1 $sth->execute($val); $sth->fetchrow_array; } else { $dbh->last_insert_id(undef, undef, $tbl, $id_col); } } sub ibx_id { my ($self, $eidx_key) = @_; id_for($self, 'inboxes', 'ibx_id', eidx_key => $eidx_key); } sub sid { my ($self, $path) = @_; return unless defined $path && $path ne ''; id_for($self, 'subject', 'sid', 'path' => $path); } sub mid2id { my ($self, $mid) = @_; id_for($self, 'msgid', 'id', 'mid' => $mid); } sub delete_by_num { my ($self, $num, $tid_ref) = @_; my $dbh = $self->{dbh}; if ($tid_ref) { my $sth = $dbh->prepare_cached(<<'', undef, 1); SELECT tid FROM over WHERE num = ? LIMIT 1 $sth->execute($num); $$tid_ref = $sth->fetchrow_array; # may be undef } foreach (qw(over id2num)) { $dbh->prepare_cached(<<"")->execute($num); DELETE FROM $_ WHERE num = ? } } # this includes ghosts sub each_by_mid { my ($self, $mid, $cols, $cb, @arg) = @_; my $dbh = $self->{dbh}; =over I originally wanted to stuff everything into a single query: SELECT over.* FROM over LEFT JOIN id2num ON over.num = id2num.num LEFT JOIN msgid ON msgid.id = id2num.id WHERE msgid.mid = ? AND over.num >= ? ORDER BY over.num ASC LIMIT 1000 But it's faster broken out (and we're always in a transaction for subroutines in this file) =cut my $sth = $dbh->prepare_cached(<<'', undef, 1); SELECT id FROM msgid WHERE mid = ? LIMIT 1 $sth->execute($mid); my $id = $sth->fetchrow_array; defined $id or return; push(@$cols, 'num'); $cols = join(',', map { $_ } @$cols); my $lim = 10; my $prev = get_counter($dbh, 'ghost'); while (1) { $sth = $dbh->prepare_cached(<<"", undef, 1); SELECT num FROM id2num WHERE id = ? AND num >= ? ORDER BY num ASC LIMIT $lim $sth->execute($id, $prev); my $nums = $sth->fetchall_arrayref; my $nr = scalar(@$nums) or return; $prev = $nums->[-1]->[0]; $sth = $dbh->prepare_cached(<<"", undef, 1); SELECT $cols FROM over WHERE over.num = ? LIMIT 1 foreach (@$nums) { $sth->execute($_->[0]); # $cb may delete rows and invalidate nums my $smsg = $sth->fetchrow_hashref // next; $smsg = PublicInbox::Over::load_from_row($smsg); $cb->($self, $smsg, @arg) or return; } return if $nr != $lim; } } sub _resolve_mid_to_tid { my ($self, $smsg, $tid) = @_; my $cur_tid = $smsg->{tid}; if (defined $$tid) { merge_threads($self, $$tid, $cur_tid); } elsif ($cur_tid > $self->{min_tid}) { $$tid = $cur_tid; } else { # rethreading, queue up dead ghosts $$tid = next_tid($self); my $n = $smsg->{num}; if ($n > 0) { $self->{dbh}->prepare_cached(<<'')->execute($$tid, $n); UPDATE over SET tid = ? WHERE num = ? } elsif ($n < 0) { push(@{$self->{-ghosts_to_delete}}, $n); } } 1; } # this will create a ghost as necessary sub resolve_mid_to_tid { my ($self, $mid) = @_; my $tid; each_by_mid($self, $mid, ['tid'], \&_resolve_mid_to_tid, \$tid); if (my $del = delete $self->{-ghosts_to_delete}) { delete_by_num($self, $_) for @$del; } $tid // do { # create a new ghost my $id = mid2id($self, $mid); my $num = next_ghost_num($self); $num < 0 or die "ghost num is non-negative: $num\n"; $tid = next_tid($self); my $dbh = $self->{dbh}; $dbh->prepare_cached(<<'')->execute($num, $tid); INSERT INTO over (num, tid) VALUES (?,?) $dbh->prepare_cached(<<'')->execute($id, $num); INSERT INTO id2num (id, num) VALUES (?,?) $tid; }; } sub merge_threads { my ($self, $winner_tid, $loser_tid) = @_; return if $winner_tid == $loser_tid; my $dbh = $self->{dbh}; $dbh->prepare_cached(<<'')->execute($winner_tid, $loser_tid); UPDATE over SET tid = ? WHERE tid = ? } sub link_refs { my ($self, $refs, $old_tid) = @_; my $tid; if (@$refs) { # first ref *should* be the thread root, # but we can never trust clients to do the right thing my $ref = $refs->[0]; $tid = resolve_mid_to_tid($self, $ref); merge_threads($self, $tid, $old_tid) if defined $old_tid; # the rest of the refs should point to this tid: foreach my $i (1..$#$refs) { $ref = $refs->[$i]; my $ptid = resolve_mid_to_tid($self, $ref); merge_threads($self, $tid, $ptid); } } else { $tid = $old_tid // next_tid($self); } $tid; } # normalize subjects somewhat, they used to be ASCII-only but now # we use \w for UTF-8 support. We may still drop it entirely and # rely on Xapian for subject matches... sub subject_path ($) { my ($subj) = @_; $subj = subject_normalized($subj); $subj =~ s![^\w\.~/\-]+!_!g; lc($subj); } sub ddd_for ($) { my ($smsg) = @_; my $dd = $smsg->to_doc_data; utf8::encode($dd); compress($dd); } sub add_overview { my ($self, $eml, $smsg) = @_; $smsg->{lines} = $eml->body_raw =~ tr!\n!\n!; my $mids = mids_for_index($eml); my $refs = $smsg->parse_references($eml, $mids); $mids->[0] //= do { $smsg->{mid} //= ''; $eml->{-lei_fake_mid}; }; my $subj = $smsg->{subject}; my $xpath; if ($subj ne '') { $xpath = subject_path($subj); $xpath = id_compress($xpath); } add_over($self, $smsg, $mids, $refs, $xpath, ddd_for($smsg)); } sub _add_over { my ($self, $smsg, $mid, $refs, $old_tid, $v) = @_; my $cur_tid = $smsg->{tid}; my $n = $smsg->{num}; die "num must not be zero for $mid" if !$n; my $cur_valid = $cur_tid > $self->{min_tid}; if ($n > 0) { # regular mail if ($cur_valid) { $$old_tid //= $cur_tid; merge_threads($self, $$old_tid, $cur_tid); } else { $$old_tid //= next_tid($self); } } elsif ($n < 0) { # ghost $$old_tid //= $cur_valid ? $cur_tid : next_tid($self); $$old_tid = link_refs($self, $refs, $$old_tid); delete_by_num($self, $n); $$v++; } 1; } sub add_over { my ($self, $smsg, $mids, $refs, $xpath, $ddd) = @_; my $old_tid; my $vivified = 0; my $num = $smsg->{num}; begin_lazy($self); delete_by_num($self, $num, \$old_tid); $old_tid = undef if ($old_tid // 0) <= $self->{min_tid}; foreach my $mid (@$mids) { my $v = 0; each_by_mid($self, $mid, ['tid'], \&_add_over, $mid, $refs, \$old_tid, \$v); $v > 1 and warn "BUG: vivified multiple ($v) ghosts for $mid\n"; $vivified += $v; } $smsg->{tid} = $vivified ? $old_tid : link_refs($self, $refs, $old_tid); $smsg->{sid} = sid($self, $xpath); my $dbh = $self->{dbh}; my $sth = $dbh->prepare_cached(<<''); INSERT INTO over (num, tid, sid, ts, ds, ddd) VALUES (?,?,?,?,?,?) my $nc = 1; $sth->bind_param($nc, $num); $sth->bind_param(++$nc, $smsg->{$_}) for (qw(tid sid ts ds)); $sth->bind_param(++$nc, $ddd, SQL_BLOB); $sth->execute; $sth = $dbh->prepare_cached(<<''); INSERT INTO id2num (id, num) VALUES (?,?) foreach my $mid (@$mids) { my $id = mid2id($self, $mid); $sth->execute($id, $num); } } sub _remove_oid { my ($self, $smsg, $oid, $removed) = @_; if (!defined($oid) || $smsg->{blob} eq $oid) { delete_by_num($self, $smsg->{num}); push @$removed, $smsg->{num}; } 1; } # returns number of removed messages in scalar context, # array of removed article numbers in array context. # $oid may be undef to match only on $mid sub remove_oid { my ($self, $oid, $mid) = @_; my $removed = []; begin_lazy($self); each_by_mid($self, $mid, ['ddd'], \&_remove_oid, $oid, $removed); @$removed; } sub _num_mid0_for_oid { my ($self, $smsg, $oid, $res) = @_; my $blob = $smsg->{blob}; return 1 if (!defined($blob) || $blob ne $oid); # continue; @$res = ($smsg->{num}, $smsg->{mid}); 0; # done } sub num_mid0_for_oid { my ($self, $oid, $mid) = @_; my $res = []; begin_lazy($self); each_by_mid($self, $mid, ['ddd'], \&_num_mid0_for_oid, $oid, $res); @$res, # ($num, $mid0); } sub create_tables { my ($dbh) = @_; $dbh->do(<<''); CREATE TABLE IF NOT EXISTS over ( num INTEGER PRIMARY KEY NOT NULL, /* NNTP article number == IMAP UID */ tid INTEGER NOT NULL, /* THREADID (IMAP REFERENCES threading, JMAP) */ sid INTEGER, /* Subject ID (IMAP ORDEREDSUBJECT "threading") */ ts INTEGER, /* IMAP INTERNALDATE (Received: header, git commit time) */ ds INTEGER, /* RFC-2822 sent Date: header, git author time */ ddd VARBINARY /* doc-data-deflated (->to_doc_data, ->load_from_data) */ ) $dbh->do('CREATE INDEX IF NOT EXISTS idx_tid ON over (tid)'); $dbh->do('CREATE INDEX IF NOT EXISTS idx_sid ON over (sid)'); $dbh->do('CREATE INDEX IF NOT EXISTS idx_ts ON over (ts)'); $dbh->do('CREATE INDEX IF NOT EXISTS idx_ds ON over (ds)'); $dbh->do(<<''); CREATE TABLE IF NOT EXISTS counter ( key VARCHAR(8) PRIMARY KEY NOT NULL, val INTEGER DEFAULT 0, UNIQUE (key) ) $dbh->do("INSERT OR IGNORE INTO counter (key) VALUES ('thread')"); $dbh->do("INSERT OR IGNORE INTO counter (key) VALUES ('ghost')"); $dbh->do(<<''); CREATE TABLE IF NOT EXISTS subject ( sid INTEGER PRIMARY KEY AUTOINCREMENT, path VARCHAR(40) NOT NULL, /* SHA-1 of normalized subject */ UNIQUE (path) ) $dbh->do(<<''); CREATE TABLE IF NOT EXISTS id2num ( id INTEGER NOT NULL, /* <=> msgid.id */ num INTEGER NOT NULL, UNIQUE (id, num) ) # performance critical: $dbh->do('CREATE INDEX IF NOT EXISTS idx_inum ON id2num (num)'); $dbh->do('CREATE INDEX IF NOT EXISTS idx_id ON id2num (id)'); $dbh->do(<<''); CREATE TABLE IF NOT EXISTS msgid ( id INTEGER PRIMARY KEY AUTOINCREMENT, /* <=> id2num.id */ mid VARCHAR(244) NOT NULL, UNIQUE (mid) ) } sub commit_lazy { my ($self) = @_; delete $self->{txn} or return; $self->{dbh}->commit; eval { $self->{dbh}->do('PRAGMA optimize') }; } sub begin_lazy { my ($self) = @_; return if $self->{txn}; my $dbh = $self->dbh or return; $dbh->begin_work; # $dbh->{Profile} = 2; $self->{txn} = 1; } sub rollback_lazy { my ($self) = @_; delete $self->{txn} or return; $self->{dbh}->rollback; } sub dbh_close { my ($self) = @_; die "in transaction" if $self->{txn}; $self->SUPER::dbh_close; } sub create { my ($self) = @_; my $fn = $self->{filename} // do { croak('BUG: no {filename}') unless $self->{dbh}; return; }; unless (-r $fn) { require File::Path; my ($dir) = ($fn =~ m!(.*?/)[^/]+\z!); File::Path::mkpath($dir); } # create the DB: PublicInbox::Over::dbh($self); $self->dbh_close; } sub rethread_prepare { my ($self, $opt) = @_; return unless $opt->{rethread}; begin_lazy($self); my $min = $self->{min_tid} = get_counter($self->{dbh}, 'thread') // 0; my $pr = $opt->{-progress}; $pr->("rethread min THREADID ".($min + 1)."\n") if $pr && $min; } sub rethread_done { my ($self, $opt) = @_; return unless $opt->{rethread} && $self->{txn}; defined(my $min = $self->{min_tid}) or croak('BUG: no min_tid'); my $dbh = $self->{dbh} or croak('BUG: no dbh'); my $rows = $dbh->selectall_arrayref(<<'', { Slice => {} }, $min); SELECT num,tid FROM over WHERE num < 0 AND tid < ? my $show_id = $dbh->prepare('SELECT id FROM id2num WHERE num = ?'); my $show_mid = $dbh->prepare('SELECT mid FROM msgid WHERE id = ?'); my $pr = $opt->{-progress}; my $total = 0; for my $r (@$rows) { my $exp = 0; $show_id->execute($r->{num}); while (defined(my $id = $show_id->fetchrow_array)) { ++$exp; $show_mid->execute($id); my $mid = $show_mid->fetchrow_array; if (!defined($mid)) { warn <{num} ID=$id THREADID=$r->{tid} has no Message-ID EOF next; } $pr->(<{num} <$mid> THREADID=$r->{tid} culled EOM } delete_by_num($self, $r->{num}); } $pr->("I: rethread culled $total ghosts\n") if $pr && $total; } # used for cross-inbox search sub eidx_prep ($) { my ($self) = @_; $self->{-eidx_prep} //= do { my $dbh = $self->dbh; $dbh->do(<<''); INSERT OR IGNORE INTO counter (key) VALUES ('eidx_docid') $dbh->do(<<''); CREATE TABLE IF NOT EXISTS inboxes ( ibx_id INTEGER PRIMARY KEY AUTOINCREMENT, eidx_key VARCHAR(255) NOT NULL, /* {newsgroup} // {inboxdir} */ UNIQUE (eidx_key) ) $dbh->do(<<''); CREATE TABLE IF NOT EXISTS xref3 ( docid INTEGER NOT NULL, /* <=> over.num */ ibx_id INTEGER NOT NULL, /* <=> inboxes.ibx_id */ xnum INTEGER NOT NULL, /* NNTP article number in ibx */ oidbin VARBINARY NOT NULL, /* 20-byte SHA-1 or 32-byte SHA-256 */ UNIQUE (docid, ibx_id, xnum, oidbin) ) $dbh->do('CREATE INDEX IF NOT EXISTS idx_docid ON xref3 (docid)'); # performance critical, this is not UNIQUE since we may need to # tolerate some old bugs from indexing mirrors. n.b. we used # to index oidbin here, but leaving it out speeds up reindexing # and "XHDR Xref <$MSGID>" isn't any slower w/o oidbin $dbh->do('CREATE INDEX IF NOT EXISTS idx_reindex ON '. 'xref3 (xnum,ibx_id)'); $dbh->do('CREATE INDEX IF NOT EXISTS idx_oidbin ON xref3 (oidbin)'); $dbh->do(<<''); CREATE TABLE IF NOT EXISTS eidx_meta ( key VARCHAR(255) PRIMARY KEY, val VARCHAR(255) NOT NULL ) # A queue of current docids which need reindexing. # eidxq persists across aborted -extindex invocations # Currently used for "-extindex --reindex" for Xapian # data, but may be used in more places down the line. $dbh->do(<<''); CREATE TABLE IF NOT EXISTS eidxq (docid INTEGER PRIMARY KEY NOT NULL) 1; }; } sub eidx_meta { # requires transaction my ($self, $key, $val) = @_; my $sql = 'SELECT val FROM eidx_meta WHERE key = ? LIMIT 1'; my $dbh = $self->{dbh}; defined($val) or return $dbh->selectrow_array($sql, undef, $key); my $prev = $dbh->selectrow_array($sql, undef, $key); if (defined $prev) { $sql = 'UPDATE eidx_meta SET val = ? WHERE key = ?'; $dbh->do($sql, undef, $val, $key); } else { $sql = 'INSERT INTO eidx_meta (key,val) VALUES (?,?)'; $dbh->do($sql, undef, $key, $val); } $prev; } sub eidx_max { my ($self) = @_; get_counter($self->{dbh}, 'eidx_docid'); } sub add_xref3 { my ($self, $docid, $xnum, $oidhex, $eidx_key) = @_; begin_lazy($self); my $ibx_id = ibx_id($self, $eidx_key); my $oidbin = pack('H*', $oidhex); my $sth = $self->{dbh}->prepare_cached(<<''); INSERT OR IGNORE INTO xref3 (docid, ibx_id, xnum, oidbin) VALUES (?, ?, ?, ?) $sth->bind_param(1, $docid); $sth->bind_param(2, $ibx_id); $sth->bind_param(3, $xnum); $sth->bind_param(4, $oidbin, SQL_BLOB); $sth->execute; } # for when an xref3 goes missing, this does NOT update {ts} sub update_blob { my ($self, $smsg, $oidhex) = @_; my $sth = $self->{dbh}->prepare(<<''); UPDATE over SET ddd = ? WHERE num = ? $smsg->{blob} = $oidhex; $sth->bind_param(1, ddd_for($smsg), SQL_BLOB); $sth->bind_param(2, $smsg->{num}); $sth->execute; } sub merge_xref3 { # used for "-extindex --dedupe" my ($self, $keep_docid, $drop_docid, $oidbin) = @_; my $sth = $self->{dbh}->prepare_cached(<<''); UPDATE OR IGNORE xref3 SET docid = ? WHERE docid = ? AND oidbin = ? $sth->bind_param(1, $keep_docid); $sth->bind_param(2, $drop_docid); $sth->bind_param(3, $oidbin, SQL_BLOB); $sth->execute; # drop anything that conflicted $sth = $self->{dbh}->prepare_cached(<<''); DELETE FROM xref3 WHERE docid = ? AND oidbin = ? $sth->bind_param(1, $drop_docid); $sth->bind_param(2, $oidbin, SQL_BLOB); $sth->execute; } sub eidxq_add { my ($self, $docid) = @_; $self->dbh->prepare_cached(<<'')->execute($docid); INSERT OR IGNORE INTO eidxq (docid) VALUES (?) } sub eidxq_del { my ($self, $docid) = @_; $self->dbh->prepare_cached(<<'')->execute($docid); DELETE FROM eidxq WHERE docid = ? } # returns true if we're vivifying a message for lei/store that was # previously external-metadata only sub vivify_xvmd { my ($self, $smsg) = @_; my @docids = $self->blob_exists($smsg->{blob}); my @vivify_xvmd; for my $id (@docids) { if (my $cur = $self->get_art($id)) { # already indexed if bytes > 0 return if $cur->{bytes} > 0; push @vivify_xvmd, $id; } else { warn "W: $smsg->{blob} #$id gone (bug?)\n"; } } $smsg->{-vivify_xvmd} = \@vivify_xvmd; } 1; public-inbox-1.9.0/lib/PublicInbox/POP3.pm000066400000000000000000000304541430031475700201710ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # # Each instance of this represents a POP3 client connected to # public-inbox-{netd,pop3d}. Much of this was taken from IMAP.pm and NNTP.pm # # POP3 is one mailbox per-user, so the "USER" command is like the # format of -imapd and is mapped to $NEWSGROUP.$SLICE (large inboxes # are sliced into 50K mailboxes in both POP3 and IMAP to avoid overloading # clients) # # Unlike IMAP, the "$NEWSGROUP" mailbox (without $SLICE) is a rolling # window of the latest messages. We can do this for POP3 since the # typical POP3 session is short-lived while long-lived IMAP sessions # would cause slices to grow on the server side without bounds. # # Like IMAP, POP3 also has per-session message sequence numbers (MSN), # which require mapping to UIDs. The offset of an entry into our # per-client cache is: (MSN-1) # # fields: # - uuid - 16-byte (binary) UUID representation (before successful login) # - cache - one-dimentional arrayref of (UID, bytesize, oidhex) # - nr_dele - number of deleted messages # - expire - string of packed unsigned short offsets # - user_id - user-ID mapped to UUID (on successful login + lock) # - txn_max_uid - for storing max deleted UID persistently # - ibx - PublicInbox::Inbox object # - slice - unsigned integer slice number (0..Inf), -1 => latest # - salt - pre-auth for APOP # - uid_dele - maximum deleted from previous session at login (NNTP ARTICLE) # - uid_base - base UID for mailbox slice (0-based) (same as IMAP) package PublicInbox::POP3; use v5.12; use parent qw(PublicInbox::DS); use PublicInbox::GitAsyncCat; use PublicInbox::DS qw(now); use Errno qw(EAGAIN); use Digest::MD5 qw(md5); use PublicInbox::IMAP; # for UID slice stuff use constant { LINE_MAX => 512, # XXX unsure }; # XXX FIXME: duplicated stuff from NNTP.pm and IMAP.pm sub out ($$;@) { my ($self, $fmt, @args) = @_; printf { $self->{pop3d}->{out} } $fmt."\n", @args; } sub do_greet { my ($self) = @_; my $s = $self->{salt} = sprintf('%x.%x', int(rand(0x7fffffff)), time); $self->write("+OK POP3 server ready <$s\@public-inbox>\r\n"); } sub new { my ($cls, $sock, $pop3d) = @_; (bless { pop3d => $pop3d }, $cls)->greet($sock) } # POP user is $UUID1@$NEWSGROUP.$SLICE sub cmd_user ($$) { my ($self, $mailbox) = @_; $self->{salt} // return \"-ERR already authed\r\n"; $mailbox =~ s/\A([a-f0-9\-]+)\@//i or return \"-ERR no UUID@ in mailbox name\r\n"; my $user = $1; $user =~ tr/-//d; # most have dashes, some (dbus-uuidgen) don't $user =~ m!\A[a-f0-9]{32}\z!i or return \"-ERR user has no UUID\r\n"; my $slice; $mailbox =~ s/\.([0-9]+)\z// and $slice = $1 + 0; my $ibx = $self->{pop3d}->{pi_cfg}->lookup_newsgroup($mailbox) // return \"-ERR $mailbox does not exist\r\n"; my $uidmax = $ibx->mm(1)->num_highwater // 0; if (defined $slice) { my $max = int($uidmax / PublicInbox::IMAP::UID_SLICE); my $tip = "$mailbox.$max"; return \"-ERR $mailbox.$slice does not exist ($tip does)\r\n" if $slice > $max; $self->{uid_base} = $slice * PublicInbox::IMAP::UID_SLICE; $self->{slice} = $slice; } else { # latest 50K messages my $base = $uidmax - PublicInbox::IMAP::UID_SLICE; $self->{uid_base} = $base < 0 ? 0 : $base; $self->{slice} = -1; } $self->{ibx} = $ibx; $self->{uuid} = pack('H*', $user); # deleted by _login_ok $slice //= '(latest)'; \"+OK $ibx->{newsgroup} slice=$slice selected\r\n"; } sub _login_ok ($) { my ($self) = @_; if ($self->{pop3d}->lock_mailbox($self)) { $self->{uid_max} = $self->{ibx}->over(1)->max; \"+OK logged in\r\n"; } else { \"-ERR [IN-USE] unable to lock maildrop\r\n"; } } sub cmd_apop { my ($self, $mailbox, $hex) = @_; my $res = cmd_user($self, $mailbox); # sets {uuid} return $res if substr($$res, 0, 1) eq '-'; my $s = delete($self->{salt}) // die 'BUG: salt missing'; return _login_ok($self) if md5("<$s\@public-inbox>anonymous") eq pack('H*', $hex); $self->{salt} = $s; \"-ERR APOP password mismatch\r\n"; } sub cmd_pass { my ($self, $pass) = @_; $self->{ibx} // return \"-ERR mailbox unspecified\r\n"; my $s = delete($self->{salt}) // return \"-ERR already authed\r\n"; return _login_ok($self) if $pass eq 'anonymous'; $self->{salt} = $s; \"-ERR password is not `anonymous'\r\n"; } sub cmd_stls { my ($self) = @_; ($self->{sock} // return)->can('stop_SSL') and return \"-ERR TLS already enabled\r\n"; $self->{pop3d}->{ssl_ctx_opt} or return \"-ERR can't start TLS negotiation\r\n"; $self->write(\"+OK begin TLS negotiation now\r\n"); PublicInbox::TLS::start($self->{sock}, $self->{pop3d}); $self->requeue if PublicInbox::DS::accept_tls_step($self); undef; } sub need_txn ($) { exists($_[0]->{salt}) ? \"-ERR not in TRANSACTION\r\n" : undef; } sub _stat_cache ($) { my ($self) = @_; my ($beg, $end) = (($self->{uid_dele} // -1) + 1, $self->{uid_max}); PublicInbox::IMAP::uid_clamp($self, \$beg, \$end); my (@cache, $m); my $sth = $self->{ibx}->over(1)->dbh->prepare_cached(<<'', undef, 1); SELECT num,ddd FROM over WHERE num >= ? AND num <= ? ORDER BY num ASC $sth->execute($beg, $end); my $tot = 0; while (defined($m = $sth->fetchall_arrayref({}, 1000))) { for my $x (@$m) { PublicInbox::Over::load_from_row($x); push(@cache, $x->{num}, $x->{bytes} + 0, $x->{blob}); undef $x; # saves ~1.5M memory w/ 50k messages $tot += $cache[-2]; } } $self->{total_bytes} = $tot; $self->{cache} = \@cache; } sub cmd_stat { my ($self) = @_; my $err; $err = need_txn($self) and return $err; my $cache = $self->{cache} // _stat_cache($self); my $nr = @$cache / 3 - ($self->{nr_dele} // 0); "+OK $nr $self->{total_bytes}\r\n"; } # for LIST and UIDL sub _list { my ($desc, $idx, $self, $msn) = @_; my $err; $err = need_txn($self) and return $err; my $cache = $self->{cache} // _stat_cache($self); if (defined $msn) { my $base_off = ($msn - 1) * 3; my $val = $cache->[$base_off + $idx] // return \"-ERR no such message\r\n"; "+OK $desc listing follows\r\n$msn $val\r\n.\r\n"; } else { # always +OK, even if no messages my $res = "+OK $desc listing follows\r\n"; my $msn = 0; for (my $i = 0; $i < scalar(@$cache); $i += 3) { ++$msn; defined($cache->[$i]) and $res .= "$msn $cache->[$i + $idx]\r\n"; } $res .= ".\r\n"; } } sub cmd_list { _list('scan', 1, @_) } sub cmd_uidl { _list('unique-id', 2, @_) } sub mark_dele ($$) { my ($self, $off) = @_; my $base_off = $off * 3; my $cache = $self->{cache}; my $uid = $cache->[$base_off] // return; # already deleted my $old = $self->{txn_max_uid} //= $uid; $self->{txn_max_uid} = $uid if $uid > $old; $self->{total_bytes} -= $cache->[$base_off + 1]; $cache->[$base_off] = undef; # clobber UID $cache->[$base_off + 1] = undef; # clobber bytes $cache->[$base_off + 2] = undef; # clobber oidhex ++$self->{nr_dele}; } sub retr_cb { # called by git->cat_async via ibx_async_cat my ($bref, $oid, $type, $size, $args) = @_; my ($self, $off, $top_nr) = @$args; my $hex = $self->{cache}->[$off * 3 + 2] // die "BUG: no hex (oid=$oid)"; if (!defined($oid)) { # it's possible to have TOCTOU if an admin runs # public-inbox-(edit|purge), just move onto the next message warn "E: $hex missing in $self->{ibx}->{inboxdir}\n"; $self->write(\"-ERR no such message\r\n"); return $self->requeue; } elsif ($hex ne $oid) { $self->close; die "BUG: $hex != $oid"; } PublicInbox::IMAP::to_crlf_full($bref); if (defined $top_nr) { my ($hdr, $bdy) = split(/\r\n\r\n/, $$bref, 2); $bref = \$hdr; $hdr .= "\r\n\r\n"; my @tmp = split(/^/m, $bdy); $hdr .= join('', splice(@tmp, 0, $top_nr)); } elsif (exists $self->{expire}) { $self->{expire} .= pack('S', $off); } $$bref =~ s/^\./../gms; $$bref .= substr($$bref, -2, 2) eq "\r\n" ? ".\r\n" : "\r\n.\r\n"; $self->msg_more("+OK message follows\r\n"); $self->write($bref); $self->requeue; } sub cmd_retr { my ($self, $msn, $top_nr) = @_; return \"-ERR lines must be a non-negative number\r\n" if (defined($top_nr) && $top_nr !~ /\A[0-9]+\z/); my $err; $err = need_txn($self) and return $err; my $cache = $self->{cache} // _stat_cache($self); my $off = $msn - 1; my $hex = $cache->[$off * 3 + 2] // return \"-ERR no such message\r\n"; ${ibx_async_cat($self->{ibx}, $hex, \&retr_cb, [ $self, $off, $top_nr ])}; } sub cmd_noop { $_[0]->write(\"+OK\r\n") } sub cmd_rset { my ($self) = @_; my $err; $err = need_txn($self) and return $err; delete $self->{cache}; delete $self->{txn_max_uid}; \"+OK\r\n"; } sub cmd_dele { my ($self, $msn) = @_; my $err; $err = need_txn($self) and return $err; $self->{cache} // _stat_cache($self); $msn =~ /\A[1-9][0-9]*\z/ or return \"-ERR no such message\r\n"; mark_dele($self, $msn - 1) ? \"+OK\r\n" : \"-ERR no such message\r\n"; } # RFC 2449 sub cmd_capa { my ($self) = @_; my $STLS = !$self->{ibx} && !$self->{sock}->can('stop_SSL') && $self->{pop3d}->{ssl_ctx_opt} ? "\nSTLS\r" : ''; $self->{expire} = ''; # "EXPIRE 0" allows clients to avoid DELE commands <{pop3d}->unlock_mailbox($self); $self->SUPER::close; } # must be called inside a state_dbh transaction with flock held sub __cleanup_state { my ($self, $txn_id) = @_; my $user_id = $self->{user_id} // die 'BUG: no {user_id}'; $self->{pop3d}->{-state_dbh}->prepare_cached(<<'')->execute($txn_id); DELETE FROM deletes WHERE txn_id = ? AND uid_dele = -1 my $sth = $self->{pop3d}->{-state_dbh}->prepare_cached(<<'', undef, 1); SELECT COUNT(*) FROM deletes WHERE user_id = ? $sth->execute($user_id); my $nr = $sth->fetchrow_array; if ($nr == 0) { $sth = $self->{pop3d}->{-state_dbh}->prepare_cached(<<''); DELETE FROM users WHERE user_id = ? $sth->execute($user_id); } $nr; } sub cmd_quit { my ($self) = @_; if (defined(my $txn_id = $self->{txn_id})) { my $user_id = $self->{user_id} // die 'BUG: no {user_id}'; if (my $exp = delete $self->{expire}) { mark_dele($self, $_) for unpack('S*', $exp); } my $keep = 1; my $dbh = $self->{pop3d}->{-state_dbh}; my $lk = $self->{pop3d}->lock_for_scope; $dbh->begin_work; if (defined(my $max = $self->{txn_max_uid})) { $dbh->prepare_cached(<<'')->execute($max, $txn_id, $max) UPDATE deletes SET uid_dele = ? WHERE txn_id = ? AND uid_dele < ? } else { $keep = $self->__cleanup_state($txn_id); } $dbh->prepare_cached(<<'')->execute(time, $user_id) if $keep; UPDATE users SET last_seen = ? WHERE user_id = ? $dbh->commit; # we MUST do txn_id F_UNLCK here inside ->lock_for_scope: $self->{did_quit} = 1; $self->{pop3d}->unlock_mailbox($self); } $self->write(\"+OK public-inbox POP3 server signing off\r\n"); $self->close; undef; } # returns 1 if we can continue, 0 if not due to buffered writes or disconnect sub process_line ($$) { my ($self, $l) = @_; my ($req, @args) = split(/[ \t]+/, $l); return 1 unless defined($req); # skip blank line $req = $self->can('cmd_'.lc($req)); my $res = $req ? eval { $req->($self, @args) } : \"-ERR command not recognized\r\n"; my $err = $@; if ($err && $self->{sock}) { $l =~ s/\r?\n//s; warn("error from: $l ($err)\n"); $res = \"-ERR program fault - command not performed\r\n"; } defined($res) ? $self->write($res) : 0; } # callback used by PublicInbox::DS for any (e)poll (in/out/hup/err) sub event_step { my ($self) = @_; local $SIG{__WARN__} = $self->{pop3d}->{warn_cb}; return unless $self->flush_write && $self->{sock} && !$self->{long_cb}; # only read more requests if we've drained the write buffer, # otherwise we can be buffering infinitely w/o backpressure my $rbuf = $self->{rbuf} // \(my $x = ''); my $line = index($$rbuf, "\n"); while ($line < 0) { return $self->close if length($$rbuf) >= LINE_MAX; $self->do_read($rbuf, LINE_MAX, length($$rbuf)) or return; $line = index($$rbuf, "\n"); } $line = substr($$rbuf, 0, $line + 1, ''); $line =~ s/\r?\n\z//s; return $self->close if $line =~ /[[:cntrl:]]/s; my $t0 = now(); my $fd = fileno($self->{sock}); # may become invalid after process_line my $r = eval { process_line($self, $line) }; my $pending = $self->{wbuf} ? ' pending' : ''; out($self, "[$fd] %s - %0.6f$pending - $r", $line, now() - $t0); return $self->close if $r < 0; $self->rbuf_idle($rbuf); # maybe there's more pipelined data, or we'll have # to register it for socket-readiness notifications $self->requeue unless $pending; } no warnings 'once'; *cmd_top = \&cmd_retr; 1; public-inbox-1.9.0/lib/PublicInbox/POP3D.pm000066400000000000000000000172261430031475700202770ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # represents an POP3D package PublicInbox::POP3D; use v5.12; use parent qw(PublicInbox::Lock); use DBI qw(:sql_types); # SQL_BLOB use Carp (); use File::Temp 0.19 (); # 0.19 for ->newdir use PublicInbox::Config; use PublicInbox::POP3; use PublicInbox::Syscall; use File::Temp 0.19 (); # 0.19 for ->newdir use Fcntl qw(F_SETLK F_UNLCK F_WRLCK SEEK_SET); my @FLOCK; if ($^O eq 'linux' || $^O =~ /bsd/) { require Config; my $off_t; my $sz = $Config::Config{lseeksize}; if ($sz == 8 && eval('length(pack("q", 1)) == 8')) { $off_t = 'q' } elsif ($sz == 4) { $off_t = 'l' } else { warn "sizeof(off_t)=$sz requires File::FcntlLock\n" } if (defined($off_t)) { if ($^O eq 'linux') { @FLOCK = ("ss\@8$off_t$off_t\@32", qw(l_type l_whence l_start l_len)); } elsif ($^O =~ /bsd/) { @FLOCK = ("${off_t}${off_t}lss\@256", qw(l_start l_len l_pid l_type l_whence)); } } } @FLOCK or eval { require File::FcntlLock } or die "File::FcntlLock required for POP3 on $^O: $@\n"; sub new { my ($cls) = @_; bless { err => \*STDERR, out => \*STDOUT, # pi_cfg => PublicInbox::Config # lock_path => ... # interprocess lock is the $pop3state/txn.locks file # txn_locks => {}, # intraworker locks # ssl_ctx_opt => { SSL_cert_file => ..., SSL_key_file => ... } }, $cls; } sub refresh_groups { # PublicInbox::Daemon callback my ($self, $sig) = @_; # TODO share pi_cfg with nntpd/imapd inside -netd my $new = PublicInbox::Config->new; my $d = $new->{'publicinbox.pop3state'} // die "publicinbox.pop3state undefined ($new->{-f})\n"; -d $d or do { require File::Path; File::Path::make_path($d, { mode => 0700 }); PublicInbox::Syscall::nodatacow_dir($d); }; $self->{lock_path} //= "$d/db.lock"; if (my $old = $self->{pi_cfg}) { my $s = 'publicinbox.pop3state'; $new->{$s} //= $old->{$s}; return warn <{$s} ne $old->{$s}; $s changed: `$old->{$s}' => `$new->{$s}', config reload ignored EOM } $self->{pi_cfg} = $new; } # persistent tables sub create_state_tables ($$) { my ($self, $dbh) = @_; $dbh->do(<<''); # map publicinbox..newsgroup to integers CREATE TABLE IF NOT EXISTS newsgroups ( newsgroup_id INTEGER PRIMARY KEY NOT NULL, newsgroup VARBINARY NOT NULL, UNIQUE (newsgroup) ) # the $NEWSGROUP_NAME.$SLICE_INDEX is part of the POP3 username; # POP3 has no concept of folders/mailboxes like IMAP/JMAP $dbh->do(<<''); CREATE TABLE IF NOT EXISTS mailboxes ( mailbox_id INTEGER PRIMARY KEY NOT NULL, newsgroup_id INTEGER NOT NULL REFERENCES newsgroups, slice INTEGER NOT NULL, /* -1 for most recent slice */ UNIQUE (newsgroup_id, slice) ) $dbh->do(<<''); # actual users are differentiated by their UUID CREATE TABLE IF NOT EXISTS users ( user_id INTEGER PRIMARY KEY NOT NULL, uuid VARBINARY NOT NULL, last_seen INTEGER NOT NULL, /* to expire idle accounts */ UNIQUE (uuid) ) # we only track the highest-numbered deleted message per-UUID@mailbox $dbh->do(<<''); CREATE TABLE IF NOT EXISTS deletes ( txn_id INTEGER PRIMARY KEY NOT NULL, /* -1 == txn lock offset */ user_id INTEGER NOT NULL REFERENCES users, mailbox_id INTEGER NOT NULL REFERENCES mailboxes, uid_dele INTEGER NOT NULL DEFAULT -1, /* IMAP UID, NNTP article */ UNIQUE(user_id, mailbox_id) ) } sub state_dbh_new { my ($self) = @_; my $f = "$self->{pi_cfg}->{'publicinbox.pop3state'}/db.sqlite3"; my $creat = !-s $f; if ($creat) { open my $fh, '+>>', $f or Carp::croak "open($f): $!"; PublicInbox::Syscall::nodatacow_fh($fh); } my $dbh = DBI->connect("dbi:SQLite:dbname=$f",'','', { AutoCommit => 1, RaiseError => 1, PrintError => 0, sqlite_use_immediate_transaction => 1, sqlite_see_if_its_a_number => 1, }); $dbh->do('PRAGMA journal_mode = WAL') if $creat; $dbh->do('PRAGMA foreign_keys = ON'); # don't forget this # ensure the interprocess fcntl lock file exists $f = "$self->{pi_cfg}->{'publicinbox.pop3state'}/txn.locks"; open my $fh, '+>>', $f or Carp::croak("open($f): $!"); $self->{txn_fh} = $fh; create_state_tables($self, $dbh); $dbh; } sub _setlk ($%) { my ($self, %lk) = @_; $lk{l_pid} = 0; # needed for *BSD $lk{l_whence} = SEEK_SET; if (@FLOCK) { fcntl($self->{txn_fh}, F_SETLK, pack($FLOCK[0], @lk{@FLOCK[1..$#FLOCK]})); } else { my $fs = File::FcntlLock->new(%lk); $fs->lock($self->{txn_fh}, F_SETLK); } } sub lock_mailbox { my ($self, $pop3) = @_; # pop3 - PublicInbox::POP3 client object my $lk = $self->lock_for_scope; # lock the SQLite DB, only my $dbh = $self->{-state_dbh} //= state_dbh_new($self); my ($user_id, $ngid, $mbid, $txn_id); my $uuid = delete $pop3->{uuid}; $dbh->begin_work; # 1. make sure the user exists, update `last_seen' my $sth = $dbh->prepare_cached(<<''); INSERT OR IGNORE INTO users (uuid, last_seen) VALUES (?,?) $sth->bind_param(1, $uuid, SQL_BLOB); $sth->bind_param(2, time); if ($sth->execute == 0) { # existing user $sth = $dbh->prepare_cached(<<'', undef, 1); SELECT user_id FROM users WHERE uuid = ? $sth->bind_param(1, $uuid, SQL_BLOB); $sth->execute; $user_id = $sth->fetchrow_array // die 'BUG: user '.unpack('H*', $uuid).' not found'; $sth = $dbh->prepare_cached(<<''); UPDATE users SET last_seen = ? WHERE user_id = ? $sth->execute(time, $user_id); } else { # new user $user_id = $dbh->last_insert_id(undef, undef, 'users', 'user_id') } # 2. make sure the newsgroup has an integer ID $sth = $dbh->prepare_cached(<<''); INSERT OR IGNORE INTO newsgroups (newsgroup) VALUES (?) my $ng = $pop3->{ibx}->{newsgroup}; $sth->bind_param(1, $ng, SQL_BLOB); if ($sth->execute == 0) { $sth = $dbh->prepare_cached(<<'', undef, 1); SELECT newsgroup_id FROM newsgroups WHERE newsgroup = ? $sth->bind_param(1, $ng, SQL_BLOB); $sth->execute; $ngid = $sth->fetchrow_array // die "BUG: `$ng' not found"; } else { $ngid = $dbh->last_insert_id(undef, undef, 'newsgroups', 'newsgroup_id'); } # 3. ensure the mailbox exists $sth = $dbh->prepare_cached(<<''); INSERT OR IGNORE INTO mailboxes (newsgroup_id, slice) VALUES (?,?) if ($sth->execute($ngid, $pop3->{slice}) == 0) { $sth = $dbh->prepare_cached(<<'', undef, 1); SELECT mailbox_id FROM mailboxes WHERE newsgroup_id = ? AND slice = ? $sth->execute($ngid, $pop3->{slice}); $mbid = $sth->fetchrow_array // die "BUG: mailbox_id for $ng.$pop3->{slice} not found"; } else { $mbid = $dbh->last_insert_id(undef, undef, 'mailboxes', 'mailbox_id'); } # 4. ensure the (max) deletes row exists for locking $sth = $dbh->prepare_cached(<<''); INSERT OR IGNORE INTO deletes (user_id,mailbox_id) VALUES (?,?) if ($sth->execute($user_id, $mbid) == 0) { $sth = $dbh->prepare_cached(<<'', undef, 1); SELECT txn_id,uid_dele FROM deletes WHERE user_id = ? AND mailbox_id = ? $sth->execute($user_id, $mbid); ($txn_id, $pop3->{uid_dele}) = $sth->fetchrow_array; } else { $txn_id = $dbh->last_insert_id(undef, undef, 'deletes', 'txn_id'); } $dbh->commit; # see if it's locked by the same worker: return if $self->{txn_locks}->{$txn_id}; # see if it's locked by another worker: _setlk($self, l_type => F_WRLCK, l_start => $txn_id - 1, l_len => 1) or return; $pop3->{user_id} = $user_id; $pop3->{txn_id} = $txn_id; $self->{txn_locks}->{$txn_id} = 1; } sub unlock_mailbox { my ($self, $pop3) = @_; my $txn_id = delete($pop3->{txn_id}) // return; if (!$pop3->{did_quit}) { # deal with QUIT-less disconnects my $lk = $self->lock_for_scope; $self->{-state_dbh}->begin_work; $pop3->__cleanup_state($txn_id); $self->{-state_dbh}->commit; } delete $self->{txn_locks}->{$txn_id}; # same worker # other workers _setlk($self, l_type => F_UNLCK, l_start => $txn_id - 1, l_len => 1) or die "F_UNLCK: $!"; } 1; public-inbox-1.9.0/lib/PublicInbox/PktOp.pm000066400000000000000000000035431430031475700205040ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # op dispatch socket, reads a message, runs a sub # There may be multiple producers, but (for now) only one consumer # Used for lei_xsearch and maybe other things # "command" => [ $sub, @fixed_operands ] package PublicInbox::PktOp; use strict; use v5.10.1; use parent qw(PublicInbox::DS); use Errno qw(EAGAIN ECONNRESET); use PublicInbox::Syscall qw(EPOLLIN); use Socket qw(AF_UNIX MSG_EOR SOCK_SEQPACKET); use PublicInbox::IPC qw(ipc_freeze ipc_thaw); use Scalar::Util qw(blessed); sub new { my ($cls, $r) = @_; my $self = bless { sock => $r }, $cls; $r->blocking(0); $self->SUPER::new($r, EPOLLIN); } # returns a blessed objects as the consumer and producer sub pair { my ($cls) = @_; my ($c, $p); socketpair($c, $p, AF_UNIX, SOCK_SEQPACKET, 0) or die "socketpair: $!"; (new($cls, $c), bless { op_p => $p }, $cls); } sub pkt_do { # for the producer to trigger event_step in consumer my ($self, $cmd, @args) = @_; send($self->{op_p}, @args ? "$cmd\0".ipc_freeze(\@args) : $cmd, MSG_EOR) } sub event_step { my ($self) = @_; my $c = $self->{sock}; my $n = recv($c, my $msg, 4096, 0); unless (defined $n) { return if $! == EAGAIN; die "recv: $!" if $! != ECONNRESET; # we may be bidirectional } my ($cmd, @pargs); if (index($msg, "\0") > 0) { ($cmd, my $pargs) = split(/\0/, $msg, 2); @pargs = @{ipc_thaw($pargs)}; } else { # for compatibility with the script/lei in client mode, # it doesn't load Sereal||Storable for startup speed ($cmd, @pargs) = split(/ /, $msg); } my $op = $self->{ops}->{$cmd //= $msg}; if ($op) { my ($obj, @args) = (@$op, @pargs); blessed($obj) ? $obj->$cmd(@args) : $obj->(@args); } elsif ($msg ne '') { die "BUG: unknown message: `$cmd'"; } $self->close if $msg eq ''; # close on EOF } 1; public-inbox-1.9.0/lib/PublicInbox/ProcessPipe.pm000066400000000000000000000035201430031475700216760ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # a tied handle for auto reaping of children tied to a pipe, see perltie(1) package PublicInbox::ProcessPipe; use strict; use v5.10.1; use Carp qw(carp); sub TIEHANDLE { my ($class, $pid, $fh, $cb, $arg) = @_; bless { pid => $pid, fh => $fh, ppid => $$, cb => $cb, arg => $arg }, $class; } sub BINMODE { binmode(shift->{fh}) } # for IO::Uncompress::Gunzip sub READ { read($_[0]->{fh}, $_[1], $_[2], $_[3] || 0) } sub READLINE { readline($_[0]->{fh}) } sub WRITE { use bytes qw(length); syswrite($_[0]->{fh}, $_[1], $_[2] // length($_[1]), $_[3] // 0); } sub PRINT { my $self = shift; print { $self->{fh} } @_; } sub FILENO { fileno($_[0]->{fh}) } sub _close ($;$) { my ($self, $wait) = @_; my $fh = delete $self->{fh}; my $ret = defined($fh) ? close($fh) : ''; my ($pid, $cb, $arg) = delete @$self{qw(pid cb arg)}; return $ret unless defined($pid) && $self->{ppid} == $$; if ($wait) { # caller cares about the exit status: my $wp = waitpid($pid, 0); if ($wp == $pid) { $ret = '' if $?; if ($cb) { eval { $cb->($arg, $pid) }; carp "E: cb(arg, $pid): $@" if $@; } } else { carp "waitpid($pid, 0) = $wp, \$!=$!, \$?=$?"; } } else { # caller just undef-ed it, let event loop deal with it require PublicInbox::DS; PublicInbox::DS::dwaitpid($pid, $cb, $arg); } $ret; } # if caller uses close(), assume they want to check $? immediately so # we'll waitpid() synchronously. n.b. wantarray doesn't seem to # propagate `undef' down to tied methods, otherwise I'd rely on that. sub CLOSE { _close($_[0], 1) } # if relying on DESTROY, assume the caller doesn't care about $? and # we can let the event loop call waitpid() whenever it gets SIGCHLD sub DESTROY { _close($_[0]); undef; } 1; public-inbox-1.9.0/lib/PublicInbox/Qspawn.pm000066400000000000000000000251331430031475700207170ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # Like most Perl modules in public-inbox, this is internal and # NOT subject to any stability guarantees! It is only documented # for other hackers. # # This is used to limit the number of processes spawned by the # PSGI server, so it acts like a semaphore and queues up extra # commands to be run if currently at the limit. Multiple "limiters" # may be configured which give inboxes different channels to # operate in. This can be useful to ensure smaller inboxes can # be cloned while cloning of large inboxes is maxed out. # # This does not depend on the PublicInbox::DS::event_loop or any # other external scheduling mechanism, you just need to call # start() and finish() appropriately. However, public-inbox-httpd # (which uses PublicInbox::DS) will be able to schedule this # based on readability of stdout from the spawned process. # See GitHTTPBackend.pm and SolverGit.pm for usage examples. # It does not depend on any form of threading. # # This is useful for scheduling CGI execution of both long-lived # git-http-backend(1) process (for "git clone") as well as short-lived # processes such as git-apply(1). package PublicInbox::Qspawn; use strict; use PublicInbox::Spawn qw(popen_rd); use PublicInbox::GzipFilter; # n.b.: we get EAGAIN with public-inbox-httpd, and EINTR on other PSGI servers use Errno qw(EAGAIN EINTR); my $def_limiter; # declares a command to spawn (but does not spawn it). # $cmd is the command to spawn # $cmd_env is the environ for the child process (not PSGI env) # $opt can include redirects and perhaps other process spawning options sub new ($$$;) { my ($class, $cmd, $cmd_env, $opt) = @_; bless { args => [ $cmd, $cmd_env, $opt ] }, $class; } sub _do_spawn { my ($self, $start_cb, $limiter) = @_; my $err; my ($cmd, $cmd_env, $opt) = @{delete $self->{args}}; my %o = %{$opt || {}}; $self->{limiter} = $limiter; foreach my $k (@PublicInbox::Spawn::RLIMITS) { if (defined(my $rlimit = $limiter->{$k})) { $o{$k} = $rlimit; } } $self->{cmd} = $o{quiet} ? undef : $cmd; eval { # popen_rd may die on EMFILE, ENFILE $self->{rpipe} = popen_rd($cmd, $cmd_env, \%o); die "E: $!" unless defined($self->{rpipe}); $limiter->{running}++; $start_cb->($self); # EPOLL_CTL_ADD may ENOSPC/ENOMEM }; finish($self, $@) if $@; } sub child_err ($) { my ($child_error) = @_; # typically $? my $exitstatus = ($child_error >> 8) or return; my $sig = $child_error & 127; my $msg = "exit status=$exitstatus"; $msg .= " signal=$sig" if $sig; $msg; } sub finalize ($$) { my ($self, $err) = @_; my ($env, $qx_cb, $qx_arg, $qx_buf) = delete @$self{qw(psgi_env qx_cb qx_arg qx_buf)}; # done, spawn whatever's in the queue my $limiter = $self->{limiter}; my $running = --$limiter->{running}; if ($running < $limiter->{max}) { if (my $next = shift(@{$limiter->{run_queue}})) { _do_spawn(@$next, $limiter); } } if ($err) { if (defined $self->{err}) { $self->{err} .= "; $err"; } else { $self->{err} = $err; } if ($env && $self->{cmd}) { warn join(' ', @{$self->{cmd}}) . ": $err"; } } if ($qx_cb) { eval { $qx_cb->($qx_buf, $qx_arg) }; } elsif (my $wcb = delete $env->{'qspawn.wcb'}) { # have we started writing, yet? require PublicInbox::WwwStatic; $wcb->(PublicInbox::WwwStatic::r(500)); } } # callback for dwaitpid or ProcessPipe sub waitpid_err { finalize($_[0], child_err($?)) } sub finish ($;$) { my ($self, $err) = @_; my $tied_pp = delete($self->{rpipe}) or return finalize($self, $err); my PublicInbox::ProcessPipe $pp = tied *$tied_pp; @$pp{qw(cb arg)} = (\&waitpid_err, $self); # for ->DESTROY } sub start ($$$) { my ($self, $limiter, $start_cb) = @_; if ($limiter->{running} < $limiter->{max}) { _do_spawn($self, $start_cb, $limiter); } else { push @{$limiter->{run_queue}}, [ $self, $start_cb ]; } } sub psgi_qx_init_cb { my ($self) = @_; my $async = delete $self->{async}; my ($r, $buf); my $qx_fh = $self->{qx_fh}; reread: $r = sysread($self->{rpipe}, $buf, 65536); if ($async) { $async->async_pass($self->{psgi_env}->{'psgix.io'}, $qx_fh, \$buf); } elsif (defined $r) { $r ? (print $qx_fh $buf) : event_step($self, undef); } else { return if $! == EAGAIN; # try again when notified goto reread if $! == EINTR; event_step($self, $!); } } sub psgi_qx_start { my ($self) = @_; if (my $async = $self->{psgi_env}->{'pi-httpd.async'}) { # PublicInbox::HTTPD::Async->new(rpipe, $cb, cb_arg, $end_obj) $self->{async} = $async->($self->{rpipe}, \&psgi_qx_init_cb, $self, $self); # init_cb will call ->async_pass or ->close } else { # generic PSGI psgi_qx_init_cb($self) while $self->{qx_fh}; } } # Similar to `backtick` or "qx" ("perldoc -f qx"), it calls $qx_cb with # the stdout of the given command when done; but respects the given limiter # $env is the PSGI env. As with ``/qx; only use this when output is small # and safe to slurp. sub psgi_qx { my ($self, $env, $limiter, $qx_cb, $qx_arg) = @_; $self->{psgi_env} = $env; my $qx_buf = ''; open(my $qx_fh, '+>', \$qx_buf) or die; # PerlIO::scalar $self->{qx_cb} = $qx_cb; $self->{qx_arg} = $qx_arg; $self->{qx_fh} = $qx_fh; $self->{qx_buf} = \$qx_buf; $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32); start($self, $limiter, \&psgi_qx_start); } # this is called on pipe EOF to reap the process, may be called # via PublicInbox::DS event loop OR via GetlineBody for generic # PSGI servers. sub event_step { my ($self, $err) = @_; # $err: $! warn "psgi_{return,qx} $err" if defined($err); finish($self); my ($fh, $qx_fh) = delete(@$self{qw(fh qx_fh)}); $fh->close if $fh; # async-only (psgi_return) } sub rd_hdr ($) { my ($self) = @_; # typically used for reading CGI headers # We also need to check EINTR for generic PSGI servers. my $ret; my $total_rd = 0; my $hdr_buf = $self->{hdr_buf}; my ($ph_cb, $ph_arg) = @{$self->{parse_hdr}}; do { my $r = sysread($self->{rpipe}, $$hdr_buf, 4096, length($$hdr_buf)); if (defined($r)) { $total_rd += $r; eval { $ret = $ph_cb->($total_rd, $hdr_buf, $ph_arg) }; if ($@) { warn "parse_hdr: $@"; $ret = [ 500, [], [ "Internal error\n" ] ]; } } else { # caller should notify us when it's ready: return if $! == EAGAIN; next if $! == EINTR; # immediate retry warn "error reading header: $!"; $ret = [ 500, [], [ "Internal error\n" ] ]; } } until (defined $ret); delete $self->{parse_hdr}; # done parsing headers $ret; } sub psgi_return_init_cb { my ($self) = @_; my $r = rd_hdr($self) or return; my $env = $self->{psgi_env}; my $filter = delete $env->{'qspawn.filter'} // PublicInbox::GzipFilter::qsp_maybe($r->[1], $env); my $wcb = delete $env->{'qspawn.wcb'}; my $async = delete $self->{async}; if (scalar(@$r) == 3) { # error if ($async) { # calls rpipe->close && ->event_step $async->close; } else { $self->{rpipe}->close; event_step($self); } $wcb->($r); } elsif ($async) { # done reading headers, handoff to read body my $fh = $wcb->($r); # scalar @$r == 2 $fh = $filter->attach($fh) if $filter; $self->{fh} = $fh; $async->async_pass($env->{'psgix.io'}, $fh, delete($self->{hdr_buf})); } else { # for synchronous PSGI servers require PublicInbox::GetlineBody; $r->[2] = PublicInbox::GetlineBody->new($self->{rpipe}, \&event_step, $self, ${$self->{hdr_buf}}, $filter); $wcb->($r); } } sub psgi_return_start { # may run later, much later... my ($self) = @_; if (my $async = $self->{psgi_env}->{'pi-httpd.async'}) { # PublicInbox::HTTPD::Async->new(rpipe, $cb, $cb_arg, $end_obj) $self->{async} = $async->($self->{rpipe}, \&psgi_return_init_cb, $self, $self); } else { # generic PSGI psgi_return_init_cb($self) while $self->{parse_hdr}; } } # Used for streaming the stdout of one process as a PSGI response. # # $env is the PSGI env. # optional keys in $env: # $env->{'qspawn.wcb'} - the write callback from the PSGI server # optional, use this if you've already # captured it elsewhere. If not given, # psgi_return will return an anonymous # sub for the PSGI server to call # # $env->{'qspawn.filter'} - filter object, responds to ->attach for # pi-httpd.async and ->translate for generic # PSGI servers # # $limiter - the Limiter object to use (uses the def_limiter if not given) # # $parse_hdr - Initial read function; often for parsing CGI header output. # It will be given the return value of sysread from the pipe # and a string ref of the current buffer. Returns an arrayref # for PSGI responses. 2-element arrays in PSGI mean the # body will be streamed, later, via writes (push-based) to # psgix.io. 3-element arrays means the body is available # immediately (or streamed via ->getline (pull-based)). sub psgi_return { my ($self, $env, $limiter, $parse_hdr, $hdr_arg) = @_; $self->{psgi_env} = $env; $self->{hdr_buf} = \(my $hdr_buf = ''); $self->{parse_hdr} = [ $parse_hdr, $hdr_arg ]; $limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32); # the caller already captured the PSGI write callback from # the PSGI server, so we can call ->start, here: $env->{'qspawn.wcb'} and return start($self, $limiter, \&psgi_return_start); # the caller will return this sub to the PSGI server, so # it can set the response callback (that is, for # PublicInbox::HTTP, the chunked_wcb or identity_wcb callback), # but other HTTP servers are supported: sub { $env->{'qspawn.wcb'} = $_[0]; start($self, $limiter, \&psgi_return_start); } } package PublicInbox::Qspawn::Limiter; use strict; use warnings; sub new { my ($class, $max) = @_; bless { # 32 is same as the git-daemon connection limit max => $max || 32, running => 0, run_queue => [], # RLIMIT_CPU => undef, # RLIMIT_DATA => undef, # RLIMIT_CORE => undef, }, $class; } sub setup_rlimit { my ($self, $name, $cfg) = @_; foreach my $rlim (@PublicInbox::Spawn::RLIMITS) { my $k = lc($rlim); $k =~ tr/_//d; $k = "publicinboxlimiter.$name.$k"; defined(my $v = $cfg->{$k}) or next; my @rlimit = split(/\s*,\s*/, $v); if (scalar(@rlimit) == 1) { push @rlimit, $rlimit[0]; } elsif (scalar(@rlimit) != 2) { warn "could not parse $k: $v\n"; } eval { require BSD::Resource }; if ($@) { warn "BSD::Resource missing for $rlim"; next; } foreach my $i (0..$#rlimit) { next if $rlimit[$i] ne 'INFINITY'; $rlimit[$i] = BSD::Resource::RLIM_INFINITY(); } $self->{$rlim} = \@rlimit; } } 1; public-inbox-1.9.0/lib/PublicInbox/Reply.pm000066400000000000000000000062321430031475700205400ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # For reply instructions and address generation in WWW UI package PublicInbox::Reply; use strict; use v5.10.1; use URI::Escape qw/uri_escape_utf8/; use PublicInbox::Hval qw(ascii_html obfuscate_addrs mid_href); use PublicInbox::Address; use PublicInbox::MID qw(mid_clean); use PublicInbox::Config; *squote_maybe = \&PublicInbox::Config::squote_maybe; sub add_addrs { my ($to, $cc, @addrs) = @_; foreach my $address (@addrs) { my $dst = lc($address); $cc->{$dst} ||= $address; $$to ||= $dst; } } my @reply_headers = qw(From To Cc Reply-To); my $reply_headers = join('|', @reply_headers); sub mailto_arg_link { my ($ibx, $hdr) = @_; my $cc = {}; # everyone else my $to; # this is the From address by default my $reply_to_all = 'reply-to-all'; # the only good default :P my $reply_to_cfg = $ibx->{replyto}; $reply_to_cfg ||= ':all'; if ($reply_to_cfg =~ /\A:none=(.*)/) { my $msg = $1; $msg = 'replies disabled' if $msg eq ''; return \$msg; } foreach my $rt (split(/\s*,\s*/, $reply_to_cfg)) { if ($rt eq ':all') { foreach my $h (@reply_headers) { my $v = $hdr->header($h); defined($v) && ($v ne '') or next; my @addrs = PublicInbox::Address::emails($v); add_addrs(\$to, $cc, @addrs); } } elsif ($rt eq ':list') { $reply_to_all = 'reply-to-list'; add_addrs(\$to, $cc, $ibx->{-primary_address}); } elsif ($rt =~ /\A(?:$reply_headers)\z/io) { # ugh, this is weird... my $v = $hdr->header($rt); if (defined($v) && ($v ne '')) { my @addrs = PublicInbox::Address::emails($v); add_addrs(\$to, $cc, @addrs); } } elsif ($rt =~ /@/) { add_addrs(\$to, $cc, $rt); } else { warn "Unrecognized replyto = '$rt' in config\n"; } } my @arg; my $obfs = $ibx->{obfuscate}; my $subj = $hdr->header('Subject') || ''; $subj = "Re: $subj" unless $subj =~ /\bRe:/i; my $mid = $hdr->header_raw('Message-ID'); push @arg, '--in-reply-to='.squote_maybe(mid_clean($mid)); my $irt = mid_href($mid); add_addrs(\$to, $cc, $ibx->{-primary_address}) unless defined($to); delete $cc->{$to}; if ($obfs) { my $arg_to = $to; obfuscate_addrs($ibx, $arg_to, '$(echo .)'); push @arg, "--to=$arg_to"; # no $subj for $href below } else { push @arg, "--to=$to"; $subj = uri_escape_utf8($subj); } my @cc = sort values %$cc; $cc = ''; if (@cc) { if ($obfs) { push(@arg, map { my $addr = $_; obfuscate_addrs($ibx, $addr, '$(echo .)'); "--cc=$addr"; } @cc); } else { $cc = '&Cc=' . uri_escape_utf8(join(',', @cc)); push(@arg, map { "--cc=$_" } @cc); } } # I'm not sure if address obfuscation and mailto: links can # be made compatible; and address obfuscation is misguided, # anyways. return (\@arg, '', $reply_to_all) if $obfs; # keep `@' instead of using `%40' for RFC 6068 utf8::encode($to); $to =~ s!([^A-Za-z0-9\-\._~\@])!$URI::Escape::escapes{$1}!ge; # order matters, Subject is the least important header, # so it is last in case it's lost/truncated in a copy+paste my $href = "mailto:$to?In-Reply-To=$irt${cc}&Subject=$subj"; (\@arg, ascii_html($href), $reply_to_all); } 1; public-inbox-1.9.0/lib/PublicInbox/SaPlugin/000077500000000000000000000000001430031475700206265ustar00rootroot00000000000000public-inbox-1.9.0/lib/PublicInbox/SaPlugin/ListMirror.pm000066400000000000000000000057201430031475700232760ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # SpamAssassin rules useful for running a mailing list mirror. We want to: # * ensure Received: headers are really from the list mail server # users expect. This is to prevent malicious users from # injecting spam into mirrors without going through the expected # server # * flag messages where the mailing list is Bcc:-ed since it is # common for spam to have wrong or non-existent To:/Cc: headers. package PublicInbox::SaPlugin::ListMirror; use strict; use warnings; use base qw(Mail::SpamAssassin::Plugin); # constructor: register the eval rules sub new { my ($class, $mail) = @_; # some boilerplate... $class = ref($class) || $class; my $self = $class->SUPER::new($mail); bless $self, $class; $mail->{conf}->{list_mirror_check} = []; $self->register_eval_rule('check_list_mirror_received'); $self->register_eval_rule('check_list_mirror_bcc'); $self->set_config($mail->{conf}); $self; } sub check_list_mirror_received { my ($self, $pms) = @_; my $recvd = $pms->get('Received') || ''; $recvd =~ s/\n.*\z//s; foreach my $cfg (@{$pms->{conf}->{list_mirror_check}}) { my ($hdr, $hval, $host_re, $addr_re) = @$cfg; my $v = $pms->get($hdr) or next; local $/ = "\n"; chomp $v; next if $v ne $hval; return 1 if $recvd !~ $host_re; } 0; } sub check_list_mirror_bcc { my ($self, $pms) = @_; my $tocc = $pms->get('ToCc'); foreach my $cfg (@{$pms->{conf}->{list_mirror_check}}) { my ($hdr, $hval, $host_re, $addr_re) = @$cfg; defined $addr_re or next; my $v = $pms->get($hdr) or next; local $/ = "\n"; chomp $v; next if $v ne $hval; return 1 if !$tocc || $tocc !~ $addr_re; } 0; } # list_mirror HEADER HEADER_VALUE HOSTNAME_GLOB [LIST_ADDRESS] # list_mirror X-Mailing-List git@vger.kernel.org *.kernel.org # list_mirror List-Id *.example.org foo@example.org sub config_list_mirror { my ($self, $key, $value, $line) = @_; defined $value or return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; my ($hdr, $hval, $host_glob, @extra) = split(/\s+/, $value); my $addr = shift @extra; if (defined $addr) { $addr !~ /\@/ and return $Mail::SpamAssassin::Conf::INVALID_VALUE; $addr = join('|', map { quotemeta } split(/,/, $addr)); $addr = qr/\b$addr\b/i; } @extra and return $Mail::SpamAssassin::Conf::INVALID_VALUE; defined $host_glob or return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; my %patmap = ('*' => '\S+', '?' => '.', '[' => '[', ']' => ']'); $host_glob =~ s!(.)!$patmap{$1} || "\Q$1"!ge; my $host_re = qr/\A\s*from\s+$host_glob(?:\s|$)/si; push @{$self->{list_mirror_check}}, [ $hdr, $hval, $host_re, $addr ]; } sub set_config { my ($self, $conf) = @_; my @cmds; push @cmds, { setting => 'list_mirror', default => '', type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING, code => *config_list_mirror, }; $conf->{parser}->register_commands(\@cmds); } 1; public-inbox-1.9.0/lib/PublicInbox/SaPlugin/ListMirror.pod000066400000000000000000000066401430031475700234460ustar00rootroot00000000000000=head1 NAME PublicInbox::SaPlugin::ListMirror - SpamAssassin plugin for mailing list mirrors =head1 SYNOPSIS loadplugin PublicInbox::SaPlugin::ListMirror Declare some mailing lists based on the expected List-Id value, expected servers, and mailing list address: list_mirror List-Id *.example.com foo@example.com list_mirror List-Id *.example.com bar@example.com Bump the score for messages which come from unexpected servers: header LIST_MIRROR_RECEIVED eval:check_list_mirror_received() describe LIST_MIRROR_RECEIVED Received does not match expected score LIST_MIRROR_RECEIVED 10 Bump the score for messages which Bcc the list: header LIST_MIRROR_BCC eval:check_list_mirror_bcc() describe LIST_MIRROR_BCC Mailing list was Bcc-ed score LIST_MIRROR_BCC 5 =head1 DESCRIPTION This plugin contains common functions to provide accurate, ongoing mirrors of existing mailing lists. It may be used independently of the rest of public-inbox, it does not depend on any public-inbox code, only SpamAssassin. =head1 ADMINISTRATOR SETTINGS This plugin has no administrator settings, aside from the need to load it via C and enabling user rules C =head1 USER SETTINGS =over 4 =item list_mirror HEADER HEADER_VALUE HOSTNAME_GLOB [LIST_ADDRESS] Declare a list based on an expected C
matching C exactly coming from C. C is optional, but may specify the address of the mailing list being mirrored. C or C are common values of C
An example of C is Cfoo.example.orgE> if C
is C. C may be a wildcard match for machines where mail can come from or an exact match. C is only required if using the L eval rule C may be specified multiple times. =back =head1 EVAL FUNCTIONS =over 4 =item header LIST_MIRROR_RECEIVED eval:check_list_mirror_received() The C function implements C header checking based on L configuration values. This rule can be used to score and prevent messages from being injected directly into your mirror without going through the expected mailing list servers: ifplugin PublicInbox::SaPlugin::ListMirror header LIST_MIRROR_RECEIVED eval:check_list_mirror_received() describe LIST_MIRROR_RECEIVED Received does not match expected endif =item header LIST_MIRROR_BCC eval:check_list_mirror_bcc() The C function checks for Bcc to mailing lists declared with a C via L Spammers will often Bcc mailing lists; while it's uncommon and strange for valid messages to be Bcc-ed to any public mailing list. This rule allows users to assign a score to Bcc-ed messages ifplugin PublicInbox::SaPlugin::ListMirror header LIST_MIRROR_BCC eval:check_list_mirror_bcc() describe LIST_MIRROR_BCC Mailing list was Bcc-ed endif =back =head1 CONTACT Feedback welcome via plain-text mail to L The mail archives are hosted at L and L =head1 COPYRIGHT Copyright (C) 2016-2021 all contributors L License: AGPL-3.0+ L =head1 SEE ALSO L public-inbox-1.9.0/lib/PublicInbox/Search.pm000066400000000000000000000434161430031475700206570ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # based on notmuch, but with no concept of folders, files or flags # # Read-only search interface for use by the web and NNTP interfaces package PublicInbox::Search; use strict; use v5.10.1; use parent qw(Exporter); our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms); use List::Util qw(max); use POSIX qw(strftime); use Carp (); # values for searching, changing the numeric value breaks # compatibility with old indices (so don't change them it) use constant { TS => 0, # Received: in Unix time (IMAP INTERNALDATE, JMAP receivedAt) YYYYMMDD => 1, # redundant with DT below DT => 2, # Date: YYYYMMDDHHMMSS (IMAP SENT*, JMAP sentAt) # added for public-inbox 1.6.0+ BYTES => 3, # IMAP RFC822.SIZE UID => 4, # IMAP UID == NNTP article number == Xapian docid THREADID => 5, # RFC 8474, RFC 8621 # TODO # REPLYCNT => ?, # IMAP ANSWERED # SCHEMA_VERSION history # 0 - initial # 1 - subject_path is lower-cased # 2 - subject_path is id_compress in the index, only # 3 - message-ID is compressed if it includes '%' (hack!) # 4 - change "Re: " normalization, avoid circular Reference ghosts # 5 - subject_path drops trailing '.' # 6 - preserve References: order in document data # 7 - remove references and inreplyto terms # 8 - remove redundant/unneeded document data # 9 - disable Message-ID compression (SHA-1) # 10 - optimize doc for NNTP overviews # 11 - merge threads when vivifying ghosts # 12 - change YYYYMMDD value column to numeric # 13 - fix threading for empty References/In-Reply-To # (commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0) # 14 - fix ghost root vivification # 15 - see public-inbox-v2-format(5) # further bumps likely unnecessary, we'll suggest in-place # "--reindex" use for further fixes and tweaks: # # public-inbox v1.5.0 adds (still SCHEMA_VERSION=15): # * "lid:" and "l:" for List-Id searches # # v1.6.0 adds BYTES, UID and THREADID values SCHEMA_VERSION => 15, }; use PublicInbox::Smsg; use PublicInbox::Over; our $QP_FLAGS; our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem Query); our $Xap; # 'Search::Xapian' or 'Xapian' our $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor') # ENQ_DESCENDING and ENQ_ASCENDING weren't in SWIG Xapian.pm prior to 1.4.16, # let's hope the ABI is stable our $ENQ_DESCENDING = 0; our $ENQ_ASCENDING = 1; sub load_xapian () { return 1 if defined $Xap; # n.b. PI_XAPIAN is intended for development use only. We still # favor Search::Xapian since that's what's available in current # Debian stable (10.x) and derived distros. for my $x (($ENV{PI_XAPIAN} // 'Search::Xapian'), 'Xapian') { eval "require $x"; next if $@; $x->import(qw(:standard)); $Xap = $x; # `version_string' was added in Xapian 1.1 my $xver = eval('v'.eval($x.'::version_string()')) // eval('v'.eval($x.'::xapian_version_string()')); # NumberRangeProcessor was added in Xapian 1.3.6, # NumberValueRangeProcessor was removed for 1.5.0+, # favor the older /Value/ variant since that's what our # (currently) preferred Search::Xapian supports $NVRP = $x.'::'.($x eq 'Xapian' && $xver ge v1.5 ? 'NumberRangeProcessor' : 'NumberValueRangeProcessor'); $X{$_} = $Xap.'::'.$_ for (keys %X); *sortable_serialise = $x.'::sortable_serialise'; *sortable_unserialise = $x.'::sortable_unserialise'; # n.b. FLAG_PURE_NOT is expensive not suitable for a public # website as it could become a denial-of-service vector # FLAG_PHRASE also seems to cause performance problems chert # (and probably earlier Xapian DBs). glass seems fine... # TODO: make this an option, maybe? # or make indexlevel=medium as default $QP_FLAGS = FLAG_PHRASE() | FLAG_BOOLEAN() | FLAG_LOVEHATE() | FLAG_WILDCARD(); return 1; } undef; } # This is English-only, everything else is non-standard and may be confused as # a prefix common in patch emails our $LANG = 'english'; # note: the non-X term prefix allocations are shared with # Xapian omega, see xapian-applications/omega/docs/termprefixes.rst my %bool_pfx_external = ( mid => 'Q', # Message-ID (full/exact), this is mostly uniQue lid => 'G', # newsGroup (or similar entity), just inside <> dfpre => 'XDFPRE', dfpost => 'XDFPOST', dfblob => 'XDFPRE XDFPOST', patchid => 'XDFID', ); my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST XDFID'; my %prob_prefix = ( # for mairix compatibility s => 'S', m => 'XM', # 'mid:' (bool) is exact, 'm:' (prob) can do partial l => 'XL', # 'lid:' (bool) is exact, 'l:' (prob) can do partial f => 'A', t => 'XTO', tc => 'XTO XCC', c => 'XCC', tcf => 'XTO XCC A', a => 'XTO XCC A', b => $non_quoted_body . ' XQUOT', bs => $non_quoted_body . ' XQUOT S', n => 'XFN', q => 'XQUOT', nq => $non_quoted_body, dfn => 'XDFN', dfa => 'XDFA', dfb => 'XDFB', dfhh => 'XDFHH', dfctx => 'XDFCTX', # default: '' => 'XM S A XQUOT XFN ' . $non_quoted_body, ); # not documenting m: and mid: for now, the using the URLs works w/o Xapian # not documenting lid: for now, either, it is probably redundant with l:, # especially since we don't offer boolean searches for To/Cc/From # headers, either our @HELP = ( 's:' => 'match within Subject e.g. s:"a quick brown fox"', 'd:' => < 'match within message body, including text attachments', 'nq:' => 'match non-quoted text within message body', 'q:' => 'match quoted text within message body', 'n:' => 'match filename of attachment(s)', 't:' => 'match within the To header', 'c:' => 'match within the Cc header', 'f:' => 'match within the From header', 'a:' => 'match within the To, Cc, and From headers', 'tc:' => 'match within the To and Cc headers', 'l:' => 'match contents of the List-Id header', 'bs:' => 'match within the Subject and body', 'dfn:' => 'match filename from diff', 'dfa:' => 'match diff removed (-) lines', 'dfb:' => 'match diff added (+) lines', 'dfhh:' => 'match diff hunk header context (usually a function name)', 'dfctx:' => 'match diff context lines', 'dfpre:' => 'match pre-image git blob ID', 'dfpost:' => 'match post-image git blob ID', 'dfblob:' => 'match either pre or post-image git blob ID', 'patchid:' => "match `git patch-id --stable' output", 'rt:' => <{shard})) { $self->{xpfx}; } else { # v2 + extindex only: "$self->{xpfx}/$self->{shard}"; } } # returns all shards as separate Xapian::Database objects w/o combining sub xdb_shards_flat ($) { my ($self) = @_; my $xpfx = $self->{xpfx}; my (@xdb, $slow_phrase); load_xapian(); $self->{qp_flags} //= $QP_FLAGS; if ($xpfx =~ m!/xapian[0-9]+\z!) { @xdb = ($X{Database}->new($xpfx)); $self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert"; } else { opendir(my $dh, $xpfx) or return (); # not initialized yet # We need numeric sorting so shard[0] is first for reading # Xapian metadata, if needed my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return (); for (0..$last) { my $shard_dir = "$self->{xpfx}/$_"; push @xdb, $X{Database}->new($shard_dir); $slow_phrase ||= -f "$shard_dir/iamchert"; } $self->{qp_flags} |= FLAG_PHRASE() if !$slow_phrase; } @xdb; } # v2 Xapian docids don't conflict, so they're identical to # NNTP article numbers and IMAP UIDs. # https://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID sub mdocid { my ($nshard, $mitem) = @_; my $docid = $mitem->get_docid; int(($docid - 1) / $nshard) + 1; } sub mset_to_artnums { my ($self, $mset) = @_; my $nshard = $self->{nshard}; [ map { mdocid($nshard, $_) } $mset->items ]; } sub xdb ($) { my ($self) = @_; $self->{xdb} // do { my @xdb = $self->xdb_shards_flat or return; $self->{nshard} = scalar(@xdb); my $xdb = shift @xdb; $xdb->add_database($_) for @xdb; $self->{xdb} = $xdb; }; } sub new { my ($class, $ibx) = @_; ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx"; my $xap = $ibx->version > 1 ? 'xap' : 'public-inbox/xapian'; my $xpfx = "$ibx->{inboxdir}/$xap".SCHEMA_VERSION; my $self = bless { xpfx => $xpfx }, $class; $self->{altid} = $ibx->{altid} if defined($ibx->{altid}); $self; } sub reopen { my ($self) = @_; if (my $xdb = $self->{xdb}) { $xdb->reopen; } $self; # make chaining easier } # Convert git "approxidate" ranges to something usable with our # Xapian indices. At the moment, Xapian only offers a C++-only API # and neither the SWIG nor XS bindings allow us to use custom code # to parse dates (and libgit2 doesn't expose git__date_parse, either, # so we're running git-rev-parse(1)). # This replaces things we need to send to $git->date_parse with # "\0".$strftime_format.['+'|$idx]."\0" placeholders sub date_parse_prepare { my ($to_parse, $pfx, $range) = @_; # are we inside a parenthesized statement? my $end = $range =~ s/([\)\s]*)\z// ? $1 : ''; my @r = split(/\.\./, $range, 2); # expand "d:20101002" => "d:20101002..20101003" and like # n.b. git doesn't do YYYYMMDD w/o '-', it needs YYYY-MM-DD # We upgrade "d:" to "dt:" to iff using approxidate if ($pfx eq 'd') { my $fmt = "\0%Y%m%d"; if (!defined($r[1])) { if ($r[0] =~ /\A([0-9]{4})([0-9]{2})([0-9]{2})\z/) { push @$to_parse, "$1-$2-$3"; # we could've handled as-is, but we need # to parse anyways for "d+" below } else { push @$to_parse, $r[0]; if ($r[0] !~ /\A[0-9]{4}-[0-9]{2}-[0-9]{2}\z/) { $pfx = 'dt'; $fmt = "\0%Y%m%d%H%M%S"; } } $r[0] = "$fmt+$#$to_parse\0"; $r[1] = "$fmt+\0"; } else { for my $x (@r) { next if $x eq '' || $x =~ /\A[0-9]{8}\z/; push @$to_parse, $x; if ($x !~ /\A[0-9]{4}-[0-9]{2}-[0-9]{2}\z/) { $pfx = 'dt'; } $x = "$fmt$#$to_parse\0"; } if ($pfx eq 'dt') { for (@r) { s/\0%Y%m%d/\0%Y%m%d%H%M%S/; s/\A([0-9]{8})\z/${1}000000/; } } } } elsif ($pfx eq 'dt') { if (!defined($r[1])) { # git needs gaps and not /\d{14}/ if ($r[0] =~ /\A([0-9]{4})([0-9]{2})([0-9]{2}) ([0-9]{2})([0-9]{2})([0-9]{2})\z/x) { push @$to_parse, "$1-$2-$3 $4:$5:$6"; } else { push @$to_parse, $r[0]; } $r[0] = "\0%Y%m%d%H%M%S$#$to_parse\0"; $r[1] = "\0%Y%m%d%H%M%S+\0"; } else { for my $x (@r) { next if $x eq '' || $x =~ /\A[0-9]{14}\z/; push @$to_parse, $x; $x = "\0%Y%m%d%H%M%S$#$to_parse\0"; } } } else { # "rt", let git interpret "YYYY", deal with Y10K later :P for my $x (@r) { next if $x eq '' || $x =~ /\A[0-9]{5,}\z/; push @$to_parse, $x; $x = "\0%s$#$to_parse\0"; } $r[1] //= "\0%s+\0"; # add 1 day } "$pfx:".join('..', @r).$end; } sub date_parse_finalize { my ($git, $to_parse) = @_; # git-rev-parse can handle any number of args up to system # limits (around (4096*32) bytes on Linux). my @r = $git->date_parse(@$to_parse); # n.b. git respects TZ, times stored in SQLite/Xapian are always UTC, # and gmtime doesn't seem to do the right thing when TZ!=UTC my ($i, $t); $_[2] =~ s/\0(%[%YmdHMSs]+)([0-9\+]+)\0/ $t = $2 eq '+' ? ($r[$i]+86400) : $r[$i=$2+0]; $1 eq '%s' ? $t : strftime($1, gmtime($t))/sge; } # n.b. argv never has NUL, though we'll need to filter it out # if this $argv isn't from a command execution sub query_argv_to_string { my (undef, $git, $argv) = @_; my $to_parse; my $tmp = join(' ', map {; if (s!\b(d|rt|dt):(\S+)\z!date_parse_prepare( $to_parse //= [], $1, $2)!sge) { $_; } elsif (/\s/) { s/(.*?)\b(\w+:)// ? qq{$1$2"$_"} : qq{"$_"}; } else { $_ } } @$argv); date_parse_finalize($git, $to_parse, $tmp) if $to_parse; $tmp } # this is for the WWW "q=" query parameter and "lei q --stdin" # it can't do d:"5 days ago", but it will do d:5.days.ago sub query_approxidate { my (undef, $git) = @_; # $_[2] = $query_string (modified in-place) my $DQ = qq<"\x{201c}\x{201d}>; # Xapian can use curly quotes $_[2] =~ tr/\x00/ /; # Xapian doesn't do NUL, we use it as a placeholder my ($terms, $phrase, $to_parse); $_[2] =~ s{([^$DQ]*)([$DQ][^$DQ]*[$DQ])?}{ ($terms, $phrase) = ($1, $2); $terms =~ s!\b(d|rt|dt):(\S+)! date_parse_prepare($to_parse //= [], $1, $2)!sge; $terms.($phrase // ''); }sge; date_parse_finalize($git, $to_parse, $_[2]) if $to_parse; } # read-only sub mset { my ($self, $query_string, $opts) = @_; $opts ||= {}; my $qp = $self->{qp} //= $self->qparse_new; my $query = $qp->parse_query($query_string, $self->{qp_flags}); _do_enquire($self, $query, $opts); } sub retry_reopen { my ($self, $cb, @arg) = @_; for my $i (1..10) { if (wantarray) { my @ret = eval { $cb->($self, @arg) }; return @ret unless $@; } else { my $ret = eval { $cb->($self, @arg) }; return $ret unless $@; } # Exception: The revision being read has been discarded - # you should call Xapian::Database::reopen() if (ref($@) =~ /\bDatabaseModifiedError\b/) { reopen($self); } else { # let caller decide how to spew, because ExtMsg queries # get wonky and trigger: # "something terrible happened at .../Xapian/Enquire.pm" Carp::croak($@); } } Carp::croak("Too many Xapian database modifications in progress\n"); } sub _do_enquire { my ($self, $query, $opts) = @_; retry_reopen($self, \&_enquire_once, $query, $opts); } # returns true if all docs have the THREADID value sub has_threadid ($) { my ($self) = @_; (xdb($self)->get_metadata('has_threadid') // '') eq '1'; } sub _enquire_once { # retry_reopen callback my ($self, $query, $opts) = @_; my $xdb = xdb($self); if (defined(my $eidx_key = $opts->{eidx_key})) { $query = $X{Query}->new(OP_FILTER(), $query, 'O'.$eidx_key); } if (defined(my $uid_range = $opts->{uid_range})) { my $range = $X{Query}->new(OP_VALUE_RANGE(), UID, sortable_serialise($uid_range->[0]), sortable_serialise($uid_range->[1])); $query = $X{Query}->new(OP_FILTER(), $query, $range); } my $enquire = $X{Enquire}->new($xdb); $enquire->set_query($query); $opts ||= {}; my $rel = $opts->{relevance} // 0; if ($rel == -2) { # ORDER BY docid/UID (highest first) $enquire->set_weighting_scheme($X{BoolWeight}->new); $enquire->set_docid_order($ENQ_DESCENDING); } elsif ($rel == -1) { # ORDER BY docid/UID (lowest first) $enquire->set_weighting_scheme($X{BoolWeight}->new); $enquire->set_docid_order($ENQ_ASCENDING); } elsif ($rel == 0) { $enquire->set_sort_by_value_then_relevance(TS, !$opts->{asc}); } else { # rel > 0 $enquire->set_sort_by_relevance_then_value(TS, !$opts->{asc}); } # `mairix -t / --threads' or JMAP collapseThreads if ($opts->{threads} && has_threadid($self)) { $enquire->set_collapse_key(THREADID); } $enquire->get_mset($opts->{offset} || 0, $opts->{limit} || 50); } sub mset_to_smsg { my ($self, $ibx, $mset) = @_; my $nshard = $self->{nshard}; my $i = 0; my %order = map { mdocid($nshard, $_) => ++$i } $mset->items; my @msgs = sort { $order{$a->{num}} <=> $order{$b->{num}} } @{$ibx->over->get_all(keys %order)}; wantarray ? ($mset->get_matches_estimated, \@msgs) : \@msgs; } # read-write sub stemmer { $X{Stem}->new($LANG) } # read-only sub qparse_new { my ($self) = @_; my $xdb = xdb($self); my $qp = $X{QueryParser}->new; $qp->set_default_op(OP_AND()); $qp->set_database($xdb); $qp->set_stemmer(stemmer($self)); $qp->set_stemming_strategy(STEM_SOME()); my $cb = $qp->can('set_max_wildcard_expansion') // $qp->can('set_max_expansion'); # Xapian 1.5.0+ $cb->($qp, 100); $cb = $qp->can('add_valuerangeprocessor') // $qp->can('add_rangeprocessor'); # Xapian 1.5.0+ $cb->($qp, $NVRP->new(YYYYMMDD, 'd:')); $cb->($qp, $NVRP->new(DT, 'dt:')); # for IMAP, undocumented for WWW and may be split off go away $cb->($qp, $NVRP->new(BYTES, 'z:')); $cb->($qp, $NVRP->new(TS, 'rt:')); $cb->($qp, $NVRP->new(UID, 'uid:')); while (my ($name, $prefix) = each %bool_pfx_external) { $qp->add_boolean_prefix($name, $_) foreach split(/ /, $prefix); } # we do not actually create AltId objects, # just parse the spec to avoid the extra DB handles for now. if (my $altid = $self->{altid}) { my $user_pfx = $self->{-user_pfx} = []; for (@$altid) { # $_ = 'serial:gmane:/path/to/gmane.msgmap.sqlite3' # note: Xapian supports multibyte UTF-8, /^[0-9]+$/, # and '_' with prefixes matching \w+ /\Aserial:(\w+):/ or next; my $pfx = $1; push @$user_pfx, "$pfx:", < XGMANE $qp->add_boolean_prefix($pfx, 'X'.uc($pfx)); } chomp @$user_pfx; } while (my ($name, $prefix) = each %prob_prefix) { $qp->add_prefix($name, $_) foreach split(/ /, $prefix); } $qp; } sub help { my ($self) = @_; $self->{qp} //= $self->qparse_new; # parse altids my @ret = @HELP; if (my $user_pfx = $self->{-user_pfx}) { push @ret, @$user_pfx; } \@ret; } # always returns a scalar value sub int_val ($$) { my ($doc, $col) = @_; my $val = $doc->get_value($col) or return undef; # undef is '' in Xapian sortable_unserialise($val) + 0; # PV => IV conversion } sub get_pct ($) { # mset item # Capped at "99%" since "100%" takes an extra column in the # thread skeleton view. says the value isn't # very meaningful, anyways. my $n = $_[0]->get_percent; $n > 99 ? 99 : $n; } sub xap_terms ($$;@) { my ($pfx, $xdb_or_doc, @docid) = @_; # @docid may be empty () my %ret; my $end = $xdb_or_doc->termlist_end(@docid); my $cur = $xdb_or_doc->termlist_begin(@docid); for (; $cur != $end; $cur++) { $cur->skip_to($pfx); last if $cur == $end; my $tn = $cur->get_termname; $ret{substr($tn, length($pfx))} = undef if !index($tn, $pfx); } wantarray ? sort(keys(%ret)) : \%ret; } # get combined docid from over.num: # (not generic Xapian, only works with our sharding scheme) sub num2docid ($$) { my ($self, $num) = @_; my $nshard = $self->{nshard}; ($num - 1) * $nshard + $num % $nshard + 1; } 1; public-inbox-1.9.0/lib/PublicInbox/SearchIdx.pm000066400000000000000000001006451430031475700213220ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # based on notmuch, but with no concept of folders, files # # Indexes mail with Xapian and our (SQLite-based) ::Msgmap for use # with the web and NNTP interfaces. This index maintains thread # relationships for use by PublicInbox::SearchThread. # This writes to the search index. package PublicInbox::SearchIdx; use strict; use v5.10.1; use parent qw(PublicInbox::Search PublicInbox::Lock Exporter); use PublicInbox::Eml; use PublicInbox::Search qw(xap_terms); use PublicInbox::InboxWritable; use PublicInbox::MID qw(mids_for_index mids); use PublicInbox::MsgIter; use PublicInbox::IdxStack; use Carp qw(croak carp); use POSIX qw(strftime); use Fcntl qw(SEEK_SET); use Time::Local qw(timegm); use PublicInbox::OverIdx; use PublicInbox::Spawn qw(spawn); use PublicInbox::Git qw(git_unquote); use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp); use PublicInbox::Address; use Config; our @EXPORT_OK = qw(log2stack is_ancestor check_size prepare_stack index_text term_generator add_val is_bad_blob); my $X = \%PublicInbox::Search::X; our ($DB_CREATE_OR_OPEN, $DB_OPEN); our $DB_NO_SYNC = 0; our $DB_DANGEROUS = 0; our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff : # assume a typical 64-bit system has 8x more RAM than a # typical 32-bit system: (($Config{ptrsize} >= 8 ? 8192 : 1024) * 1024); use constant DEBUG => !!$ENV{DEBUG}; my $BASE85 = qr/\A[a-zA-Z0-9\!\#\$\%\&\(\)\*\+\-;<=>\?\@\^_`\{\|\}\~]+\z/; my $xapianlevels = qr/\A(?:full|medium)\z/; my $hex = '[a-f0-9]'; my $OID = $hex .'{40,}'; my @VMD_MAP = (kw => 'K', L => 'L'); our $INDEXLEVELS = qr/\A(?:full|medium|basic)\z/; sub new { my ($class, $ibx, $creat, $shard) = @_; ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx"; my $inboxdir = $ibx->{inboxdir}; my $version = $ibx->version; my $indexlevel = 'full'; my $altid = $ibx->{altid}; if ($altid) { require PublicInbox::AltId; $altid = [ map { PublicInbox::AltId->new($ibx, $_); } @$altid ]; } if ($ibx->{indexlevel}) { if ($ibx->{indexlevel} =~ $INDEXLEVELS) { $indexlevel = $ibx->{indexlevel}; } else { die("Invalid indexlevel $ibx->{indexlevel}\n"); } } $ibx = PublicInbox::InboxWritable->new($ibx); my $self = PublicInbox::Search->new($ibx); bless $self, $class; $self->{ibx} = $ibx; $self->{-altid} = $altid; $self->{indexlevel} = $indexlevel; $self->{-set_indexlevel_once} = 1 if $indexlevel eq 'medium'; if ($ibx->{-skip_docdata}) { $self->{-set_skip_docdata_once} = 1; $self->{-skip_docdata} = 1; } if ($version == 1) { $self->{lock_path} = "$inboxdir/ssoma.lock"; my $dir = $self->xdir; $self->{oidx} = PublicInbox::OverIdx->new("$dir/over.sqlite3"); $self->{oidx}->{-no_fsync} = 1 if $ibx->{-no_fsync}; } elsif ($version == 2) { defined $shard or die "shard is required for v2\n"; # shard is a number $self->{shard} = $shard; $self->{lock_path} = undef; } else { die "unsupported inbox version=$version\n"; } $self->{creat} = ($creat || 0) == 1; $self; } sub need_xapian ($) { $_[0]->{indexlevel} =~ $xapianlevels } sub idx_release { my ($self, $wake) = @_; if (need_xapian($self)) { my $xdb = delete $self->{xdb} or croak '{xdb} not acquired'; $xdb->close; } $self->lock_release($wake) if $self->{creat}; undef; } sub load_xapian_writable () { return 1 if $X->{WritableDatabase}; PublicInbox::Search::load_xapian() or die "failed to load Xapian: $@\n"; my $xap = $PublicInbox::Search::Xap; for (qw(Document TermGenerator WritableDatabase)) { $X->{$_} = $xap.'::'.$_; } eval 'require '.$X->{WritableDatabase} or die; *sortable_serialise = $xap.'::sortable_serialise'; $DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()'); $DB_OPEN = eval($xap.'::DB_OPEN()'); my $ver = (eval($xap.'::major_version()') << 16) | (eval($xap.'::minor_version()') << 8) | eval($xap.'::revision()'); if ($ver >= 0x10400) { $DB_NO_SYNC = 0x4; $DB_DANGEROUS = 0x10; } # Xapian v1.2.21..v1.2.24 were missing close-on-exec on OFD locks $X->{CLOEXEC_UNSET} = 1 if $ver >= 0x010215 && $ver <= 0x010218; 1; } sub idx_acquire { my ($self) = @_; my $flag; my $dir = $self->xdir; if (need_xapian($self)) { croak 'already acquired' if $self->{xdb}; load_xapian_writable(); $flag = $self->{creat} ? $DB_CREATE_OR_OPEN : $DB_OPEN; } if ($self->{creat}) { require File::Path; $self->lock_acquire; # don't create empty Xapian directories if we don't need Xapian my $is_shard = defined($self->{shard}); if (!-d $dir && (!$is_shard || ($is_shard && need_xapian($self)))) { File::Path::mkpath($dir); require PublicInbox::Syscall; PublicInbox::Syscall::nodatacow_dir($dir); $self->{-set_has_threadid_once} = 1; if (($self->{ibx} // $self->{eidx})->{-dangerous}) { $flag |= $DB_DANGEROUS; } } } return unless defined $flag; $flag |= $DB_NO_SYNC if ($self->{ibx} // $self->{eidx})->{-no_fsync}; my $xdb = eval { ($X->{WritableDatabase})->new($dir, $flag) }; croak "Failed opening $dir: $@" if $@; $self->{xdb} = $xdb; } sub add_val ($$$) { my ($doc, $col, $num) = @_; $num = sortable_serialise($num); $doc->add_value($col, $num); } sub term_generator ($) { # write-only my ($self) = @_; $self->{term_generator} //= do { my $tg = $X->{TermGenerator}->new; $tg->set_stemmer(PublicInbox::Search::stemmer($self)); $tg; } } sub index_phrase ($$$$) { my ($self, $text, $wdf_inc, $prefix) = @_; my $tg = term_generator($self); $tg->index_text($text, $wdf_inc, $prefix); $tg->increase_termpos; } sub index_text ($$$$) { my ($self, $text, $wdf_inc, $prefix) = @_; if ($self->{indexlevel} eq 'full') { index_phrase($self, $text, $wdf_inc, $prefix); } else { my $tg = term_generator($self); $tg->index_text_without_positions($text, $wdf_inc, $prefix); } } sub index_headers ($$) { my ($self, $smsg) = @_; my @x = (from => 'A', to => 'XTO', cc => 'XCC'); # A: Author while (my ($field, $pfx) = splice(@x, 0, 2)) { my $val = $smsg->{$field}; next if $val eq ''; # include "(comments)" after the address, too, so not using # PublicInbox::Address::names or pairs index_text($self, $val, 1, $pfx); # we need positional info for email addresses since they # can be considered phrases if ($self->{indexlevel} eq 'medium') { for my $addr (PublicInbox::Address::emails($val)) { index_phrase($self, $addr, 1, $pfx); } } } @x = (subject => 'S'); while (my ($field, $pfx) = splice(@x, 0, 2)) { my $val = $smsg->{$field}; index_text($self, $val, 1, $pfx) if $val ne ''; } } sub index_diff_inc ($$$$) { my ($self, $text, $pfx, $xnq) = @_; if (@$xnq) { index_text($self, join("\n", @$xnq), 1, 'XNQ'); @$xnq = (); } if ($pfx eq 'XDFN') { index_phrase($self, $text, 1, $pfx); } else { index_text($self, $text, 1, $pfx); } } sub index_old_diff_fn { my ($self, $seen, $fa, $fb, $xnq) = @_; # no renames or space support for traditional diffs, # find the number of leading common paths to strip: my @fa = split(m'/', $fa); my @fb = split(m'/', $fb); while (scalar(@fa) && scalar(@fb)) { $fa = join('/', @fa); $fb = join('/', @fb); if ($fa eq $fb) { unless ($seen->{$fa}++) { index_diff_inc($self, $fa, 'XDFN', $xnq); } return 1; } shift @fa; shift @fb; } 0; } sub index_diff ($$$) { my ($self, $txt, $doc) = @_; my %seen; my $in_diff; my $xnq = []; my @l = split(/\n/, $$txt); undef $$txt; while (defined($_ = shift @l)) { if ($in_diff && /^GIT binary patch/) { push @$xnq, $_; while (@l && $l[0] =~ /^(?:literal|delta) /) { # TODO allow searching by size range? # allows searching by exact size via: # "literal $SIZE" or "delta $SIZE" push @$xnq, shift(@l); # skip base85 and empty lines while (@l && ($l[0] =~ /$BASE85/o || $l[0] !~ /\S/)) { shift @l; } # loop hits trailing "literal 0\nHcmV?d00001\n" } } elsif ($in_diff && s/^ //) { # diff context index_diff_inc($self, $_, 'XDFCTX', $xnq); } elsif (/^-- $/) { # email signature begins $in_diff = undef; } elsif (m!^diff --git ("?[^/]+/.+) ("?[^/]+/.+)\z!) { # capture filenames here for binary diffs: my ($fa, $fb) = ($1, $2); push @$xnq, $_; $in_diff = 1; $fa = (split(m'/', git_unquote($fa), 2))[1]; $fb = (split(m'/', git_unquote($fb), 2))[1]; $seen{$fa}++ or index_diff_inc($self, $fa, 'XDFN', $xnq); $seen{$fb}++ or index_diff_inc($self, $fb, 'XDFN', $xnq); # traditional diff: } elsif (m/^diff -(.+) (\S+) (\S+)$/) { my ($opt, $fa, $fb) = ($1, $2, $3); push @$xnq, $_; # only support unified: next unless $opt =~ /[uU]/; $in_diff = index_old_diff_fn($self, \%seen, $fa, $fb, $xnq); } elsif (m!^--- ("?[^/]+/.+)!) { my $fn = $1; $fn = (split(m'/', git_unquote($fn), 2))[1]; $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq); $in_diff = 1; } elsif (m!^\+\+\+ ("?[^/]+/.+)!) { my $fn = $1; $fn = (split(m'/', git_unquote($fn), 2))[1]; $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq); $in_diff = 1; } elsif (/^--- (\S+)/) { $in_diff = $1; # old diff filename push @$xnq, $_; } elsif (defined $in_diff && /^\+\+\+ (\S+)/) { $in_diff = index_old_diff_fn($self, \%seen, $in_diff, $1, $xnq); } elsif ($in_diff && s/^\+//) { # diff added index_diff_inc($self, $_, 'XDFB', $xnq); } elsif ($in_diff && s/^-//) { # diff removed index_diff_inc($self, $_, 'XDFA', $xnq); } elsif (m!^index ([a-f0-9]+)\.\.([a-f0-9]+)!) { my ($ba, $bb) = ($1, $2); index_git_blob_id($doc, 'XDFPRE', $ba); index_git_blob_id($doc, 'XDFPOST', $bb); $in_diff = 1; } elsif (/^@@ (?:\S+) (?:\S+) @@\s*$/) { # traditional diff w/o -p } elsif (/^@@ (?:\S+) (?:\S+) @@\s*(\S+.*)$/) { # hunk header context index_diff_inc($self, $1, 'XDFHH', $xnq); # ignore the following lines: } elsif (/^(?:dis)similarity index/ || /^(?:old|new) mode/ || /^(?:deleted|new) file mode/ || /^(?:copy|rename) (?:from|to) / || /^(?:dis)?similarity index / || /^\\ No newline at end of file/ || /^Binary files .* differ/) { push @$xnq, $_; } elsif ($_ eq '') { # possible to be in diff context, some mail may be # stripped by MUA or even GNU diff(1). "git apply" # treats a bare "\n" as diff context, too } else { push @$xnq, $_; warn "non-diff line: $_\n" if DEBUG && $_ ne ''; $in_diff = undef; } } index_text($self, join("\n", @$xnq), 1, 'XNQ'); } sub index_xapian { # msg_iter callback my $part = $_[0]->[0]; # ignore $depth and $idx my ($self, $doc) = @{$_[1]}; my $ct = $part->content_type || 'text/plain'; my $fn = $part->filename; if (defined $fn && $fn ne '') { index_phrase($self, $fn, 1, 'XFN'); } if ($part->{is_submsg}) { my $mids = mids_for_index($part); index_ids($self, $doc, $part, $mids); my $smsg = bless {}, 'PublicInbox::Smsg'; $smsg->populate($part); index_headers($self, $smsg); } my ($s, undef) = msg_part_text($part, $ct); defined $s or return; $_[0]->[0] = $part = undef; # free memory if ($s =~ /^(?:diff|---|\+\+\+) /ms) { open(my $fh, '+>:utf8', undef) or die "open: $!"; open(my $eh, '+>', undef) or die "open: $!"; $fh->autoflush(1); print $fh $s or die "print: $!"; sysseek($fh, 0, SEEK_SET) or die "sysseek: $!"; my $id = ($self->{ibx} // $self->{eidx})->git->qx( [qw(patch-id --stable)], {}, { 0 => $fh, 2 => $eh }); $id =~ /\A([a-f0-9]{40,})/ and $doc->add_term('XDFID'.$1); seek($eh, 0, SEEK_SET) or die "seek: $!"; while (<$eh>) { warn $_ } } # split off quoted and unquoted blocks: my @sections = PublicInbox::MsgIter::split_quotes($s); undef $s; # free memory for my $txt (@sections) { if ($txt =~ /\A>/) { index_text($self, $txt, 0, 'XQUOT'); } else { # does it look like a diff? if ($txt =~ /^(?:diff|---|\+\+\+) /ms) { index_diff($self, \$txt, $doc); } else { index_text($self, $txt, 1, 'XNQ'); } } undef $txt; # free memory } } sub index_list_id ($$$) { my ($self, $doc, $hdr) = @_; for my $l ($hdr->header_raw('List-Id')) { $l =~ /<([^>]+)>/ or next; my $lid = lc $1; $doc->add_boolean_term('G' . $lid); index_phrase($self, $lid, 1, 'XL'); # probabilistic } } sub index_ids ($$$$) { my ($self, $doc, $hdr, $mids) = @_; for my $mid (@$mids) { index_phrase($self, $mid, 1, 'XM'); # because too many Message-IDs are prefixed with # "Pine.LNX."... if ($mid =~ /\w{12,}/) { my @long = ($mid =~ /(\w{3,}+)/g); index_phrase($self, join(' ', @long), 1, 'XM'); } } $doc->add_boolean_term('Q' . $_) for @$mids; index_list_id($self, $doc, $hdr); } sub eml2doc ($$$;$) { my ($self, $eml, $smsg, $mids) = @_; $mids //= mids_for_index($eml); my $doc = $X->{Document}->new; add_val($doc, PublicInbox::Search::TS(), $smsg->{ts}); my @ds = gmtime($smsg->{ds}); my $yyyymmdd = strftime('%Y%m%d', @ds); add_val($doc, PublicInbox::Search::YYYYMMDD(), $yyyymmdd); my $dt = strftime('%Y%m%d%H%M%S', @ds); add_val($doc, PublicInbox::Search::DT(), $dt); add_val($doc, PublicInbox::Search::BYTES(), $smsg->{bytes}); add_val($doc, PublicInbox::Search::UID(), $smsg->{num}); add_val($doc, PublicInbox::Search::THREADID, $smsg->{tid}); my $tg = term_generator($self); $tg->set_document($doc); index_headers($self, $smsg); if (defined(my $eidx_key = $smsg->{eidx_key})) { $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.'; } msg_iter($eml, \&index_xapian, [ $self, $doc ]); index_ids($self, $doc, $eml, $mids); # by default, we maintain compatibility with v1.5.0 and earlier # by writing to docdata.glass, users who never expect to downgrade can # use --skip-docdata if (!$self->{-skip_docdata}) { # WWW doesn't need {to} or {cc}, only NNTP $smsg->{to} = $smsg->{cc} = ''; $smsg->parse_references($eml, $mids); my $data = $smsg->to_doc_data; $doc->set_data($data); } if (my $altid = $self->{-altid}) { foreach my $alt (@$altid) { my $pfx = $alt->{xprefix}; foreach my $mid (@$mids) { my $id = $alt->mid2alt($mid); next unless defined $id; $doc->add_boolean_term($pfx . $id); } } } $doc; } sub add_xapian ($$$$) { my ($self, $eml, $smsg, $mids) = @_; begin_txn_lazy($self); my $merge_vmd = delete $smsg->{-merge_vmd}; my $doc = eml2doc($self, $eml, $smsg, $mids); if (my $old = $merge_vmd ? _get_doc($self, $smsg->{num}) : undef) { my @x = @VMD_MAP; while (my ($field, $pfx) = splice(@x, 0, 2)) { for my $term (xap_terms($pfx, $old)) { $doc->add_boolean_term($pfx.$term); } } } $self->{xdb}->replace_document($smsg->{num}, $doc); } sub _msgmap_init ($) { my ($self) = @_; die "BUG: _msgmap_init is only for v1\n" if $self->{ibx}->version != 1; $self->{mm} //= do { require PublicInbox::Msgmap; PublicInbox::Msgmap->new_file($self->{ibx}, 1); }; } sub add_message { # mime = PublicInbox::Eml or Email::MIME object my ($self, $mime, $smsg, $sync) = @_; begin_txn_lazy($self); my $mids = mids_for_index($mime); $smsg //= bless { blob => '' }, 'PublicInbox::Smsg'; # test-only compat $smsg->{mid} //= $mids->[0]; # v1 compatibility $smsg->{num} //= do { # v1 _msgmap_init($self); index_mm($self, $mime, $smsg->{blob}, $sync); }; # v1 and tests only: $smsg->populate($mime, $sync); $smsg->{bytes} //= length($mime->as_string); eval { # order matters, overview stores every possible piece of # data in doc_data (deflated). Xapian only stores a subset # of the fields which exist in over.sqlite3. We may stop # storing doc_data in Xapian sometime after we get multi-inbox # search working. if (my $oidx = $self->{oidx}) { # v1 only $oidx->add_overview($mime, $smsg); } if (need_xapian($self)) { add_xapian($self, $mime, $smsg, $mids); } }; if ($@) { warn "failed to index message <".join('> <',@$mids).">: $@\n"; return undef; } $smsg->{num}; } sub _get_doc ($$) { my ($self, $docid) = @_; my $doc = eval { $self->{xdb}->get_document($docid) }; $doc // do { warn "E: $@\n" if $@; warn "E: #$docid missing in Xapian\n"; undef; } } sub add_eidx_info { my ($self, $docid, $eidx_key, $eml) = @_; begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; term_generator($self)->set_document($doc); # '.' is special for lei_store $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.'; index_list_id($self, $doc, $eml); $self->{xdb}->replace_document($docid, $doc); } sub get_terms { my ($self, $pfx, $docid) = @_; begin_txn_lazy($self); xap_terms($pfx, $self->{xdb}, $docid); } sub remove_eidx_info { my ($self, $docid, $eidx_key, $eml) = @_; begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; eval { $doc->remove_term('O'.$eidx_key) }; warn "W: ->remove_term O$eidx_key: $@\n" if $@; for my $l ($eml ? $eml->header_raw('List-Id') : ()) { $l =~ /<([^>]+)>/ or next; my $lid = lc $1; eval { $doc->remove_term('G' . $lid) }; warn "W: ->remove_term G$lid: $@\n" if $@; # nb: we don't remove the XL probabilistic terms # since terms may overlap if cross-posted. # # IOW, a message which has both # and would have overlapping # "XLexample" and "XLcom" as terms and which we # wouldn't know if they're safe to remove if we just # unindex while preserving # . # # In any case, this entire sub is will likely never # be needed and users using the "l:" prefix are probably # rarer. } $self->{xdb}->replace_document($docid, $doc); } sub set_vmd { my ($self, $docid, $vmd) = @_; begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; my ($end, @rm, @add); my @x = @VMD_MAP; while (my ($field, $pfx) = splice(@x, 0, 2)) { my $set = $vmd->{$field} // next; my %keep = map { $_ => 1 } @$set; my %add = %keep; $end //= $doc->termlist_end; for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) { $cur->skip_to($pfx); last if $cur == $end; my $v = $cur->get_termname; $v =~ s/\A$pfx//s or next; $keep{$v} ? delete($add{$v}) : push(@rm, $pfx.$v); } push(@add, map { $pfx.$_ } keys %add); } return unless scalar(@rm) || scalar(@add); $doc->remove_term($_) for @rm; $doc->add_boolean_term($_) for @add; $self->{xdb}->replace_document($docid, $doc); } sub apply_vmd_mod ($$) { my ($doc, $vmd_mod) = @_; my $updated = 0; my @x = @VMD_MAP; while (my ($field, $pfx) = splice(@x, 0, 2)) { # field: "L" or "kw" for my $val (@{$vmd_mod->{"-$field"} // []}) { eval { $doc->remove_term($pfx . $val); ++$updated; }; } for my $val (@{$vmd_mod->{"+$field"} // []}) { $doc->add_boolean_term($pfx . $val); ++$updated; } } $updated; } sub add_vmd { my ($self, $docid, $vmd) = @_; begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; my @x = @VMD_MAP; my $updated = 0; while (my ($field, $pfx) = splice(@x, 0, 2)) { my $add = $vmd->{$field} // next; $doc->add_boolean_term($pfx . $_) for @$add; $updated += scalar(@$add); } $updated += apply_vmd_mod($doc, $vmd); $self->{xdb}->replace_document($docid, $doc) if $updated; } sub remove_vmd { my ($self, $docid, $vmd) = @_; begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; my $replace; my @x = @VMD_MAP; while (my ($field, $pfx) = splice(@x, 0, 2)) { my $rm = $vmd->{$field} // next; for (@$rm) { eval { $doc->remove_term($pfx . $_); $replace = 1; }; } } $self->{xdb}->replace_document($docid, $doc) if $replace; } sub update_vmd { my ($self, $docid, $vmd_mod) = @_; begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; my $updated = apply_vmd_mod($doc, $vmd_mod); $self->{xdb}->replace_document($docid, $doc) if $updated; $updated; } sub xdb_remove { my ($self, @docids) = @_; begin_txn_lazy($self); my $xdb = $self->{xdb} // die 'BUG: missing {xdb}'; for my $docid (@docids) { eval { $xdb->delete_document($docid) }; warn "E: #$docid not in in Xapian? $@\n" if $@; } } sub xdb_remove_quiet { my ($self, $docid) = @_; begin_txn_lazy($self); my $xdb = $self->{xdb} // die 'BUG: missing {xdb}'; eval { $xdb->delete_document($docid) }; ++$self->{-quiet_rm} unless $@; } sub nr_quiet_rm { delete($_[0]->{-quiet_rm}) // 0 } sub index_git_blob_id { my ($doc, $pfx, $objid) = @_; my $len = length($objid); for (my $len = length($objid); $len >= 7; ) { $doc->add_term($pfx.$objid); $objid = substr($objid, 0, --$len); } } # v1 only sub unindex_eml { my ($self, $oid, $eml) = @_; my $mids = mids($eml); my $nr = 0; my %tmp; for my $mid (@$mids) { my @removed = $self->{oidx}->remove_oid($oid, $mid); $nr += scalar @removed; $tmp{$_}++ for @removed; } if (!$nr) { my $m = join('> <', @$mids); warn "W: <$m> missing for removal from overview\n"; } while (my ($num, $nr) = each %tmp) { warn "BUG: $num appears >1 times ($nr) for $oid\n" if $nr != 1; } if ($nr) { $self->{mm}->num_delete($_) for (keys %tmp); } else { # just in case msgmap and over.sqlite3 become desynched: $self->{mm}->mid_delete($mids->[0]); } xdb_remove($self, keys %tmp) if need_xapian($self); } sub index_mm { my ($self, $mime, $oid, $sync) = @_; my $mids = mids($mime); my $mm = $self->{mm}; if ($sync->{reindex}) { my $oidx = $self->{oidx}; for my $mid (@$mids) { my ($num, undef) = $oidx->num_mid0_for_oid($oid, $mid); return $num if defined $num; } $mm->num_for($mids->[0]) // $mm->mid_insert($mids->[0]); } else { # fallback to num_for since filters like RubyLang set the number $mm->mid_insert($mids->[0]) // $mm->num_for($mids->[0]); } } sub is_bad_blob ($$$$) { my ($oid, $type, $size, $expect_oid) = @_; if ($type ne 'blob') { carp "W: $expect_oid is not a blob (type=$type)"; return 1; } croak "BUG: $oid != $expect_oid" if $oid ne $expect_oid; $size == 0 ? 1 : 0; # size == 0 means purged } sub index_both { # git->cat_async callback my ($bref, $oid, $type, $size, $sync) = @_; return if is_bad_blob($oid, $type, $size, $sync->{oid}); my ($nr, $max) = @$sync{qw(nr max)}; ++$$nr; $$max -= $size; my $smsg = bless { blob => $oid }, 'PublicInbox::Smsg'; $smsg->set_bytes($$bref, $size); my $self = $sync->{sidx}; local $self->{current_info} = "$self->{current_info}: $oid"; my $eml = PublicInbox::Eml->new($bref); $smsg->{num} = index_mm($self, $eml, $oid, $sync) or die "E: could not generate NNTP article number for $oid"; add_message($self, $eml, $smsg, $sync); ++$self->{nidx}; my $cur_cmt = $sync->{cur_cmt} // die 'BUG: {cur_cmt} missing'; ${$sync->{latest_cmt}} = $cur_cmt; } sub unindex_both { # git->cat_async callback my ($bref, $oid, $type, $size, $sync) = @_; return if is_bad_blob($oid, $type, $size, $sync->{oid}); my $self = $sync->{sidx}; local $self->{current_info} = "$self->{current_info}: $oid"; unindex_eml($self, $oid, PublicInbox::Eml->new($bref)); # may be undef if leftover if (defined(my $cur_cmt = $sync->{cur_cmt})) { ${$sync->{latest_cmt}} = $cur_cmt; } ++$self->{nidx}; } sub with_umask { my $self = shift; ($self->{ibx} // $self->{eidx})->with_umask(@_); } # called by public-inbox-index sub index_sync { my ($self, $opt) = @_; delete $self->{lock_path} if $opt->{-skip_lock}; $self->with_umask(\&_index_sync, $self, $opt); if ($opt->{reindex} && !$opt->{quit} && !grep(defined, @$opt{qw(since until)})) { my %again = %$opt; delete @again{qw(rethread reindex)}; index_sync($self, \%again); $opt->{quit} = $again{quit}; # propagate to caller } } sub check_size { # check_async cb for -index --max-size=... my ($oid, $type, $size, $arg, $git) = @_; (($type // '') eq 'blob') or die "E: bad $oid in $git->{git_dir}"; if ($size <= $arg->{max_size}) { $git->cat_async($oid, $arg->{index_oid}, $arg); } else { warn "W: skipping $oid ($size > $arg->{max_size})\n"; } } sub v1_checkpoint ($$;$) { my ($self, $sync, $stk) = @_; $self->{ibx}->git->async_wait_all; # $newest may be undef my $newest = $stk ? $stk->{latest_cmt} : ${$sync->{latest_cmt}}; if (defined($newest)) { my $cur = $self->{mm}->last_commit; if (need_update($self, $sync, $cur, $newest)) { $self->{mm}->last_commit($newest); } } ${$sync->{max}} = $self->{batch_bytes}; $self->{mm}->{dbh}->commit; eval { $self->{mm}->{dbh}->do('PRAGMA optimize') }; my $xdb = $self->{xdb}; if ($newest && $xdb) { my $cur = $xdb->get_metadata('last_commit'); if (need_update($self, $sync, $cur, $newest)) { $xdb->set_metadata('last_commit', $newest); } } if ($stk) { # all done if $stk is passed # let SearchView know a full --reindex was done so it can # generate ->has_threadid-dependent links if ($xdb && $sync->{reindex} && !ref($sync->{reindex})) { my $n = $xdb->get_metadata('has_threadid'); $xdb->set_metadata('has_threadid', '1') if $n ne '1'; } $self->{oidx}->rethread_done($sync->{-opt}); # all done } commit_txn_lazy($self); $sync->{ibx}->git->cleanup; my $nr = ${$sync->{nr}}; idx_release($self, $nr); # let another process do some work... if (my $pr = $sync->{-opt}->{-progress}) { $pr->("indexed $nr/$sync->{ntodo}\n") if $nr; } if (!$stk && !$sync->{quit}) { # more to come begin_txn_lazy($self); $self->{mm}->{dbh}->begin_work; } } # only for v1 sub process_stack { my ($self, $sync, $stk) = @_; my $git = $sync->{ibx}->git; my $max = $self->{batch_bytes}; my $nr = 0; $sync->{nr} = \$nr; $sync->{max} = \$max; $sync->{sidx} = $self; $sync->{latest_cmt} = \(my $latest_cmt); $self->{mm}->{dbh}->begin_work; if (my @leftovers = keys %{delete($sync->{D}) // {}}) { warn('W: unindexing '.scalar(@leftovers)." leftovers\n"); for my $oid (@leftovers) { last if $sync->{quit}; $oid = unpack('H*', $oid); $git->cat_async($oid, \&unindex_both, $sync); } } if ($sync->{max_size} = $sync->{-opt}->{max_size}) { $sync->{index_oid} = \&index_both; } while (my ($f, $at, $ct, $oid, $cur_cmt) = $stk->pop_rec) { my $arg = { %$sync, cur_cmt => $cur_cmt, oid => $oid }; last if $sync->{quit}; if ($f eq 'm') { $arg->{autime} = $at; $arg->{cotime} = $ct; if ($sync->{max_size}) { $git->check_async($oid, \&check_size, $arg); } else { $git->cat_async($oid, \&index_both, $arg); } v1_checkpoint($self, $sync) if $max <= 0; } elsif ($f eq 'd') { $git->cat_async($oid, \&unindex_both, $arg); } } v1_checkpoint($self, $sync, $sync->{quit} ? undef : $stk); } sub log2stack ($$$) { my ($sync, $git, $range) = @_; my $D = $sync->{D}; # OID_BIN => NR (if reindexing, undef otherwise) my ($add, $del); if ($sync->{ibx}->version == 1) { my $path = $hex.'{2}/'.$hex.'{38}'; $add = qr!\A:000000 100644 \S+ ($OID) A\t$path$!; $del = qr!\A:100644 000000 ($OID) \S+ D\t$path$!; } else { $del = qr!\A:\d{6} 100644 $OID ($OID) [AM]\td$!; $add = qr!\A:\d{6} 100644 $OID ($OID) [AM]\tm$!; } # Count the new files so they can be added newest to oldest # and still have numbers increasing from oldest to newest my @cmd = qw(log --raw -r --pretty=tformat:%at-%ct-%H --no-notes --no-color --no-renames --no-abbrev); for my $k (qw(since until)) { my $v = $sync->{-opt}->{$k} // next; next if !$sync->{-opt}->{reindex}; push @cmd, "--$k=$v"; } my $fh = $git->popen(@cmd, $range); my ($at, $ct, $stk, $cmt, $l); while (defined($l = <$fh>)) { return if $sync->{quit}; if ($l =~ /\A([0-9]+)-([0-9]+)-($OID)$/o) { ($at, $ct, $cmt) = ($1 + 0, $2 + 0, $3); $stk //= PublicInbox::IdxStack->new($cmt); } elsif ($l =~ /$del/) { my $oid = $1; if ($D) { # reindex case $D->{pack('H*', $oid)}++; } else { # non-reindex case: $stk->push_rec('d', $at, $ct, $oid, $cmt); } } elsif ($l =~ /$add/) { my $oid = $1; if ($D) { my $oid_bin = pack('H*', $oid); my $nr = --$D->{$oid_bin}; delete($D->{$oid_bin}) if $nr <= 0; # nr < 0 (-1) means it never existed next if $nr >= 0; } $stk->push_rec('m', $at, $ct, $oid, $cmt); } } close $fh or die "git log failed: \$?=$?"; $stk //= PublicInbox::IdxStack->new; $stk->read_prepare; } sub prepare_stack ($$) { my ($sync, $range) = @_; my $git = $sync->{ibx}->git; if (index($range, '..') < 0) { # don't show annoying git errors to users who run -index # on empty inboxes $git->qx(qw(rev-parse -q --verify), "$range^0"); return PublicInbox::IdxStack->new->read_prepare if $?; } $sync->{D} = $sync->{reindex} ? {} : undef; # OID_BIN => NR log2stack($sync, $git, $range); } # --is-ancestor requires git 1.8.0+ sub is_ancestor ($$$) { my ($git, $cur, $tip) = @_; return 0 unless $git->check($cur); my $cmd = [ 'git', "--git-dir=$git->{git_dir}", qw(merge-base --is-ancestor), $cur, $tip ]; my $pid = spawn($cmd); waitpid($pid, 0) == $pid or die join(' ', @$cmd) .' did not finish'; $? == 0; } sub need_update ($$$$) { my ($self, $sync, $cur, $new) = @_; my $git = $self->{ibx}->git; $cur //= ''; # XS Search::Xapian ->get_metadata doesn't give undef # don't rewind if --{since,until,before,after} are in use return if $cur ne '' && grep(defined, @{$sync->{-opt}}{qw(since until)}) && is_ancestor($git, $new, $cur); return 1 if $cur ne '' && !is_ancestor($git, $cur, $new); my $range = $cur eq '' ? $new : "$cur..$new"; chomp(my $n = $git->qx(qw(rev-list --count), $range)); ($n eq '' || $n > 0); } # The last git commit we indexed with Xapian or SQLite (msgmap) # This needs to account for cases where Xapian or SQLite is # out-of-date with respect to the other. sub _last_x_commit { my ($self, $mm) = @_; my $lm = $mm->last_commit || ''; my $lx = ''; if (need_xapian($self)) { $lx = $self->{xdb}->get_metadata('last_commit') || ''; } else { $lx = $lm; } # Use last_commit from msgmap if it is older or unset if (!$lm || ($lx && $lm && is_ancestor($self->{ibx}->git, $lm, $lx))) { $lx = $lm; } $lx; } sub reindex_from ($$) { my ($reindex, $last_commit) = @_; return $last_commit unless $reindex; ref($reindex) eq 'HASH' ? $reindex->{from} : ''; } sub quit_cb ($) { my ($sync) = @_; sub { # we set {-opt}->{quit} too, so ->index_sync callers # can abort multi-inbox loops this way $sync->{quit} = $sync->{-opt}->{quit} = 1; warn "gracefully quitting\n"; } } # indexes all unindexed messages (v1 only) sub _index_sync { my ($self, $opt) = @_; my $tip = $opt->{ref} || 'HEAD'; my $ibx = $self->{ibx}; local $self->{current_info} = "$ibx->{inboxdir}"; $self->{batch_bytes} = $opt->{batch_size} // $BATCH_BYTES; $ibx->git->batch_prepare; my $pr = $opt->{-progress}; my $sync = { reindex => $opt->{reindex}, -opt => $opt, ibx => $ibx }; my $quit = quit_cb($sync); local $SIG{QUIT} = $quit; local $SIG{INT} = $quit; local $SIG{TERM} = $quit; my $xdb = $self->begin_txn_lazy; $self->{oidx}->rethread_prepare($opt); my $mm = _msgmap_init($self); if ($sync->{reindex}) { my $last = $mm->last_commit; if ($last) { $tip = $last; } else { # somebody just blindly added --reindex when indexing # for the first time, allow it: undef $sync->{reindex}; } } my $last_commit = _last_x_commit($self, $mm); my $lx = reindex_from($sync->{reindex}, $last_commit); my $range = $lx eq '' ? $tip : "$lx..$tip"; $pr->("counting changes\n\t$range ... ") if $pr; my $stk = prepare_stack($sync, $range); $sync->{ntodo} = $stk ? $stk->num_records : 0; $pr->("$sync->{ntodo}\n") if $pr; # continue previous line process_stack($self, $sync, $stk) if !$sync->{quit}; } sub DESTROY { # order matters for unlocking $_[0]->{xdb} = undef; $_[0]->{lockfh} = undef; } sub _begin_txn { my ($self) = @_; my $xdb = $self->{xdb} || idx_acquire($self); $self->{oidx}->begin_lazy if $self->{oidx}; $xdb->begin_transaction if $xdb; $self->{txn} = 1; $xdb; } sub begin_txn_lazy { my ($self) = @_; $self->with_umask(\&_begin_txn, $self) if !$self->{txn}; } # store 'indexlevel=medium' in v2 shard=0 and v1 (only one shard) # This metadata is read by Admin::detect_indexlevel: sub set_metadata_once { my ($self) = @_; return if $self->{shard}; # only continue if undef or 0, not >0 my $xdb = $self->{xdb}; if (delete($self->{-set_has_threadid_once})) { $xdb->set_metadata('has_threadid', '1'); } if (delete($self->{-set_indexlevel_once})) { my $level = $xdb->get_metadata('indexlevel'); if (!$level || $level ne 'medium') { $xdb->set_metadata('indexlevel', 'medium'); } } if (delete($self->{-set_skip_docdata_once})) { $xdb->get_metadata('skip_docdata') or $xdb->set_metadata('skip_docdata', '1'); } } sub _commit_txn { my ($self) = @_; if (my $eidx = $self->{eidx}) { $eidx->git->async_wait_all; $eidx->{transact_bytes} = 0; } if (my $xdb = $self->{xdb}) { set_metadata_once($self); $xdb->commit_transaction; } $self->{oidx}->commit_lazy if $self->{oidx}; } sub commit_txn_lazy { my ($self) = @_; delete($self->{txn}) and $self->with_umask(\&_commit_txn, $self); } sub eidx_shard_new { my ($class, $eidx, $shard) = @_; my $self = bless { eidx => $eidx, xpfx => $eidx->{xpfx}, indexlevel => $eidx->{indexlevel}, -skip_docdata => 1, shard => $shard, creat => 1, }, $class; $self->{-set_indexlevel_once} = 1 if $self->{indexlevel} eq 'medium'; $self; } 1; public-inbox-1.9.0/lib/PublicInbox/SearchIdxShard.pm000066400000000000000000000042601430031475700223000ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ # Internal interface for a single Xapian shard in V2 inboxes. # See L for more info on how we shard Xapian package PublicInbox::SearchIdxShard; use strict; use v5.10.1; use parent qw(PublicInbox::SearchIdx PublicInbox::IPC); use PublicInbox::OnDestroy; sub new { my ($class, $v2w, $shard) = @_; # v2w may be ExtSearchIdx my $ibx = $v2w->{ibx}; my $self = $ibx ? $class->SUPER::new($ibx, 1, $shard) : $class->eidx_shard_new($v2w, $shard); # create the DB before forking: $self->idx_acquire; $self->set_metadata_once; $self->idx_release; if ($v2w->{parallel}) { local $self->{-v2w_afc} = $v2w; $self->ipc_worker_spawn("shard[$shard]"); # F_SETPIPE_SZ = 1031 on Linux; increasing the pipe size for # inputs speeds V2Writable batch imports across 8 cores by # nearly 20%. Since any of our responses are small, make # the response pipe as small as possible if ($^O eq 'linux') { fcntl($self->{-ipc_req}, 1031, 1048576); fcntl($self->{-ipc_res}, 1031, 4096); } } $self; } sub _worker_done { my ($self) = @_; if ($self->need_xapian) { die "$$ $0 xdb not released\n" if $self->{xdb}; } die "$$ $0 still in transaction\n" if $self->{txn}; } sub ipc_atfork_child { # called automatically before ipc_worker_loop my ($self) = @_; my $v2w = delete $self->{-v2w_afc} or die 'BUG: {-v2w_afc} missing'; $v2w->atfork_child; # calls ipc_sibling_atfork_child on our siblings $v2w->{current_info} = "[$self->{shard}]"; # for $SIG{__WARN__} $self->begin_txn_lazy; # caller must capture this: PublicInbox::OnDestroy->new($$, \&_worker_done, $self); } sub index_eml { my ($self, $eml, $smsg, $eidx_key) = @_; $smsg->{eidx_key} = $eidx_key if defined $eidx_key; $self->ipc_do('add_xapian', $eml, $smsg); } # wait for return to determine when ipc_do('commit_txn_lazy') is done sub echo { shift; "@_"; } sub idx_close { my ($self) = @_; die "transaction in progress $self\n" if $self->{txn}; $self->idx_release if $self->{xdb}; } sub shard_close { my ($self) = @_; $self->ipc_do('idx_close'); $self->ipc_worker_stop; } 1; public-inbox-1.9.0/lib/PublicInbox/SearchQuery.pm000066400000000000000000000024251430031475700217000ustar00rootroot00000000000000# Copyright (C) 2015-2021 all contributors # License: AGPL-3.0+ # used by PublicInbox::SearchView and PublicInbox::WwwListing package PublicInbox::SearchQuery; use strict; use v5.10.1; use URI::Escape qw(uri_escape); use PublicInbox::MID qw(MID_ESC); our $LIM = 200; sub new { my ($class, $qp) = @_; my $r = $qp->{r}; # relevance my $t = $qp->{t}; # collapse threads my ($l) = (($qp->{l} || '') =~ /([0-9]+)/); $l = $LIM if !$l || $l > $LIM; bless { q => $qp->{'q'}, x => $qp->{x} || '', o => (($qp->{o} || '0') =~ /(-?[0-9]+)/), l => $l, r => (defined $r && $r ne '0'), t => (defined $t && $t ne '0'), }, $class; } sub qs_html { my ($self, %override) = @_; if (scalar(keys(%override))) { $self = bless { (%$self, %override) }, ref($self); } my $qs = ''; if (defined(my $q = $self->{'q'})) { $q = uri_escape($q, MID_ESC); $q =~ s/%20/+/g; # improve URL readability $qs .= "q=$q"; } if (my $o = $self->{o}) { # ignore o == 0 $qs .= "&o=$o"; } if (my $l = $self->{l}) { $qs .= "&l=$l" unless $l == $LIM; } for my $bool (qw(r t)) { $qs .= "&$bool" if $self->{$bool}; } if (my $x = $self->{x}) { $qs .= "&x=$x" if ($x eq 't' || $x eq 'A' || $x eq 'm'); } $qs; } 1; public-inbox-1.9.0/lib/PublicInbox/SearchThread.pm000066400000000000000000000117721430031475700220070ustar00rootroot00000000000000# This library is free software; you can redistribute it and/or modify # it under the same terms as Perl itself. # # This license differs from the rest of public-inbox # # Our own jwz-style threading class based on Mail::Thread from CPAN. # Mail::Thread is unmaintained and unavailable on some distros. # We also do not want pruning or subject grouping, since we want # to encourage strict threading and hopefully encourage people # to use proper In-Reply-To/References. # # This includes fixes from several open bugs for Mail::Thread # # Avoid circular references # - https://rt.cpan.org/Public/Bug/Display.html?id=22817 # # And avoid recursion in recurse_down: # - https://rt.cpan.org/Ticket/Display.html?id=116727 # - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=833479 package PublicInbox::SearchThread; use strict; use warnings; use PublicInbox::MID qw($MID_EXTRACT); sub thread { my ($msgs, $ordersub, $ctx) = @_; my (%id_table, @imposters); keys(%id_table) = scalar @$msgs; # pre-size # A. put all current non-imposter $msgs (non-ghosts) into %id_table # (imposters are messages with reused Message-IDs) # Sadly, we sort here anyways since the fill-in-the-blanks References: # can be shakier if somebody used In-Reply-To with multiple, disparate # messages. So, take the client Date: into account since we can't # always determine ordering when somebody uses multiple In-Reply-To. my @kids = sort { $a->{ds} <=> $b->{ds} } grep { # this delete saves around 4K across 1K messages # TODO: move this to a more appropriate place, breaks tests # if we do it during psgi_cull delete $_->{num}; bless $_, 'PublicInbox::SearchThread::Msg'; if (exists $id_table{$_->{mid}}) { $_->{children} = []; push @imposters, $_; # we'll deal with them later undef; } else { $_->{children} = {}; # will become arrayref later $id_table{$_->{mid}} = $_; defined($_->{references}); } } @$msgs; for my $smsg (@kids) { # This loop exists to help fill in gaps left from missing # messages. It is not needed in a perfect world where # everything is perfectly referenced, only the last ref # matters. my $prev; for my $ref ($smsg->{references} =~ m/$MID_EXTRACT/go) { # Find a Container object for the given Message-ID my $cont = $id_table{$ref} //= PublicInbox::SearchThread::Msg::ghost($ref); # Link the References field's Containers together in # the order implied by the References header # # * If they are already linked don't change the # existing links # * Do not add a link if adding that link would # introduce a loop... if ($prev && !$cont->{parent} && # already linked !$cont->has_descendent($prev) # would loop ) { $prev->add_child($cont); } $prev = $cont; } # C. Set the parent of this message to be the last element in # References. if (defined $prev && !$smsg->has_descendent($prev)) { $prev->add_child($smsg); } } my $ibx = $ctx->{ibx}; my @rootset = grep { # n.b.: delete prevents cyclic refs !delete($_->{parent}) && $_->visible($ibx) } values %id_table; $ordersub->(\@rootset); $_->order_children($ordersub, $ctx) for @rootset; # parent imposter messages with reused Message-IDs unshift(@{$id_table{$_->{mid}}->{children}}, $_) for @imposters; \@rootset; } package PublicInbox::SearchThread::Msg; use base qw(PublicInbox::Smsg); use strict; use warnings; use Carp qw(croak); # declare a ghost smsg (determined by absence of {blob}) sub ghost { bless { mid => $_[0], children => {}, # becomes an array when sorted by ->order(...) }, __PACKAGE__; } sub topmost { my ($self) = @_; my @q = ($self); while (my $cont = shift @q) { return $cont if $cont->{blob}; push @q, values %{$cont->{children}}; } undef; } sub add_child { my ($self, $child) = @_; croak "Cowardly refusing to become my own parent: $self" if $self == $child; my $cid = $child->{mid}; # reparenting: if (defined(my $parent = $child->{parent})) { delete $parent->{children}->{$cid}; } $self->{children}->{$cid} = $child; $child->{parent} = $self; } sub has_descendent { my ($self, $child) = @_; my %seen; # loop prevention while ($child) { return 1 if $self == $child || $seen{$child}++; $child = $child->{parent}; } 0; } # Do not show/keep ghosts iff they have no children. Sometimes # a ghost Message-ID is the result of a long header line # being folded/mangled by a MUA, and not a missing message. sub visible ($$) { my ($self, $ibx) = @_; return 1 if $self->{blob}; if (my $by_mid = $ibx->smsg_by_mid($self->{mid})) { %$self = (%$self, %$by_mid); 1; } else { (scalar values %{$self->{children}}); } } sub order_children { my ($cur, $ordersub, $ctx) = @_; my %seen = ($cur => 1); # self-referential loop prevention my @q = ($cur); my $ibx = $ctx->{ibx}; while (defined($cur = shift @q)) { # the {children} hashref here... my @c = grep { !$seen{$_}++ && visible($_, $ibx) } values %{delete $cur->{children}}; $ordersub->(\@c) if scalar(@c) > 1; $cur->{children} = \@c; # ...becomes an arrayref push @q, @c; } } 1; public-inbox-1.9.0/lib/PublicInbox/SearchView.pm000066400000000000000000000245421430031475700215110ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # # Displays search results for the web interface package PublicInbox::SearchView; use strict; use v5.10.1; use List::Util qw(min max); use URI::Escape qw(uri_unescape); use PublicInbox::Smsg; use PublicInbox::Hval qw(ascii_html obfuscate_addrs mid_href fmt_ts); use PublicInbox::View; use PublicInbox::WwwAtomStream; use PublicInbox::WwwStream qw(html_oneshot); use PublicInbox::SearchThread; use PublicInbox::SearchQuery; use PublicInbox::Search qw(get_pct); my %rmap_inc; sub mbox_results { my ($ctx) = @_; my $q = PublicInbox::SearchQuery->new($ctx->{qp}); if ($ctx->{env}->{'psgi.input'}->read(my $buf, 3)) { $q->{t} = 1 if $buf =~ /\Ax=[^0]/; } require PublicInbox::Mbox; $q->{x} eq 'm' ? PublicInbox::Mbox::mbox_all($ctx, $q) : sres_top_html($ctx); } sub sres_top_html { my ($ctx) = @_; my $srch = $ctx->{ibx}->isrch or return PublicInbox::WWW::need($ctx, 'Search'); my $q = PublicInbox::SearchQuery->new($ctx->{qp}); my $x = $q->{x}; my $o = $q->{o}; my $asc; if ($o < 0) { $asc = 1; $o = -($o + 1); # so [-1] is the last element, like Perl lists } my $code = 200; # double the limit for expanded views: my $opts = { limit => $q->{l}, offset => $o, relevance => $q->{r}, threads => $q->{t}, asc => $asc, }; my ($mset, $total, $err, $html); retry: eval { my $query = $q->{'q'}; $srch->query_approxidate($ctx->{ibx}->git, $query); $mset = $srch->mset($query, $opts); $total = $mset->get_matches_estimated; }; $err = $@; ctx_prepare($q, $ctx); if ($err) { $code = 400; $html = '
'.err_txt($ctx, $err).'

'; } elsif ($total == 0) { if (defined($ctx->{-uxs_retried})) { # undo retry damage: $q->{'q'} = $ctx->{-uxs_retried}; } elsif (index($q->{'q'}, '%') >= 0) { $ctx->{-uxs_retried} = $q->{'q'}; $q->{'q'} = uri_unescape($q->{'q'}); goto retry; } $code = 404; $html = "
\n[No results found]

"; } else { return adump($_[0], $mset, $q, $ctx) if $x eq 'A'; $ctx->{-html_tip} = search_nav_top($mset, $q, $ctx); return mset_thread($ctx, $mset, $q) if $x eq 't'; mset_summary($ctx, $mset, $q); # appends to {-html_tip} $html = ''; } html_oneshot($ctx, $code); } # display non-nested search results similar to what users expect from # regular WWW search engines: sub mset_summary { my ($ctx, $mset, $q) = @_; my $total = $mset->get_matches_estimated; my $pad = length("$total"); my $pfx = ' ' x $pad; my $res = \($ctx->{-html_tip}); my $ibx = $ctx->{ibx}; my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef; my @nums = @{$ibx->isrch->mset_to_artnums($mset)}; my %num2msg = map { $_->{num} => $_ } @{$ibx->over->get_all(@nums)}; my ($min, $max, %seen); foreach my $m ($mset->items) { my $num = shift @nums; my $smsg = delete($num2msg{$num}) // do { warn "$m $num expired\n"; next; }; my $mid = $smsg->{mid}; next if $seen{$mid}++; $mid = mid_href($mid); $ctx->{-t_max} //= $smsg->{ts}; my $rank = sprintf("%${pad}d", $m->get_rank + 1); my $pct = get_pct($m); # only when sorting by relevance, ->items is always # ordered descending: $max //= $pct; $min = $pct; my $s = ascii_html($smsg->{subject}); my $f = ascii_html(delete $smsg->{from_name}); if ($obfs_ibx) { obfuscate_addrs($obfs_ibx, $s); obfuscate_addrs($obfs_ibx, $f); } my $date = fmt_ts($smsg->{ds}); $s = '(no subject)' if $s eq ''; $$res .= qq{$rank. }. $s . "\n"; $$res .= "$pfx - by $f @ $date UTC [$pct%]\n\n"; } if ($q->{r}) { # for descriptions in search_nav_bot $q->{-min_pct} = $min; $q->{-max_pct} = $max; } $$res .= search_nav_bot($mset, $q); undef; } # shorten "/full/path/to/Foo/Bar.pm" to "Foo/Bar.pm" so error # messages don't reveal FS layout info in case people use non-standard # installation paths sub path2inc ($) { my $full = $_[0]; if (my $short = $rmap_inc{$full}) { return $short; } elsif (!scalar(keys %rmap_inc) && -e $full) { %rmap_inc = map {; "$INC{$_}" => $_ } keys %INC; # fall back to basename as last resort $rmap_inc{$full} // (split('/', $full))[-1]; } else { $full; } } sub err_txt { my ($ctx, $err) = @_; my $u = $ctx->{ibx}->base_url($ctx->{env}) . '_/text/help/'; $err =~ s/^\s*Exception:\s*//; # bad word to show users :P $err =~ s!(\S+)!path2inc($1)!sge; $err = ascii_html($err); "\nBad query: $err\n" . qq{See $u for help on using search}; } sub search_nav_top { my ($mset, $q, $ctx) = @_; my $m = $q->qs_html(x => 'm', r => undef, t => undef); my $rv = qq{
};
	my $initial_q = $ctx->{-uxs_retried};
	if (defined $initial_q) {
		my $rewritten = $q->{'q'};
		utf8::decode($initial_q);
		utf8::decode($rewritten);
		$initial_q = ascii_html($initial_q);
		$rewritten = ascii_html($rewritten);
		$rv .= " Warning: Initial query:\n $initial_q\n";
		$rv .= " returned no results, used:\n";
		$rv .= " $rewritten\n instead\n\n";
	}

	$rv .= 'Search results ordered by [';
	if ($q->{r}) {
		my $d = $q->qs_html(r => 0);
		$rv .= qq{date|relevance};
	} else {
		my $d = $q->qs_html(r => 1);
		$rv .= qq{date|relevance};
	}

	$rv .= ']  view[';

	my $x = $q->{x};
	my $pfx = "\t\t\t";
	if ($x eq 't') {
		my $s = $q->qs_html(x => '');
		$rv .= qq{summary|nested};
		$pfx = "thread overview below | ";
	} else {
		my $t = $q->qs_html(x => 't');
		$rv .= qq{summary|nested}
	}
	my $A = $q->qs_html(x => 'A', r => undef);
	$rv .= qq{|Atom feed]\n};
	$rv .= <{t};
*** "t=1" collapses threads in summary, "full threads" requires mbox.gz ***
EOM
	$rv .= <{ibx}->isrch->has_threadid) {
		$rv .= qq{${pfx}download mbox.gz: } .
			# we set name=z w/o using it since it seems required for
			# lynx (but works fine for w3m).
			qq{} .
			qq{|};
	} else { # BOFH needs to --reindex
		$rv .= qq{${pfx}download: } .
			qq{}
	}
	$rv .= qq{
};
}

sub search_nav_bot { # also used by WwwListing for searching extindex miscidx
	my ($mset, $q) = @_;
	my $total = $mset->get_matches_estimated;
	my $l = $q->{l};
	my $rv = '

';
	my $o = $q->{o};
	my $off = $o < 0 ? -($o + 1) : $o;
	my $end = $off + $mset->size;
	my $beg = $off + 1;

	if ($beg <= $end) {
		my $approx = $end == $total ? '' : '~';
		$rv .= "Results $beg-$end of $approx$total";
	} else {
		$rv .= "No more results, only $total";
	}
	my ($next, $join, $prev, $nd, $pd);

	if ($o >= 0) { # sort descending
		my $n = $o + $l;
		if ($n < $total) {
			$next = $q->qs_html(o => $n, l => $l);
			$nd = $q->{r} ? "[<= $q->{-min_pct}%]" : '(older)';
		}
		if ($o > 0) {
			$join = $n < $total ? ' | ' : "\t";
			my $p = $o - $l;
			$prev = $q->qs_html(o => ($p > 0 ? $p : 0));
			$pd = $q->{r} ? "[>= $q->{-max_pct}%]" : '(newer)';
		}
	} else { # o < 0, sort ascending
		my $n = $o - $l;

		if (-$n < $total) {
			$next = $q->qs_html(o => $n, l => $l);
			$nd = $q->{r} ? "[<= $q->{-min_pct}%]" : '(newer)';
		}
		if ($o < -1) {
			$join = -$n < $total ? ' | ' : "\t";
			my $p = $o + $l;
			$prev = $q->qs_html(o => ($p < 0 ? $p : 0));
			$pd = $q->{r} ? "[>= $q->{-max_pct}%]" : '(older)';
		}
	}

	$rv .= qq{  next $nd} if $next;
	$rv .= $join if $join;
	$rv .= qq{prev $pd} if $prev;

	my $rev = $q->qs_html(o => $o < 0 ? 0 : -1);
	$rv .= qq{ | reverse} .
		q{ | sort options + mbox downloads } .
		q{above
}; } sub sort_relevance { @{$_[0]} = sort { (eval { $b->topmost->{pct} } // 0) <=> (eval { $a->topmost->{pct} } // 0) } @{$_[0]}; } sub mset_thread { my ($ctx, $mset, $q) = @_; my $ibx = $ctx->{ibx}; my @pct = map { get_pct($_) } $mset->items; my $msgs = $ibx->isrch->mset_to_smsg($ibx, $mset); my $i = 0; $_->{pct} = $pct[$i++] for @$msgs; my $r = $q->{r}; if ($r) { # for descriptions in search_nav_bot $q->{-min_pct} = min(@pct); $q->{-max_pct} = max(@pct); } my $rootset = PublicInbox::SearchThread::thread($msgs, $r ? \&sort_relevance : \&PublicInbox::View::sort_ds, $ctx); my $skel = search_nav_bot($mset, $q). "
-- links below jump to the message on this page --\n";

	$ctx->{-upfx} = '';
	$ctx->{anchor_idx} = 1;
	$ctx->{cur_level} = 0;
	$ctx->{skel} = \$skel;
	$ctx->{mapping} = {};
	$ctx->{searchview} = 1;
	$ctx->{prev_attr} = '';
	$ctx->{prev_level} = 0;
	$ctx->{s_nr} = scalar(@$msgs).'+ results';

	# reduce hash lookups in skel_dump
	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
	PublicInbox::View::walk_thread($rootset, $ctx,
		\&PublicInbox::View::pre_thread);

	# link $INBOX_DIR/description text to "recent" view around
	# the newest message in this result set:
	$ctx->{-t_max} = max(map { delete $_->{ts} } @$msgs);

	@$msgs = reverse @$msgs if $r;
	$ctx->{msgs} = $msgs;
	PublicInbox::WwwStream::aresponse($ctx, 200, \&mset_thread_i);
}

# callback for PublicInbox::WwwStream::getline
sub mset_thread_i {
	my ($ctx, $eml) = @_;
	$ctx->zmore($ctx->html_top) if exists $ctx->{-html_tip};
	$eml and return PublicInbox::View::eml_entry($ctx, $eml);
	my $smsg = shift @{$ctx->{msgs}} or
		$ctx->zmore(${delete($ctx->{skel})});
	$smsg;
}

sub ctx_prepare {
	my ($q, $ctx) = @_;
	my $qh = $q->{'q'};
	utf8::decode($qh);
	$qh = ascii_html($qh);
	$ctx->{-q_value_html} = $qh;
	$ctx->{-atom} = '?'.$q->qs_html(x => 'A', r => undef);
	$ctx->{-title_html} = "$qh - search results";
	my $extra = '';
	$extra .= qq{} if $q->{r};
	if (my $x = $q->{x}) {
		$x = ascii_html($x);
		$extra .= qq{};
	}
	$ctx->{-extra_form_html} = $extra;
}

sub adump {
	my ($cb, $mset, $q, $ctx) = @_;
	$ctx->{ids} = $ctx->{ibx}->isrch->mset_to_artnums($mset);
	$ctx->{search_query} = $q; # used by WwwAtomStream::atom_header
	PublicInbox::WwwAtomStream->response($ctx, 200, \&adump_i);
}

# callback for PublicInbox::WwwAtomStream::getline
sub adump_i {
	my ($ctx) = @_;
	while (my $num = shift @{$ctx->{ids}}) {
		my $smsg = eval { $ctx->{ibx}->over->get_art($num) } or next;
		return $smsg;
	}
}

1;
public-inbox-1.9.0/lib/PublicInbox/SharedKV.pm000066400000000000000000000106471430031475700211210ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# fork()-friendly key-value store.  Will be used for making
# augmenting Maildirs and mboxes less expensive, maybe.
# We use flock(2) to avoid SQLite lock problems (busy timeouts, backoff)
package PublicInbox::SharedKV;
use strict;
use v5.10.1;
use parent qw(PublicInbox::Lock);
use File::Temp qw(tempdir);
use DBI qw(:sql_types); # SQL_BLOB
use PublicInbox::Spawn;
use File::Path qw(rmtree make_path);

sub dbh {
	my ($self, $lock) = @_;
	$self->{dbh} // do {
		my $f = $self->{filename};
		$lock //= $self->lock_for_scope_fast;
		my $dbh = DBI->connect("dbi:SQLite:dbname=$f", '', '', {
			AutoCommit => 1,
			RaiseError => 1,
			PrintError => 0,
			sqlite_use_immediate_transaction => 1,
			# no sqlite_unicode here, this is for binary data
		});
		my $opt = $self->{opt} // {};
		$dbh->do('PRAGMA synchronous = OFF') if !$opt->{fsync};
		$dbh->do('PRAGMA journal_mode = '.
				($opt->{journal_mode} // 'WAL'));
		$dbh->do(<<'');
CREATE TABLE IF NOT EXISTS kv (
	k VARBINARY PRIMARY KEY NOT NULL,
	v VARBINARY NOT NULL,
	UNIQUE (k)
)

		$self->{dbh} = $dbh;
	}
}

sub new {
	my ($cls, $dir, $base, $opt) = @_;
	my $self = bless { opt => $opt }, $cls;
	make_path($dir) if defined($dir) && !-d $dir;
	$dir //= $self->{"tmp$$.$self"} = tempdir("skv.$$-XXXX", TMPDIR => 1);
	$base //= '';
	my $f = $self->{filename} = "$dir/$base.sqlite3";
	$self->{lock_path} = $opt->{lock_path} // "$dir/$base.flock";
	unless (-s $f) {
		require PublicInbox::Syscall;
		PublicInbox::Syscall::nodatacow_dir($dir); # for journal/shm/wal
		open my $fh, '+>>', $f or die "failed to open $f: $!";
	}
	$self;
}

sub set_maybe {
	my ($self, $key, $val, $lock) = @_;
	$lock //= $self->lock_for_scope_fast;
	my $sth = $self->{dbh}->prepare_cached(<<'');
INSERT OR IGNORE INTO kv (k,v) VALUES (?, ?)

	$sth->bind_param(1, $key, SQL_BLOB);
	$sth->bind_param(2, $val, SQL_BLOB);
	my $e = $sth->execute;
	$e == 0 ? undef : $e;
}

# caller calls sth->fetchrow_array
sub each_kv_iter {
	my ($self) = @_;
	my $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
SELECT k,v FROM kv

	$sth->execute;
	$sth
}

sub keys {
	my ($self, @pfx) = @_;
	my $sql = 'SELECT k FROM kv';
	if (defined $pfx[0]) {
		$sql .= ' WHERE k LIKE ? ESCAPE ?';
		my $anywhere = !!$pfx[1];
		$pfx[1] = '\\';
		$pfx[0] =~ s/([%_\\])/\\$1/g; # glob chars
		$pfx[0] .= '%';
		substr($pfx[0], 0, 0, '%') if $anywhere;
	} else {
		@pfx = (); # [0] may've been undef
	}
	my $sth = $self->dbh->prepare($sql);
	if (@pfx) {
		$sth->bind_param(1, $pfx[0], SQL_BLOB);
		$sth->bind_param(2, $pfx[1]);
	}
	$sth->execute;
	map { $_->[0] } @{$sth->fetchall_arrayref};
}

sub set {
	my ($self, $key, $val) = @_;
	if (defined $val) {
		my $sth = $self->{dbh}->prepare_cached(<<'');
INSERT OR REPLACE INTO kv (k,v) VALUES (?,?)

		$sth->bind_param(1, $key, SQL_BLOB);
		$sth->bind_param(2, $val, SQL_BLOB);
		my $e = $sth->execute;
		$e == 0 ? undef : $e;
	} else {
		my $sth = $self->{dbh}->prepare_cached(<<'');
DELETE FROM kv WHERE k = ?

		$sth->bind_param(1, $key, SQL_BLOB);
	}
}

sub get {
	my ($self, $key) = @_;
	my $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
SELECT v FROM kv WHERE k = ?

	$sth->bind_param(1, $key, SQL_BLOB);
	$sth->execute;
	$sth->fetchrow_array;
}

sub xchg {
	my ($self, $key, $newval, $lock) = @_;
	$lock //= $self->lock_for_scope_fast;
	my $oldval = get($self, $key);
	if (defined $newval) {
		set($self, $key, $newval);
	} else {
		my $sth = $self->{dbh}->prepare_cached(<<'');
DELETE FROM kv WHERE k = ?

		$sth->bind_param(1, $key, SQL_BLOB);
		$sth->execute;
	}
	$oldval;
}

sub count {
	my ($self) = @_;
	my $sth = $self->{dbh}->prepare_cached(<<'');
SELECT COUNT(k) FROM kv

	$sth->execute;
	$sth->fetchrow_array;
}

# faster than ->count due to how SQLite works
sub has_entries {
	my ($self) = @_;
	my @n = $self->{dbh}->selectrow_array('SELECT k FROM kv LIMIT 1');
	scalar(@n) ? 1 : undef;
}

sub dbh_release {
	my ($self, $lock) = @_;
	my $dbh = delete $self->{dbh} or return;
	$lock //= $self->lock_for_scope_fast; # may be needed for WAL
	%{$dbh->{CachedKids}} = (); # cleanup prepare_cached
	$dbh->disconnect;
}

sub DESTROY {
	my ($self) = @_;
	dbh_release($self);
	my $dir = delete $self->{"tmp$$.$self"} or return;
	my $tries = 0;
	do {
		$! = 0;
		eval { rmtree($dir) };
	} while ($@ && $!{ENOENT} && $tries++ < 5);
	warn "error removing $dir: $@" if $@;
	warn "Took $tries tries to remove $dir\n" if $tries;
}

1;
public-inbox-1.9.0/lib/PublicInbox/Sigfd.pm000066400000000000000000000035001430031475700204740ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors 
# License: AGPL-3.0+ 

# Wraps a signalfd (or similar) for PublicInbox::DS
# fields: (sig: hashref similar to %SIG, but signal numbers as keys)
package PublicInbox::Sigfd;
use strict;
use parent qw(PublicInbox::DS);
use PublicInbox::Syscall qw(signalfd EPOLLIN EPOLLET);
use POSIX ();

# returns a coderef to unblock signals if neither signalfd or kqueue
# are available.
sub new {
	my ($class, $sig, $nonblock) = @_;
	my %signo = map {;
		my $cb = $sig->{$_};
		# SIGWINCH is 28 on FreeBSD, NetBSD, OpenBSD
		my $num = ($_ eq 'WINCH' && $^O =~ /linux|bsd/i) ? 28 : do {
			my $m = "SIG$_";
			POSIX->$m;
		};
		$num => $cb;
	} keys %$sig;
	my $self = bless { sig => \%signo }, $class;
	my $io;
	my $fd = signalfd([keys %signo], $nonblock);
	if (defined $fd && $fd >= 0) {
		open($io, '+<&=', $fd) or die "open: $!";
	} elsif (eval { require PublicInbox::DSKQXS }) {
		$io = PublicInbox::DSKQXS->signalfd([keys %signo], $nonblock);
	} else {
		return; # wake up every second to check for signals
	}
	if ($nonblock) { # it can go into the event loop
		$self->SUPER::new($io, EPOLLIN | EPOLLET);
	} else { # master main loop
		$self->{sock} = $io;
		$self;
	}
}

# PublicInbox::Daemon in master main loop (blocking)
sub wait_once ($) {
	my ($self) = @_;
	# 128 == sizeof(struct signalfd_siginfo)
	my $r = sysread($self->{sock}, my $buf, 128 * 64);
	if (defined($r)) {
		my $nr = $r / 128 - 1; # $nr may be -1
		for my $off (0..$nr) {
			# the first uint32_t of signalfd_siginfo: ssi_signo
			my $signo = unpack('L', substr($buf, 128 * $off, 4));
			my $cb = $self->{sig}->{$signo};
			$cb->($signo) if $cb ne 'IGNORE';
		}
	}
	$r;
}

# called by PublicInbox::DS in epoll_wait loop
sub event_step {
	while (wait_once($_[0])) {} # non-blocking
}

1;
public-inbox-1.9.0/lib/PublicInbox/Smsg.pm000066400000000000000000000112611430031475700203540ustar00rootroot00000000000000# Copyright (C) 2015-2021 all contributors 
# License: AGPL-3.0+ 
#
# A small/skeleton/slim representation of a message.

# This used to be "SearchMsg", but we split out overview
# indexing into over.sqlite3 so it's not just "search".  There
# may be many of these objects loaded in memory at once for
# large threads in our WWW UI and the NNTP range responses.
package PublicInbox::Smsg;
use strict;
use v5.10.1;
use parent qw(Exporter);
our @EXPORT_OK = qw(subject_normalized);
use PublicInbox::MID qw(mids references);
use PublicInbox::Address;
use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);

sub oidbin { pack('H*', $_[0]->{blob}) }

sub to_doc_data {
	my ($self) = @_;
	join("\n",
		$self->{subject},
		$self->{from},
		$self->{references} // '',
		$self->{to},
		$self->{cc},
		$self->{blob},
		$self->{mid},
		$self->{bytes} // '',
		$self->{lines} // ''
	);
}

sub load_from_data ($$) {
	my ($self) = $_[0]; # data = $_[1]
	utf8::decode($_[1]);
	(
		$self->{subject},
		$self->{from},
		$self->{references},

		# To: and Cc: are stored to optimize HDR/XHDR in NNTP since
		# some NNTP clients will use that for message displays.
		# NNTP only, and only stored in Over(view), not Xapian
		$self->{to},
		$self->{cc},

		$self->{blob},
		$self->{mid},

		# NNTP only
		$self->{bytes},
		$self->{lines}
	) = split(/\n/, $_[1]);
}

sub psgi_cull ($) {
	my ($self) = @_;

	# drop NNTP-only fields which aren't relevant to PSGI results:
	# saves ~80K on a 200 item search result:
	# TODO: we may need to keep some of these for JMAP...
	my ($f) = delete @$self{qw(from tid to cc bytes lines)};
	# ghosts don't have ->{from}
	$self->{from_name} = join(', ', PublicInbox::Address::names($f // ''));
	$self;
}

sub parse_references ($$$) {
	my ($smsg, $hdr, $mids) = @_;
	my $refs = references($hdr);
	push(@$refs, @$mids) if scalar(@$mids) > 1;
	return $refs if scalar(@$refs) == 0;

	# prevent circular references here:
	my %seen = ( ($smsg->{mid} // '') => 1 );
	my @keep;
	foreach my $ref (@$refs) {
		if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) {
			warn "References: <$ref> too long, ignoring\n";
			next;
		}
		$seen{$ref} //= push(@keep, $ref);
	}
	$smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
	\@keep;
}

# used for v2, Import and v1 non-SQLite WWW code paths
sub populate {
	my ($self, $hdr, $sync) = @_;
	for my $f (qw(From To Cc Subject)) {
		my @all = $hdr->header($f);
		my $val = join(', ', @all);
		$val =~ tr/\r//d;
		# MIME decoding can create NULs, replace them with spaces
		# to protect git and NNTP clients
		$val =~ tr/\0\t\n/   /;

		# rare: in case headers have wide chars (not RFC2047-encoded)
		utf8::decode($val);

		# lower-case fields for read-only stuff
		$self->{lc($f)} = $val;

		# Capitalized From/Subject for git-fast-import
		next if $f eq 'To' || $f eq 'Cc';
		if (scalar(@all) > 1) {
			$val = $all[0];
			$val =~ tr/\r//d;
			$val =~ tr/\0\t\n/   /;
		}
		$self->{$f} = $val if $val ne '';
	}
	$sync //= {};
	my @ds = msg_datestamp($hdr, $sync->{autime} // $self->{ds});
	my @ts = msg_timestamp($hdr, $sync->{cotime} // $self->{ts});
	$self->{-ds} = \@ds;
	$self->{-ts} = \@ts;
	$self->{ds} //= $ds[0]; # no zone
	$self->{ts} //= $ts[0];
	$self->{mid} //= mids($hdr)->[0];
}

# no strftime, that is locale-dependent and not for RFC822
my @DoW = qw(Sun Mon Tue Wed Thu Fri Sat);
my @MoY = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

sub date ($) { # for NNTP
	my ($self) = @_;
	my $ds = $self->{ds};
	return unless defined $ds;
	my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($ds);
	"$DoW[$wday], " . sprintf("%02d $MoY[$mon] %04d %02d:%02d:%02d +0000",
				$mday, $year+1900, $hour, $min, $sec);
}

sub internaldate { # for IMAP
	my ($self) = @_;
	my ($sec, $min, $hour, $mday, $mon, $year) = gmtime($self->{ts} // 0);
	sprintf("%02d-$MoY[$mon]-%04d %02d:%02d:%02d +0000",
				$mday, $year+1900, $hour, $min, $sec);
}

our $REPLY_RE = qr/^re:\s+/i;

# TODO: see RFC 5256 sec 2.1 "Base Subject" and evaluate compatibility
# w/ existing indices...
sub subject_normalized ($) {
	my ($subj) = @_;
	$subj =~ s/\A\s+//s; # no leading space
	$subj =~ s/\s+\z//s; # no trailing space
	$subj =~ s/\s+/ /gs; # no redundant spaces
	$subj =~ s/\.+\z//; # no trailing '.'
	$subj =~ s/$REPLY_RE//igo; # remove reply prefix
	$subj;
}

# returns the number of bytes to add if given a non-CRLF arg
sub crlf_adjust ($) {
	if (index($_[0], "\r\n") < 0) {
		# common case is LF-only, every \n needs an \r;
		# so favor a cheap tr// over an expensive m//g
		$_[0] =~ tr/\n/\n/;
	} else { # count number of '\n' w/o '\r', expensive:
		scalar(my @n = ($_[0] =~ m/(?{bytes} = $_[2] + crlf_adjust($_[1]) }

1;
public-inbox-1.9.0/lib/PublicInbox/SolverGit.pm000066400000000000000000000503031430031475700213610ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# "Solve" blobs which don't exist in git code repositories by
# searching inboxes for post-image blobs.

# this emits a lot of debugging/tracing information which may be
# publicly viewed over HTTP(S).  Be careful not to expose
# local filesystem layouts in the process.
package PublicInbox::SolverGit;
use strict;
use v5.10.1;
use File::Temp 0.19 (); # 0.19 for ->newdir
use Fcntl qw(SEEK_SET);
use PublicInbox::Git qw(git_unquote git_quote);
use PublicInbox::MsgIter qw(msg_part_text);
use PublicInbox::Qspawn;
use PublicInbox::Tmpfile;
use PublicInbox::GitAsyncCat;
use PublicInbox::Eml;
use URI::Escape qw(uri_escape_utf8);

# POSIX requires _POSIX_ARG_MAX >= 4096, and xargs is required to
# subtract 2048 bytes.  We also don't factor in environment variable
# headroom into this.
use POSIX qw(sysconf _SC_ARG_MAX);
my $ARG_SIZE_MAX = (sysconf(_SC_ARG_MAX) || 4096) - 2048;
my $OID_MIN = 7;

# By default, "git format-patch" generates filenames with a four-digit
# prefix, so that means 9999 patch series are OK, right? :>
# Maybe we can make this configurable, main concern is disk space overhead
# for uncompressed patch fragments.  Aside from space, public-inbox-httpd
# is otherwise unaffected by having many patches, here, as it can share
# work fairly.  Other PSGI servers may have trouble, though.
my $MAX_PATCH = 9999;

my $LF = qr!\r?\n!;
my $ANY = qr![^\r\n]+!;
my $MODE = '100644|120000|100755';
my $FN = qr!(?:("?[^/\n]+/[^\r\n]+)|/dev/null)!;
my %BAD_COMPONENT = ('' => 1, '.' => 1, '..' => 1);

# di = diff info / a hashref with information about a diff ($di):
# {
#	oid_a => abbreviated pre-image oid,
#	oid_b => abbreviated post-image oid,
#	tmp => anonymous file handle with the diff,
#	hdr_lines => string of various header lines for mode information
#	mode_a => original mode of oid_a (string, not integer),
#	ibx => PublicInbox::Inbox object containing the diff
#	smsg => PublicInbox::Smsg object containing diff
#	path_a => pre-image path
#	path_b => post-image path
#	n => numeric path of the patch (relative to worktree)
# }

sub dbg ($$) {
	print { $_[0]->{out} } $_[1], "\n" or ERR($_[0], "print(dbg): $!");
}

sub done ($$) {
	my ($self, $res) = @_;
	my $ucb = delete($self->{user_cb}) or return;
	$ucb->($res, $self->{uarg});
}

sub ERR ($$) {
	my ($self, $err) = @_;
	print { $self->{out} } $err, "\n";
	eval { done($self, $err) };
	die $err;
}

# look for existing objects already in git repos, returns arrayref
# if found, number of remaining git coderepos to try if not.
sub solve_existing ($$) {
	my ($self, $want) = @_;
	my $try = $want->{try_gits} //= [ @{$self->{gits}} ]; # array copy
	my $git = shift @$try or die 'BUG {try_gits} empty';
	my $oid_b = $want->{oid_b};
	my ($oid_full, $type, $size) = $git->check($oid_b);

	# other than {oid_b, try_gits, try_ibxs}
	my $have_hints = scalar keys %$want > 3;
	if (defined($type) && (!$have_hints || $type eq 'blob')) {
		delete $want->{try_gits};
		return [ $git, $oid_full, $type, int($size) ]; # done, success
	}

	# TODO: deal with 40-char "abbreviations" with future SHA-256 git
	return scalar(@$try) if length($oid_b) >= 40;

	# parse stderr of "git cat-file --batch-check"
	my $err = $git->last_check_err;
	my (@oids) = ($err =~ /\b([a-f0-9]{40,})\s+blob\b/g);
	return scalar(@$try) unless scalar(@oids);

	# TODO: do something with the ambiguous array?
	# push @ambiguous, [ $git, @oids ];

	dbg($self, "`$oid_b' ambiguous in " .
			join("\n\t", $git->pub_urls($self->{psgi_env}))
			. "\n" .
			join('', map { "$_ blob\n" } @oids));
	scalar(@$try);
}

sub extract_diff ($$) {
	my ($p, $arg) = @_;
	my ($self, $want, $smsg) = @$arg;
	my ($part) = @$p; # ignore $depth and @idx;
	my $ct = $part->content_type || 'text/plain';
	my $post = $want->{oid_b};
	my $pre = $want->{oid_a};
	if (!defined($pre) || $pre !~ /\A[a-f0-9]+\z/) {
		$pre = '[a-f0-9]{7}'; # for RE below
	}

	# Email::MIME::Encodings forces QP to be CRLF upon decoding,
	# change it back to LF:
	my $cte = $part->header('Content-Transfer-Encoding') || '';
	my ($s, undef) = msg_part_text($part, $ct);
	defined $s or return;
	delete $part->{bdy};
	if ($cte =~ /\bquoted-printable\b/i && $part->crlf eq "\n") {
		$s =~ s/\r\n/\n/sg;
	}
	$s =~ m!( # $1 start header lines we save for debugging:

		# everything before ^index is optional, but we don't
		# want to match ^(old|copy|rename|deleted|...) unless
		# we match /^diff --git/ first:
		(?: # begin optional stuff:

		# try to get the pre-and-post filenames as $2 and $3
		(?:^diff\x20--git\x20$FN\x20$FN$LF)

		(?:^(?: # pass all this to git-apply:
			# old mode $4
			(?:old\x20mode\x20($MODE))
			|
			# new mode (possibly new file) ($5)
			(?:new\x20(?:file\x20)?mode\x20($MODE))
			|
			(?:(?:copy|rename|deleted|
				dissimilarity|similarity)$ANY)
		)$LF)*

		)? # end of optional stuff, everything below is required

		# match the pre and post-image OIDs as $6 $7
		^index\x20(${pre}[a-f0-9]*)\.\.(${post}[a-f0-9]*)
			# mode if unchanged $8
			(?:\x20(100644|120000|100755))?$LF
	) # end of header lines ($1)
	( # $9 is the patch body
		# "--- a/foo.c" sets pre-filename ($10) in case
		# $2 is missing
		(?:^---\x20$FN$LF)

		# "+++ b/foo.c" sets post-filename ($11) in case
		# $3 is missing
		(?:^\+{3}\x20$FN$LF)

		# the meat of the diff, including "^\\No newline ..."
		# We also allow for totally blank lines w/o leading spaces,
		# because git-apply(1) handles that case, too
		(?:^(?:[\@\+\x20\-\\][^\n]*|)$LF)+
	)!smx or return;
	undef $s; # free memory

	my $di = {
		hdr_lines => $1,
		oid_a => $6,
		oid_b => $7,
		mode_a => $5 // $8 // $4, # new (file) // unchanged // old
	};
	my $path_a = $2 // $10;
	my $path_b = $3 // $11;
	my $patch = $9;

	# don't care for leading 'a/' and 'b/'
	my (undef, @a) = split(m{/}, git_unquote($path_a)) if defined($path_a);
	my (undef, @b) = split(m{/}, git_unquote($path_b));

	# get rid of path-traversal attempts and junk patches:
	# it's junk at best, an attack attempt at worse:
	foreach (@a, @b) { return if $BAD_COMPONENT{$_} }

	$di->{path_a} = join('/', @a) if @a;
	$di->{path_b} = join('/', @b);

	my $path = ++$self->{tot};
	$di->{n} = $path;
	open(my $tmp, '>:utf8', $self->{tmp}->dirname . "/$path") or
		die "open(tmp): $!";
	print $tmp $di->{hdr_lines}, $patch or die "print(tmp): $!";
	close $tmp or die "close(tmp): $!";

	# for debugging/diagnostics:
	$di->{ibx} = $want->{cur_ibx};
	$di->{smsg} = $smsg;

	push @{$self->{tmp_diffs}}, $di;
}

sub path_searchable ($) { defined($_[0]) && $_[0] =~ m!\A[\w/\. \-]+\z! }

# ".." appears in path names, which confuses Xapian into treating
# it as a range query.  So we split on ".." since Xapian breaks
# on punctuation anyways:
sub filename_query ($) {
	join('', map { qq( dfn:"$_") } split(/\.\./, $_[0]));
}

sub find_smsgs ($$$) {
	my ($self, $ibx, $want) = @_;
	my $srch = $ibx->isrch or return;

	my $post = $want->{oid_b} or die 'BUG: no {oid_b}';
	$post =~ /\A[a-f0-9]+\z/ or die "BUG: oid_b not hex: $post";

	my $q = "dfpost:$post";
	my $pre = $want->{oid_a};
	if (defined $pre && $pre =~ /\A[a-f0-9]+\z/) {
		$q .= " dfpre:$pre";
	}

	my $path_b = $want->{path_b};
	if (path_searchable($path_b)) {
		$q .= filename_query($path_b);

		my $path_a = $want->{path_a};
		if (path_searchable($path_a) && $path_a ne $path_b) {
			$q .= filename_query($path_a);
		}
	}
	my $mset = $srch->mset($q, { relevance => 1 });
	$mset->size ? $srch->mset_to_smsg($ibx, $mset) : undef;
}

sub update_index_result ($$) {
	my ($bref, $self) = @_;
	my ($qsp, $msg) = delete @$self{qw(-qsp -msg)};
	if (my $err = $qsp->{err}) {
		ERR($self, "git update-index error: $err");
	}
	dbg($self, $msg);
	next_step($self); # onto do_git_apply
}

sub prepare_index ($) {
	my ($self) = @_;
	my $patches = $self->{patches};
	$self->{nr} = 0;

	my $di = $patches->[0] or die 'no patches';
	my $oid_a = $di->{oid_a} or die '{oid_a} unset';
	my $existing = $self->{found}->{$oid_a};

	# no index creation for added files
	$oid_a =~ /\A0+\z/ and return next_step($self);

	die "BUG: $oid_a not found" unless $existing;

	my $oid_full = $existing->[1];
	my $path_a = $di->{path_a} or die "BUG: path_a missing for $oid_full";
	my $mode_a = $di->{mode_a} // '100644';

	my $in = tmpfile("update-index.$oid_full") or die "tmpfile: $!";
	print $in "$mode_a $oid_full\t$path_a\0" or die "print: $!";
	$in->flush or die "flush: $!";
	sysseek($in, 0, SEEK_SET) or die "seek: $!";

	dbg($self, 'preparing index');
	my $rdr = { 0 => $in };
	my $cmd = [ qw(git update-index -z --index-info) ];
	my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env}, $rdr);
	$path_a = git_quote($path_a);
	$self->{-qsp} = $qsp;
	$self->{-msg} = "index prepared:\n$mode_a $oid_full\t$path_a";
	$qsp->psgi_qx($self->{psgi_env}, undef, \&update_index_result, $self);
}

# pure Perl "git init"
sub do_git_init ($) {
	my ($self) = @_;
	my $dir = $self->{tmp}->dirname;
	my $git_dir = "$dir/git";

	foreach ('', qw(objects refs objects/info refs/heads)) {
		mkdir("$git_dir/$_") or die "mkdir $_: $!";
	}
	open my $fh, '>', "$git_dir/config" or die "open git/config: $!";
	my $first = $self->{gits}->[0];
	my $fmt = $first->object_format;
	my $v = defined($$fmt) ? 1 : 0;
	print $fh <', "$git_dir/HEAD" or die "open git/HEAD: $!";
	print $fh "ref: refs/heads/master\n" or die "print git/HEAD: $!";
	close $fh or die "close git/HEAD: $!";

	my $f = 'objects/info/alternates';
	open $fh, '>', "$git_dir/$f" or die "open: $f: $!";
	foreach my $git (@{$self->{gits}}) {
		print $fh $git->git_path('objects'),"\n" or die "print $f: $!";
	}
	close $fh or die "close: $f: $!";
	my $tmp_git = $self->{tmp_git} = PublicInbox::Git->new($git_dir);
	$tmp_git->{-tmp} = $self->{tmp};
	$self->{git_env} = {
		GIT_DIR => $git_dir,
		GIT_INDEX_FILE => "$git_dir/index",
		GIT_TEST_FSYNC => 0, # undocumented git env
	};
	prepare_index($self);
}

sub do_finish ($) {
	my ($self) = @_;
	my ($found, $oid_want) = @$self{qw(found oid_want)};
	if (my $exists = $found->{$oid_want}) {
		return done($self, $exists);
	}

	# let git disambiguate if oid_want was too short,
	# but long enough to be unambiguous:
	my $tmp_git = $self->{tmp_git};
	if (my @res = $tmp_git->check($oid_want)) {
		return done($self, $found->{$res[0]});
	}
	if (my $err = $tmp_git->last_check_err) {
		dbg($self, $err);
	}
	done($self, undef);
}

sub event_step ($) {
	my ($self) = @_;
	eval {
		# step 1: resolve blobs to patches in the todo queue
		if (my $want = pop @{$self->{todo}}) {
			# this populates {patches} and {todo}
			resolve_patch($self, $want);

		# step 2: then we instantiate a working tree once
		# the todo queue is finally empty:
		} elsif (!defined($self->{tmp_git})) {
			do_git_init($self);

		# step 3: apply each patch in the stack
		} elsif (scalar @{$self->{patches}}) {
			do_git_apply($self);

		# step 4: execute the user-supplied callback with
		# our result: (which may be undef)
		# Other steps may call user_cb to terminate prematurely
		# on error
		} elsif (exists $self->{user_cb}) {
			do_finish($self);
		} else {
			die 'about to call user_cb twice'; # Oops :x
		}
	}; # eval
	my $err = $@;
	if ($err) {
		$err =~ s/^\s*Exception:\s*//; # bad word to show users :P
		dbg($self, "E: $err");
		eval { done($self, $err) };
	}
}

sub next_step ($) {
	my ($self) = @_;
	# if outside of public-inbox-httpd, caller is expected to be
	# looping event_step, anyways
	my $async = $self->{psgi_env}->{'pi-httpd.async'} or return;
	# PublicInbox::HTTPD::Async->new
	$async->(undef, undef, $self);
}

sub mark_found ($$$) {
	my ($self, $oid, $found_info) = @_;
	my $found = $self->{found};
	$found->{$oid} = $found_info;
	my $oid_cur = $found_info->[1];
	while ($oid_cur ne $oid && length($oid_cur) > $OID_MIN) {
		$found->{$oid_cur} = $found_info;
		chop($oid_cur);
	}
}

sub parse_ls_files ($$) {
	my ($self, $bref) = @_;
	my ($qsp, $di) = delete @$self{qw(-qsp -cur_di)};
	if (my $err = $qsp->{err}) {
		die "git ls-files error: $err";
	}

	my ($line, @extra) = split(/\0/, $$bref);
	scalar(@extra) and die "BUG: extra files in index: <",
				join('> <', @extra), ">";

	my ($info, $file) = split(/\t/, $line, 2);
	my ($mode_b, $oid_b_full, $stage) = split(/ /, $info);
	if ($file ne $di->{path_b}) {
		die
"BUG: index mismatch: file=$file != path_b=$di->{path_b}";
	}

	my $tmp_git = $self->{tmp_git} or die 'no git working tree';
	my (undef, undef, $size) = $tmp_git->check($oid_b_full);
	defined($size) or die "check $oid_b_full failed";

	dbg($self, "index at:\n$mode_b $oid_b_full\t$file");
	my $created = [ $tmp_git, $oid_b_full, 'blob', $size, $di ];
	mark_found($self, $di->{oid_b}, $created);
	next_step($self); # onto the next patch
}

sub ls_files_result {
	my ($bref, $self) = @_;
	eval { parse_ls_files($self, $bref) };
	ERR($self, $@) if $@;
}

sub oids_same_ish ($$) {
	(index($_[0], $_[1]) == 0) || (index($_[1], $_[0]) == 0);
}

sub skip_identical ($$$) {
	my ($self, $patches, $cur_oid_b) = @_;
	while (my $nxt = $patches->[0]) {
		if (oids_same_ish($cur_oid_b, $nxt->{oid_b})) {
			dbg($self, 'skipping '.di_url($self, $nxt).
				" for $cur_oid_b");
			shift @$patches;
		} else {
			return;
		}
	}
}

sub apply_result ($$) {
	my ($bref, $self) = @_;
	my ($qsp, $di) = delete @$self{qw(-qsp -cur_di)};
	dbg($self, $$bref);
	my $patches = $self->{patches};
	if (my $err = $qsp->{err}) {
		my $msg = "git apply error: $err";
		my $nxt = $patches->[0];
		if ($nxt && oids_same_ish($nxt->{oid_b}, $di->{oid_b})) {
			dbg($self, $msg);
			dbg($self, 'trying '.di_url($self, $nxt));
			return do_git_apply($self);
		} else {
			ERR($self, $msg);
		}
	} else {
		skip_identical($self, $patches, $di->{oid_b});
	}

	my @cmd = qw(git ls-files -s -z);
	$qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env});
	$self->{-cur_di} = $di;
	$self->{-qsp} = $qsp;
	$qsp->psgi_qx($self->{psgi_env}, undef, \&ls_files_result, $self);
}

sub do_git_apply ($) {
	my ($self) = @_;
	my $dn = $self->{tmp}->dirname;
	my $patches = $self->{patches};

	# we need --ignore-whitespace because some patches are CRLF
	my @cmd = (qw(git apply --cached --ignore-whitespace
			--unidiff-zero --whitespace=warn --verbose));
	my $len = length(join(' ', @cmd));
	my $total = $self->{tot};
	my $di; # keep track of the last one for "git ls-files"
	my $prv_oid_b;

	do {
		my $i = ++$self->{nr};
		$di = shift @$patches;
		dbg($self, "\napplying [$i/$total] " . di_url($self, $di) .
			"\n" . $di->{hdr_lines});
		my $path = $di->{n};
		$len += length($path) + 1;
		push @cmd, $path;
		$prv_oid_b = $di->{oid_b};
	} while (@$patches && $len < $ARG_SIZE_MAX &&
		 !oids_same_ish($patches->[0]->{oid_b}, $prv_oid_b));

	my $opt = { 2 => 1, -C => $dn, quiet => 1 };
	my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}, $opt);
	$self->{-cur_di} = $di;
	$self->{-qsp} = $qsp;
	$qsp->psgi_qx($self->{psgi_env}, undef, \&apply_result, $self);
}

sub di_url ($$) {
	my ($self, $di) = @_;
	# note: we don't pass the PSGI env unconditionally, here,
	# different inboxes can have different HTTP_HOST on the same instance.
	my $ibx = $di->{ibx};
	my $env = $self->{psgi_env} if $ibx eq $self->{inboxes}->[0];
	my $url = $ibx->base_url($env);
	my $mid = $di->{smsg}->{mid};
	defined($url) ? "$url$mid/" : "<$mid>";
}

sub retry_current {
	my ($self, $want) = @_;
	push @{$self->{todo}}, $want;
	next_step($self); # retry solve_existing
}

sub try_harder ($$) {
	my ($self, $want) = @_;

	# do we have more inboxes to try?
	return retry_current($self, $want) if scalar @{$want->{try_ibxs}};

	my $cur_want = $want->{oid_b};
	if (length($cur_want) > $OID_MIN) { # maybe a shorter OID will work
		delete $want->{try_ibxs}; # drop empty arrayref
		chop($cur_want);
		dbg($self, "retrying $want->{oid_b} as $cur_want");
		$want->{oid_b} = $cur_want;
		return retry_current($self, $want); # retry with shorter abbrev
	}

	dbg($self, "could not find $cur_want");
	eval { done($self, undef) };
	die "E: $@" if $@;
}

sub extract_diffs_done {
	my ($self, $want) = @_;

	delete $want->{try_smsgs};
	delete $want->{cur_ibx};

	my $diffs = delete $self->{tmp_diffs};
	if (scalar @$diffs) {
		unshift @{$self->{patches}}, @$diffs;
		dbg($self, "found $want->{oid_b} in " .  join(" ||\n\t",
			map { di_url($self, $_) } @$diffs));

		# good, we can find a path to the oid we $want, now
		# lets see if we need to apply more patches:
		my $di = $diffs->[0];
		my $src = $di->{oid_a};

		unless ($src =~ /\A0+\z/) {
			# we have to solve it using another oid, fine:
			my $job = { oid_b => $src, path_b => $di->{path_a} };
			push @{$self->{todo}}, $job;
		}
		return next_step($self); # onto the next todo item
	}
	try_harder($self, $want);
}

sub extract_diff_async {
	my ($bref, $oid, $type, $size, $x) = @_;
	my ($self, $want, $smsg) = @$x;
	if (defined($oid)) {
		$smsg->{blob} eq $oid or
				ERR($self, "BUG: $smsg->{blob} != $oid");
		PublicInbox::Eml->new($bref)->each_part(\&extract_diff, $x, 1);
	}

	scalar(@{$want->{try_smsgs}}) ? retry_current($self, $want)
					: extract_diffs_done($self, $want);
}

sub resolve_patch ($$) {
	my ($self, $want) = @_;

	my $cur_want = $want->{oid_b};
	if (scalar(@{$self->{patches}}) > $MAX_PATCH) {
		die "Aborting, too many steps to $self->{oid_want}";
	}

	if (my $msgs = $want->{try_smsgs}) {
		my $smsg = shift @$msgs;
		if ($self->{psgi_env}->{'pi-httpd.async'}) {
			return ibx_async_cat($want->{cur_ibx}, $smsg->{blob},
						\&extract_diff_async,
						[$self, $want, $smsg]);
		} else {
			if (my $eml = $want->{cur_ibx}->smsg_eml($smsg)) {
				$eml->each_part(\&extract_diff,
						[ $self, $want, $smsg ], 1);
			}
		}

		return scalar(@$msgs) ? retry_current($self, $want)
					: extract_diffs_done($self, $want);
	}

	# see if we can find the blob in an existing git repo:
	if (!$want->{try_ibxs} && $self->{seen_oid}->{$cur_want}++) {
		die "Loop detected solving $cur_want\n";
	}
	$want->{try_ibxs} //= [ @{$self->{inboxes}} ]; # array copy
	my $existing = solve_existing($self, $want);
	if (ref $existing) {
		my ($found_git, undef, $type, undef) = @$existing;
		dbg($self, "found $cur_want in " .
			join(" ||\n\t",
				$found_git->pub_urls($self->{psgi_env})));

		if ($cur_want eq $self->{oid_want} || $type ne 'blob') {
			eval { done($self, $existing) };
			die "E: $@" if $@;
			return;
		}
		mark_found($self, $cur_want, $existing);
		return next_step($self); # onto patch application
	} elsif ($existing > 0) {
		return retry_current($self, $want);
	} else { # $existing == 0: we may retry if inbox scan (below) fails
		delete $want->{try_gits};
	}

	# scan through inboxes to look for emails which results in
	# the oid we want:
	my $ibx = shift(@{$want->{try_ibxs}}) or die 'BUG: {try_ibxs} empty';
	if (my $msgs = find_smsgs($self, $ibx, $want)) {
		$want->{try_smsgs} = $msgs;
		$want->{cur_ibx} = $ibx;
		$self->{tmp_diffs} = [];
		return retry_current($self, $want);
	}
	try_harder($self, $want);
}

# this API is designed to avoid creating self-referential structures;
# so user_cb never references the SolverGit object
sub new {
	my ($class, $ibx, $user_cb, $uarg) = @_;

	bless {
		gits => $ibx->{-repo_objs},
		user_cb => $user_cb,
		uarg => $uarg,
		# -cur_di, -qsp, -msg => temporary fields for Qspawn callbacks

		# TODO: config option for searching related inboxes
		inboxes => [ $ibx ],
	}, $class;
}

# recreate $oid_want using $hints
# hints keys: path_a, path_b, oid_a (note: `oid_b' is NOT a hint)
# Calls {user_cb} with: [ ::Git object, oid_full, type, size, di (diff_info) ]
# with found object, or undef if nothing was found
# Calls {user_cb} with a string error on fatal errors
sub solve ($$$$$) {
	my ($self, $env, $out, $oid_want, $hints) = @_;

	# should we even get here? Probably not, but somebody
	# could be manually typing URLs:
	return done($self, undef) if $oid_want =~ /\A0+\z/;

	$self->{oid_want} = $oid_want;
	$self->{out} = $out;
	$self->{seen_oid} = {};
	$self->{tot} = 0;
	$self->{psgi_env} = $env;
	$self->{todo} = [ { %$hints, oid_b => $oid_want } ];
	$self->{patches} = []; # [ $di, $di, ... ]
	$self->{found} = {}; # { abbr => [ ::Git, oid, type, size, $di ] }
	$self->{tmp} = File::Temp->newdir("solver.$oid_want-XXXX", TMPDIR => 1);

	dbg($self, "solving $oid_want ...");
	if (my $async = $env->{'pi-httpd.async'}) {
		# PublicInbox::HTTPD::Async->new
		$async->(undef, undef, $self);
	} else {
		event_step($self) while $self->{user_cb};
	}
}

1;
public-inbox-1.9.0/lib/PublicInbox/Spamcheck.pm000066400000000000000000000011561430031475700213430ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors 
# License: AGPL-3.0+ 

# Spamchecking used by -watch and -mda tools
package PublicInbox::Spamcheck;
use strict;
use warnings;

sub get {
	my ($cfg, $key, $default) = @_;
	my $spamcheck = $cfg->{$key};
	$spamcheck = $default unless $spamcheck;

	return if !$spamcheck || $spamcheck eq 'none';

	if ($spamcheck eq 'spamc') {
		$spamcheck = 'PublicInbox::Spamcheck::Spamc';
	}
	if ($spamcheck =~ /::/) {
		eval "require $spamcheck";
		return $spamcheck->new;
	}
	warn "unsupported $key=$spamcheck\n";
	undef;
}

1;
public-inbox-1.9.0/lib/PublicInbox/Spamcheck/000077500000000000000000000000001430031475700210025ustar00rootroot00000000000000public-inbox-1.9.0/lib/PublicInbox/Spamcheck/Spamc.pm000066400000000000000000000034341430031475700224070ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors 
# License: AGPL-3.0+ 

# Default spam filter class for wrapping spamc(1)
package PublicInbox::Spamcheck::Spamc;
use strict;
use warnings;
use PublicInbox::Spawn qw(popen_rd spawn);
use IO::Handle;
use Fcntl qw(SEEK_SET);

sub new {
	my ($class) = @_;
	bless {
		checkcmd => [qw(spamc -E --headers)],
		hamcmd => [qw(spamc -L ham)],
		spamcmd => [qw(spamc -L spam)],
	}, $class;
}

sub spamcheck {
	my ($self, $msg, $out) = @_;

	my $rdr = { 0 => _msg_to_fh($self, $msg) };
	my ($fh, $pid) = popen_rd($self->{checkcmd}, undef, $rdr);
	unless (ref $out) {
		my $buf = '';
		$out = \$buf;
	}
	$$out = do { local $/; <$fh> };
	close $fh or die "close failed: $!";
	waitpid($pid, 0);
	($? || $$out eq '') ? 0 : 1;
}

sub hamlearn {
	my ($self, $msg, $rdr) = @_;
	_learn($self, $msg, $rdr, 'hamcmd');
}

sub spamlearn {
	my ($self, $msg, $rdr) = @_;
	_learn($self, $msg, $rdr, 'spamcmd');
}

sub _learn {
	my ($self, $msg, $rdr, $field) = @_;
	$rdr ||= {};
	$rdr->{0} = _msg_to_fh($self, $msg);
	$rdr->{1} ||= $self->_devnull;
	$rdr->{2} ||= $self->_devnull;
	my $pid = spawn($self->{$field}, undef, $rdr);
	waitpid($pid, 0);
	!$?;
}

sub _devnull {
	my ($self) = @_;
	$self->{-devnull} //= do {
		open my $fh, '+>', '/dev/null' or
				die "failed to open /dev/null: $!";
		$fh
	}
}

sub _msg_to_fh {
	my ($self, $msg) = @_;
	if (my $ref = ref($msg)) {
		my $fd = eval { fileno($msg) };
		return $msg if defined($fd) && $fd >= 0;

		open(my $tmpfh, '+>', undef) or die "failed to open: $!";
		$tmpfh->autoflush(1);
		$msg = \($msg->as_string) if $ref ne 'SCALAR';
		print $tmpfh $$msg or die "failed to print: $!";
		sysseek($tmpfh, 0, SEEK_SET) or
			die "sysseek(fh) failed: $!";

		return $tmpfh;
	}
	$msg;
}

1;
public-inbox-1.9.0/lib/PublicInbox/Spawn.pm000066400000000000000000000241051430031475700205340ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 
#
# This allows vfork to be used for spawning subprocesses if
# ~/.cache/public-inbox/inline-c is writable or if PERL_INLINE_DIRECTORY
# is explicitly defined in the environment (and writable).
# Under Linux, vfork can make a big difference in spawning performance
# as process size increases (fork still needs to mark pages for CoW use).
# Currently, we only use this for code intended for long running
# daemons (inside the PSGI code (-httpd) and -nntpd).  The short-lived
# scripts (-mda, -index, -learn, -init) either use IPC::run or standard
# Perl routines.
#
# There'll probably be more OS-level C stuff here, down the line.
# We don't want too many DSOs: https://udrepper.livejournal.com/8790.html

package PublicInbox::Spawn;
use v5.12;
use parent qw(Exporter);
use Symbol qw(gensym);
use Fcntl qw(LOCK_EX SEEK_SET);
use IO::Handle ();
use PublicInbox::ProcessPipe;
our @EXPORT_OK = qw(which spawn popen_rd run_die);
our @RLIMITS = qw(RLIMIT_CPU RLIMIT_CORE RLIMIT_DATA);

BEGIN {
	my $all_libc = <<'ALL_LIBC'; # all *nix systems we support
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

/* some platforms need alloca.h, but some don't */
#if defined(__GNUC__) && !defined(alloca)
#  define alloca(sz) __builtin_alloca(sz)
#endif

#include 
#include 

/*
 * From the av_len apidoc:
 *   Note that, unlike what the name implies, it returns
 *   the highest index in the array, so to get the size of
 *   the array you need to use "av_len(av) + 1".
 *   This is unlike "sv_len", which returns what you would expect.
 */
#define AV2C_COPY(dst, src) do { \
	I32 i; \
	I32 top_index = av_len(src); \
	I32 real_len = top_index + 1; \
	I32 capa = real_len + 1; \
	dst = alloca(capa * sizeof(char *)); \
	for (i = 0; i < real_len; i++) { \
		SV **sv = av_fetch(src, i, 0); \
		dst[i] = SvPV_nolen(*sv); \
	} \
	dst[real_len] = 0; \
} while (0)

/* needs to be safe inside a vfork'ed process */
static void exit_err(const char *fn, volatile int *cerrnum)
{
	*cerrnum = errno;
	write(2, fn, strlen(fn));
	_exit(1);
}

/*
 * unstable internal API.  It'll be updated depending on
 * whatever we'll need in the future.
 * Be sure to update PublicInbox::SpawnPP if this changes
 */
int pi_fork_exec(SV *redirref, SV *file, SV *cmdref, SV *envref, SV *rlimref,
		 const char *cd, int pgid)
{
	AV *redir = (AV *)SvRV(redirref);
	AV *cmd = (AV *)SvRV(cmdref);
	AV *env = (AV *)SvRV(envref);
	AV *rlim = (AV *)SvRV(rlimref);
	const char *filename = SvPV_nolen(file);
	pid_t pid;
	char **argv, **envp;
	sigset_t set, old;
	int ret, perrnum;
	volatile int cerrnum = 0; /* shared due to vfork */
	int chld_is_member;
	I32 max_fd = av_len(redir);

	AV2C_COPY(argv, cmd);
	AV2C_COPY(envp, env);

	if (sigfillset(&set)) return -1;
	if (sigprocmask(SIG_SETMASK, &set, &old)) return -1;
	chld_is_member = sigismember(&old, SIGCHLD);
	if (chld_is_member < 0) return -1;
	if (chld_is_member > 0)
		sigdelset(&old, SIGCHLD);

	pid = vfork();
	if (pid == 0) {
		int sig;
		I32 i, child_fd, max_rlim;

		for (child_fd = 0; child_fd <= max_fd; child_fd++) {
			SV **parent = av_fetch(redir, child_fd, 0);
			int parent_fd = SvIV(*parent);
			if (parent_fd == child_fd)
				continue;
			if (dup2(parent_fd, child_fd) < 0)
				exit_err("dup2", &cerrnum);
		}
		if (pgid >= 0 && setpgid(0, pgid) < 0)
			exit_err("setpgid", &cerrnum);
		for (sig = 1; sig < NSIG; sig++)
			signal(sig, SIG_DFL); /* ignore errors on signals */
		if (*cd && chdir(cd) < 0)
			exit_err("chdir", &cerrnum);

		max_rlim = av_len(rlim);
		for (i = 0; i < max_rlim; i += 3) {
			struct rlimit rl;
			SV **res = av_fetch(rlim, i, 0);
			SV **soft = av_fetch(rlim, i + 1, 0);
			SV **hard = av_fetch(rlim, i + 2, 0);

			rl.rlim_cur = SvIV(*soft);
			rl.rlim_max = SvIV(*hard);
			if (setrlimit(SvIV(*res), &rl) < 0)
				exit_err("setrlimit", &cerrnum);
		}

		(void)sigprocmask(SIG_SETMASK, &old, NULL);
		execve(filename, argv, envp);
		exit_err("execve", &cerrnum);
	}
	perrnum = errno;
	if (chld_is_member > 0)
		sigaddset(&old, SIGCHLD);
	ret = sigprocmask(SIG_SETMASK, &old, NULL);
	assert(ret == 0 && "BUG calling sigprocmask to restore");
	if (cerrnum) {
		int err_fd = STDERR_FILENO;
		if (err_fd <= max_fd) {
			SV **parent = av_fetch(redir, err_fd, 0);
			err_fd = SvIV(*parent);
		}
		if (pid > 0)
			waitpid(pid, NULL, 0);
		pid = -1;
		/* continue message started by exit_err in child */
		dprintf(err_fd, ": %s\n", strerror(cerrnum));
		errno = cerrnum;
	} else if (perrnum) {
		errno = perrnum;
	}
	return (int)pid;
}

static int sleep_wait(unsigned *tries, int err)
{
	const struct timespec req = { 0, 100000000 }; /* 100ms */
	switch (err) {
	case ENOBUFS: case ENOMEM: case ETOOMANYREFS:
		if (++*tries < 50) {
			fprintf(stderr, "sleeping on sendmsg: %s (#%u)\n",
				strerror(err), *tries);
			nanosleep(&req, NULL);
			return 1;
		}
	default:
		return 0;
	}
}

#if defined(CMSG_SPACE) && defined(CMSG_LEN)
#define SEND_FD_CAPA 10
#define SEND_FD_SPACE (SEND_FD_CAPA * sizeof(int))
union my_cmsg {
	struct cmsghdr hdr;
	char pad[sizeof(struct cmsghdr) + 16 + SEND_FD_SPACE];
};

SV *send_cmd4(PerlIO *s, SV *svfds, SV *data, int flags)
{
	struct msghdr msg = { 0 };
	union my_cmsg cmsg = { 0 };
	STRLEN dlen = 0;
	struct iovec iov;
	ssize_t sent;
	AV *fds = (AV *)SvRV(svfds);
	I32 i, nfds = av_len(fds) + 1;
	int *fdp;
	unsigned tries = 0;

	if (SvOK(data)) {
		iov.iov_base = SvPV(data, dlen);
		iov.iov_len = dlen;
	}
	if (!dlen) { /* must be non-zero */
		iov.iov_base = &msg.msg_namelen; /* whatever */
		iov.iov_len = 1;
	}
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	if (nfds) {
		if (nfds > SEND_FD_CAPA) {
			fprintf(stderr, "FIXME: bump SEND_FD_CAPA=%d\n", nfds);
			nfds = SEND_FD_CAPA;
		}
		msg.msg_control = &cmsg.hdr;
		msg.msg_controllen = CMSG_SPACE(nfds * sizeof(int));
		cmsg.hdr.cmsg_level = SOL_SOCKET;
		cmsg.hdr.cmsg_type = SCM_RIGHTS;
		cmsg.hdr.cmsg_len = CMSG_LEN(nfds * sizeof(int));
		fdp = (int *)CMSG_DATA(&cmsg.hdr);
		for (i = 0; i < nfds; i++) {
			SV **fd = av_fetch(fds, i, 0);
			*fdp++ = SvIV(*fd);
		}
	}
	do {
		sent = sendmsg(PerlIO_fileno(s), &msg, flags);
	} while (sent < 0 && sleep_wait(&tries, errno));
	return sent >= 0 ? newSViv(sent) : &PL_sv_undef;
}

void recv_cmd4(PerlIO *s, SV *buf, STRLEN n)
{
	union my_cmsg cmsg = { 0 };
	struct msghdr msg = { 0 };
	struct iovec iov;
	ssize_t i;
	Inline_Stack_Vars;
	Inline_Stack_Reset;

	if (!SvOK(buf))
		sv_setpvn(buf, "", 0);
	iov.iov_base = SvGROW(buf, n + 1);
	iov.iov_len = n;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = &cmsg.hdr;
	msg.msg_controllen = CMSG_SPACE(SEND_FD_SPACE);

	i = recvmsg(PerlIO_fileno(s), &msg, 0);
	if (i < 0)
		Inline_Stack_Push(&PL_sv_undef);
	else
		SvCUR_set(buf, i);
	if (i > 0 && cmsg.hdr.cmsg_level == SOL_SOCKET &&
			cmsg.hdr.cmsg_type == SCM_RIGHTS) {
		size_t len = cmsg.hdr.cmsg_len;
		int *fdp = (int *)CMSG_DATA(&cmsg.hdr);
		for (i = 0; CMSG_LEN((i + 1) * sizeof(int)) <= len; i++)
			Inline_Stack_Push(sv_2mortal(newSViv(*fdp++)));
	}
	Inline_Stack_Done;
}
#endif /* defined(CMSG_SPACE) && defined(CMSG_LEN) */
ALL_LIBC

	my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //= (
			$ENV{XDG_CACHE_HOME} //
			( ($ENV{HOME} // '/nonexistent').'/.cache' )
		).'/public-inbox/inline-c';
	warn "$inline_dir exists, not writable\n" if -e $inline_dir && !-w _;
	$all_libc = undef unless -d _ && -w _;
	if (defined $all_libc) {
		my $f = "$inline_dir/.public-inbox.lock";
		open my $oldout, '>&', \*STDOUT or die "dup(1): $!";
		open my $olderr, '>&', \*STDERR or die "dup(2): $!";
		open my $fh, '+>', $f or die "open($f): $!";
		open STDOUT, '>&', $fh or die "1>$f: $!";
		open STDERR, '>&', $fh or die "2>$f: $!";
		STDERR->autoflush(1);
		STDOUT->autoflush(1);

		# CentOS 7.x ships Inline 0.53, 0.64+ has built-in locking
		flock($fh, LOCK_EX) or die "LOCK_EX($f): $!";
		eval <<'EOM';
use Inline C => $all_libc, BUILD_NOISY => 1;
EOM
		my $err = $@;
		my $ndc_err = '';
		$err = $@;
		open(STDERR, '>&', $olderr) or warn "restore stderr: $!";
		open(STDOUT, '>&', $oldout) or warn "restore stdout: $!";
		if ($err) {
			seek($fh, 0, SEEK_SET);
			my @msg = <$fh>;
			warn "Inline::C build failed:\n",
				$ndc_err, $err, "\n", @msg;
			$all_libc = undef;
		}
	}
	unless ($all_libc) {
		require PublicInbox::SpawnPP;
		*pi_fork_exec = \&PublicInbox::SpawnPP::pi_fork_exec
	}
} # /BEGIN

sub which ($) {
	my ($file) = @_;
	return $file if index($file, '/') >= 0;
	for my $p (split(/:/, $ENV{PATH})) {
		$p .= "/$file";
		return $p if -x $p;
	}
	undef;
}

sub spawn ($;$$) {
	my ($cmd, $env, $opts) = @_;
	my $f = which($cmd->[0]) // die "$cmd->[0]: command not found\n";
	my @env;
	$opts ||= {};
	my %env = (%ENV, $env ? %$env : ());
	while (my ($k, $v) = each %env) {
		push @env, "$k=$v" if defined($v);
	}
	my $redir = [];
	for my $child_fd (0..2) {
		my $parent_fd = $opts->{$child_fd};
		if (defined($parent_fd) && $parent_fd !~ /\A[0-9]+\z/) {
			my $fd = fileno($parent_fd) //
					die "$parent_fd not an IO GLOB? $!";
			$parent_fd = $fd;
		}
		$redir->[$child_fd] = $parent_fd // $child_fd;
	}
	my $rlim = [];

	foreach my $l (@RLIMITS) {
		my $v = $opts->{$l} // next;
		my $r = eval "require BSD::Resource; BSD::Resource::$l();";
		unless (defined $r) {
			warn "$l undefined by BSD::Resource: $@\n";
			next;
		}
		push @$rlim, $r, @$v;
	}
	my $cd = $opts->{'-C'} // ''; # undef => NULL mapping doesn't work?
	my $pgid = $opts->{pgid} // -1;
	my $pid = pi_fork_exec($redir, $f, $cmd, \@env, $rlim, $cd, $pgid);
	die "fork_exec @$cmd failed: $!\n" unless $pid > 0;
	$pid;
}

sub popen_rd {
	my ($cmd, $env, $opt) = @_;
	pipe(my ($r, $w)) or die "pipe: $!\n";
	$opt ||= {};
	$opt->{1} = fileno($w);
	my $pid = spawn($cmd, $env, $opt);
	return ($r, $pid) if wantarray;
	my $ret = gensym;
	tie *$ret, 'PublicInbox::ProcessPipe', $pid, $r, @$opt{qw(cb arg)};
	$ret;
}

sub run_die ($;$$) {
	my ($cmd, $env, $rdr) = @_;
	my $pid = spawn($cmd, $env, $rdr);
	waitpid($pid, 0) == $pid or die "@$cmd did not finish";
	$? == 0 or die "@$cmd failed: \$?=$?\n";
}

1;
public-inbox-1.9.0/lib/PublicInbox/SpawnPP.pm000066400000000000000000000040311430031475700207700ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors 
# License: AGPL-3.0+ 

# Pure-Perl implementation of "spawn".  This can't take advantage
# of vfork, so no speedups under Linux for spawning from large processes.
package PublicInbox::SpawnPP;
use strict;
use v5.10.1;
use POSIX qw(dup2 _exit setpgid :signal_h);

# Pure Perl implementation for folks that do not use Inline::C
sub pi_fork_exec ($$$$$$$) {
	my ($redir, $f, $cmd, $env, $rlim, $cd, $pgid) = @_;
	my $old = POSIX::SigSet->new();
	my $set = POSIX::SigSet->new();
	$set->fillset or die "fillset failed: $!";
	sigprocmask(SIG_SETMASK, $set, $old) or die "can't block signals: $!";
	my $syserr;
	pipe(my ($r, $w));
	my $pid = fork;
	unless (defined $pid) { # compat with Inline::C version
		$syserr = $!;
		$pid = -1;
	}
	if ($pid == 0) {
		close $r;
		$SIG{__DIE__} = sub {
			warn(@_);
			syswrite($w, my $num = $! + 0);
			_exit(1);
		};
		for my $child_fd (0..$#$redir) {
			my $parent_fd = $redir->[$child_fd];
			next if $parent_fd == $child_fd;
			dup2($parent_fd, $child_fd) or
				die "dup2($parent_fd, $child_fd): $!";
		}
		if ($pgid >= 0 && !defined(setpgid(0, $pgid))) {
			die "setpgid(0, $pgid): $!";
		}
		for (keys %SIG) {
			$SIG{$_} = 'DEFAULT' if substr($_, 0, 1) ne '_';
		}
		if ($cd ne '') {
			chdir $cd or die "chdir $cd: $!";
		}
		while (@$rlim) {
			my ($r, $soft, $hard) = splice(@$rlim, 0, 3);
			BSD::Resource::setrlimit($r, $soft, $hard) or
				die "setrlimit($r=[$soft,$hard]: $!)";
		}
		$old->delset(POSIX::SIGCHLD) or die "delset SIGCHLD: $!";
		sigprocmask(SIG_SETMASK, $old) or die "SETMASK: ~SIGCHLD: $!";
		$cmd->[0] = $f;
		if ($ENV{MOD_PERL}) {
			@$cmd = (which('env'), '-i', @$env, @$cmd);
		} else {
			%ENV = map { split(/=/, $_, 2) } @$env;
		}
		undef $r;
		exec { $f } @$cmd;
		die "exec @$cmd failed: $!";
	}
	close $w;
	sigprocmask(SIG_SETMASK, $old) or die "can't unblock signals: $!";
	if (my $cerrnum = do { local $/, <$r> }) {
		$pid = -1;
		$! = $cerrnum;
	} else {
		$! = $syserr;
	}
	$pid;
}

1;
public-inbox-1.9.0/lib/PublicInbox/Syscall.pm000066400000000000000000000364461430031475700210710ustar00rootroot00000000000000# This is a fork of the (for now) unmaintained Sys::Syscall 0.25,
# specifically the Debian libsys-syscall-perl 0.25-6 version to
# fix upstream regressions in 0.25.
#
# See devel/syscall-list in the public-inbox source tree for maintenance
# , and machines from the GCC Farm:
# 
#
# This license differs from the rest of public-inbox
#
# This module is Copyright (c) 2005 Six Apart, Ltd.
# Copyright (C) all contributors 
#
# All rights reserved.
#
# You may distribute under the terms of either the GNU General Public
# License or the Artistic License, as specified in the Perl README file.
package PublicInbox::Syscall;
use v5.12;
use parent qw(Exporter);
use POSIX qw(ENOENT ENOSYS EINVAL O_NONBLOCK);
use Socket qw(SOL_SOCKET SCM_RIGHTS);
use Config;

# $VERSION = '0.25'; # Sys::Syscall version
our @EXPORT_OK = qw(epoll_ctl epoll_create epoll_wait
                  EPOLLIN EPOLLOUT EPOLLET
                  EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
                  EPOLLONESHOT EPOLLEXCLUSIVE
                  signalfd rename_noreplace);
our %EXPORT_TAGS = (epoll => [qw(epoll_ctl epoll_create epoll_wait
                             EPOLLIN EPOLLOUT
                             EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
                             EPOLLONESHOT EPOLLEXCLUSIVE)],
                );

use constant {
	EPOLLIN => 1,
	EPOLLOUT => 4,
	# EPOLLERR => 8,
	# EPOLLHUP => 16,
	# EPOLLRDBAND => 128,
	EPOLLEXCLUSIVE => (1 << 28),
	EPOLLONESHOT => (1 << 30),
	EPOLLET => (1 << 31),
	EPOLL_CTL_ADD => 1,
	EPOLL_CTL_DEL => 2,
	EPOLL_CTL_MOD => 3,
	SIZEOF_int => $Config{intsize},
	SIZEOF_size_t => $Config{sizesize},
	NUL => "\0",
};

use constant {
	TMPL_size_t => SIZEOF_size_t == 8 ? 'Q' : 'L',
	BYTES_4_hole => SIZEOF_size_t == 8 ? 'L' : '',
	# cmsg_len, cmsg_level, cmsg_type
	SIZEOF_cmsghdr => SIZEOF_int * 2 + SIZEOF_size_t,
};

my @BYTES_4_hole = BYTES_4_hole ? (0) : ();
our $loaded_syscall = 0;

sub _load_syscall {
    # props to Gaal for this!
    return if $loaded_syscall++;
    my $clean = sub {
        delete @INC{qw};
    };
    $clean->(); # don't trust modules before us
    my $rv = eval { require 'syscall.ph'; 1 } || eval { require 'sys/syscall.ph'; 1 };
    $clean->(); # don't require modules after us trust us
    $rv;
}


our (
     $SYS_epoll_create,
     $SYS_epoll_ctl,
     $SYS_epoll_wait,
     $SYS_signalfd4,
     $SYS_renameat2,
     );

my ($SYS_sendmsg, $SYS_recvmsg);
my $SYS_fstatfs; # don't need fstatfs64, just statfs.f_type
my ($FS_IOC_GETFLAGS, $FS_IOC_SETFLAGS);
my $SFD_CLOEXEC = 02000000; # Perl does not expose O_CLOEXEC
our $no_deprecated = 0;

if ($^O eq "linux") {
    my (undef, undef, $release, undef, $machine) = POSIX::uname();
    my ($maj, $min) = ($release =~ /\A([0-9]+)\.([0-9]+)/);
    $SYS_renameat2 = 0 if "$maj.$min" < 3.15;
    # whether the machine requires 64-bit numbers to be on 8-byte
    # boundaries.
    my $u64_mod_8 = 0;

    # if we're running on an x86_64 kernel, but a 32-bit process,
    # we need to use the x32 or i386 syscall numbers.
    if ($machine eq "x86_64" && $Config{ptrsize} == 4) {
        $machine = $Config{cppsymbols} =~ /\b__ILP32__=1\b/ ? 'x32' : 'i386';
    }

    # Similarly for mips64 vs mips
    if ($machine eq "mips64" && $Config{ptrsize} == 4) {
        $machine = "mips";
    }

    if ($machine =~ m/^i[3456]86$/) {
        $SYS_epoll_create = 254;
        $SYS_epoll_ctl    = 255;
        $SYS_epoll_wait   = 256;
        $SYS_signalfd4 = 327;
        $SYS_renameat2 //= 353;
	$SYS_fstatfs = 100;
	$SYS_sendmsg = 370;
	$SYS_recvmsg = 372;
	$FS_IOC_GETFLAGS = 0x80046601;
	$FS_IOC_SETFLAGS = 0x40046602;
    } elsif ($machine eq "x86_64") {
        $SYS_epoll_create = 213;
        $SYS_epoll_ctl    = 233;
        $SYS_epoll_wait   = 232;
        $SYS_signalfd4 = 289;
	$SYS_renameat2 //= 316;
	$SYS_fstatfs = 138;
	$SYS_sendmsg = 46;
	$SYS_recvmsg = 47;
	$FS_IOC_GETFLAGS = 0x80086601;
	$FS_IOC_SETFLAGS = 0x40086602;
    } elsif ($machine eq 'x32') {
        $SYS_epoll_create = 1073742037;
        $SYS_epoll_ctl = 1073742057;
        $SYS_epoll_wait = 1073742056;
        $SYS_signalfd4 = 1073742113;
	$SYS_renameat2 //= 0x40000000 + 316;
	$SYS_fstatfs = 138;
	$SYS_sendmsg = 0x40000206;
	$SYS_recvmsg = 0x40000207;
	$FS_IOC_GETFLAGS = 0x80046601;
	$FS_IOC_SETFLAGS = 0x40046602;
    } elsif ($machine eq 'sparc64') {
	$SYS_epoll_create = 193;
	$SYS_epoll_ctl = 194;
	$SYS_epoll_wait = 195;
	$u64_mod_8 = 1;
	$SYS_signalfd4 = 317;
	$SYS_renameat2 //= 345;
	$SFD_CLOEXEC = 020000000;
	$SYS_fstatfs = 158;
	$SYS_sendmsg = 114;
	$SYS_recvmsg = 113;
	$FS_IOC_GETFLAGS = 0x40086601;
	$FS_IOC_SETFLAGS = 0x80086602;
    } elsif ($machine =~ m/^parisc/) {
        $SYS_epoll_create = 224;
        $SYS_epoll_ctl    = 225;
        $SYS_epoll_wait   = 226;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 309;
    } elsif ($machine =~ m/^ppc64/) {
        $SYS_epoll_create = 236;
        $SYS_epoll_ctl    = 237;
        $SYS_epoll_wait   = 238;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 313;
	$SYS_renameat2 //= 357;
	$SYS_fstatfs = 100;
	$SYS_sendmsg = 341;
	$SYS_recvmsg = 342;
	$FS_IOC_GETFLAGS = 0x40086601;
	$FS_IOC_SETFLAGS = 0x80086602;
    } elsif ($machine eq "ppc") {
        $SYS_epoll_create = 236;
        $SYS_epoll_ctl    = 237;
        $SYS_epoll_wait   = 238;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 313;
	$SYS_renameat2 //= 357;
	$SYS_fstatfs = 100;
	$FS_IOC_GETFLAGS = 0x40086601;
	$FS_IOC_SETFLAGS = 0x80086602;
    } elsif ($machine =~ m/^s390/) { # untested, no machine on cfarm
        $SYS_epoll_create = 249;
        $SYS_epoll_ctl    = 250;
        $SYS_epoll_wait   = 251;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 322;
	$SYS_renameat2 //= 347;
	$SYS_fstatfs = 100;
	$SYS_sendmsg = 370;
	$SYS_recvmsg = 372;
    } elsif ($machine eq 'ia64') { # untested, no machine on cfarm
        $SYS_epoll_create = 1243;
        $SYS_epoll_ctl    = 1244;
        $SYS_epoll_wait   = 1245;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 289;
    } elsif ($machine eq "alpha") { # untested, no machine on cfarm
        # natural alignment, ints are 32-bits
        $SYS_epoll_create = 407;
        $SYS_epoll_ctl    = 408;
        $SYS_epoll_wait   = 409;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 484;
	$SFD_CLOEXEC = 010000000;
    } elsif ($machine =~ /\A(?:loong)?aarch64\z/ || $machine eq 'riscv64') {
        $SYS_epoll_create = 20;  # (sys_epoll_create1)
        $SYS_epoll_ctl    = 21;
        $SYS_epoll_wait   = 22;  # (sys_epoll_pwait)
        $u64_mod_8        = 1;
        $no_deprecated    = 1;
        $SYS_signalfd4 = 74;
	$SYS_renameat2 //= 276;
	$SYS_fstatfs = 44;
	$SYS_sendmsg = 211;
	$SYS_recvmsg = 212;
	$FS_IOC_GETFLAGS = 0x80086601;
	$FS_IOC_SETFLAGS = 0x40086602;
    } elsif ($machine =~ m/arm(v\d+)?.*l/) { # ARM OABI (untested on cfarm)
        $SYS_epoll_create = 250;
        $SYS_epoll_ctl    = 251;
        $SYS_epoll_wait   = 252;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 355;
	$SYS_renameat2 //= 382;
	$SYS_fstatfs = 100;
	$SYS_sendmsg = 296;
	$SYS_recvmsg = 297;
    } elsif ($machine =~ m/^mips64/) { # cfarm only has 32-bit userspace
        $SYS_epoll_create = 5207;
        $SYS_epoll_ctl    = 5208;
        $SYS_epoll_wait   = 5209;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 5283;
	$SYS_renameat2 //= 5311;
	$SYS_fstatfs = 5135;
	$SYS_sendmsg = 5045;
	$SYS_recvmsg = 5046;
	$FS_IOC_GETFLAGS = 0x40046601;
	$FS_IOC_SETFLAGS = 0x80046602;
    } elsif ($machine =~ m/^mips/) { # 32-bit, tested on mips64 cfarm machine
        $SYS_epoll_create = 4248;
        $SYS_epoll_ctl    = 4249;
        $SYS_epoll_wait   = 4250;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 4324;
	$SYS_renameat2 //= 4351;
	$SYS_fstatfs = 4100;
	$SYS_sendmsg = 4179;
	$SYS_recvmsg = 4177;
	$FS_IOC_GETFLAGS = 0x40046601;
	$FS_IOC_SETFLAGS = 0x80046602;
    } else {
        # as a last resort, try using the *.ph files which may not
        # exist or may be wrong
        _load_syscall();
        $SYS_epoll_create = eval { &SYS_epoll_create; } || 0;
        $SYS_epoll_ctl    = eval { &SYS_epoll_ctl;    } || 0;
        $SYS_epoll_wait   = eval { &SYS_epoll_wait;   } || 0;

	# Note: do NOT add new syscalls to depend on *.ph, here.
	# Better to miss syscalls (so we can fallback to IO::Poll)
	# than to use wrong ones, since the names are not stable
	# (at least not on FreeBSD), if the actual numbers are.
    }

    if ($u64_mod_8) {
        *epoll_wait = \&epoll_wait_mod8;
        *epoll_ctl = \&epoll_ctl_mod8;
    } else {
        *epoll_wait = \&epoll_wait_mod4;
        *epoll_ctl = \&epoll_ctl_mod4;
    }
}
# use Inline::C for *BSD-only or general POSIX stuff.
# Linux guarantees stable syscall numbering, BSDs only offer a stable libc
# use scripts/syscall-list on Linux to detect new syscall numbers

############################################################################
# epoll functions
############################################################################

sub epoll_defined { $SYS_epoll_create ? 1 : 0; }

sub epoll_create {
	syscall($SYS_epoll_create, $no_deprecated ? 0 : 100);
}

# epoll_ctl wrapper
# ARGS: (epfd, op, fd, events_mask)
sub epoll_ctl_mod4 {
    syscall($SYS_epoll_ctl, $_[0]+0, $_[1]+0, $_[2]+0, pack("LLL", $_[3], $_[2], 0));
}
sub epoll_ctl_mod8 {
    syscall($SYS_epoll_ctl, $_[0]+0, $_[1]+0, $_[2]+0, pack("LLLL", $_[3], 0, $_[2], 0));
}

# epoll_wait wrapper
# ARGS: (epfd, maxevents, timeout (milliseconds), arrayref)
#  arrayref: values modified to be [$fd, $event]
our $epoll_wait_events = '';
our $epoll_wait_size = 0;
sub epoll_wait_mod4 {
	my ($epfd, $maxevents, $timeout_msec, $events) = @_;
	# resize our static buffer if maxevents bigger than we've ever done
	if ($maxevents > $epoll_wait_size) {
		$epoll_wait_size = $maxevents;
		vec($epoll_wait_events, $maxevents * 12 * 8 - 1, 1) = 0;
	}
	@$events = ();
	my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events,
			$maxevents, $timeout_msec);
	for (0..$ct - 1) {
		# 12-byte struct epoll_event
		# 4 bytes uint32_t events mask (skipped, useless to us)
		# 8 bytes: epoll_data_t union (first 4 bytes are the fd)
		# So we skip the first 4 bytes and take the middle 4:
		$events->[$_] = unpack('L', substr($epoll_wait_events,
							12 * $_ + 4, 4));
	}
}

sub epoll_wait_mod8 {
	my ($epfd, $maxevents, $timeout_msec, $events) = @_;

	# resize our static buffer if maxevents bigger than we've ever done
	if ($maxevents > $epoll_wait_size) {
		$epoll_wait_size = $maxevents;
		vec($epoll_wait_events, $maxevents * 16 * 8 - 1, 1) = 0;
	}
	@$events = ();
	my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events,
			$maxevents, $timeout_msec,
			$no_deprecated ? undef : ());
	for (0..$ct - 1) {
		# 16-byte struct epoll_event
		# 4 bytes uint32_t events mask (skipped, useless to us)
		# 4 bytes padding (skipped, useless)
		# 8 bytes epoll_data_t union (first 4 bytes are the fd)
		# So skip the first 8 bytes, take 4, and ignore the last 4:
		$events->[$_] = unpack('L', substr($epoll_wait_events,
							16 * $_ + 8, 4));
	}
}

sub signalfd ($$) {
	my ($signos, $nonblock) = @_;
	if ($SYS_signalfd4) {
		my $set = POSIX::SigSet->new(@$signos);
		syscall($SYS_signalfd4, -1, "$$set",
			# $Config{sig_count} is NSIG, so this is NSIG/8:
			int($Config{sig_count}/8),
			# SFD_NONBLOCK == O_NONBLOCK for every architecture
			($nonblock ? O_NONBLOCK : 0) |$SFD_CLOEXEC);
	} else {
		$! = ENOSYS;
		undef;
	}
}

sub _rename_noreplace_racy ($$) {
	my ($old, $new) = @_;
	if (link($old, $new)) {
		warn "unlink $old: $!\n" if !unlink($old) && $! != ENOENT;
		1
	} else {
		undef;
	}
}

# TODO: support FD args?
sub rename_noreplace ($$) {
	my ($old, $new) = @_;
	if ($SYS_renameat2) { # RENAME_NOREPLACE = 1, AT_FDCWD = -100
		my $ret = syscall($SYS_renameat2, -100, $old, -100, $new, 1);
		if ($ret == 0) {
			1; # like rename() perlop
		} elsif ($! == ENOSYS || $! == EINVAL) {
			undef $SYS_renameat2;
			_rename_noreplace_racy($old, $new);
		} else {
			undef
		}
	} else {
		_rename_noreplace_racy($old, $new);
	}
}

sub nodatacow_fh ($) {
	my ($fh) = @_;
	my $buf = "\0" x 120;
	syscall($SYS_fstatfs // return, fileno($fh), $buf) == 0 or
		return warn("fstatfs: $!\n");
	my $f_type = unpack('l!', $buf); # statfs.f_type is a signed word
	return if $f_type != 0x9123683E; # BTRFS_SUPER_MAGIC

	$FS_IOC_GETFLAGS //
		return warn('FS_IOC_GETFLAGS undefined for platform');
	ioctl($fh, $FS_IOC_GETFLAGS, $buf) //
		return warn("FS_IOC_GETFLAGS: $!\n");
	my $attr = unpack('l!', $buf);
	return if ($attr & 0x00800000); # FS_NOCOW_FL;
	ioctl($fh, $FS_IOC_SETFLAGS, pack('l', $attr | 0x00800000)) //
		warn("FS_IOC_SETFLAGS: $!\n");
}

sub nodatacow_dir {
	if (open my $fh, '<', $_[0]) { nodatacow_fh($fh) }
}

sub CMSG_ALIGN ($) { ($_[0] + SIZEOF_size_t - 1) & ~(SIZEOF_size_t - 1) }
use constant CMSG_ALIGN_SIZEOF_cmsghdr => CMSG_ALIGN(SIZEOF_cmsghdr);
sub CMSG_SPACE ($) { CMSG_ALIGN($_[0]) + CMSG_ALIGN_SIZEOF_cmsghdr }
sub CMSG_LEN ($) { CMSG_ALIGN_SIZEOF_cmsghdr + $_[0] }
use constant msg_controllen => CMSG_SPACE(10 * SIZEOF_int) + 16; # 10 FDs

if (defined($SYS_sendmsg) && defined($SYS_recvmsg)) {
no warnings 'once';
*send_cmd4 = sub ($$$$) {
	my ($sock, $fds, undef, $flags) = @_;
	my $iov = pack('P'.TMPL_size_t,
			$_[2] // NUL, length($_[2] // NUL) || 1);
	my $cmsghdr = pack(TMPL_size_t . # cmsg_len
			'LL' .  # cmsg_level, cmsg_type,
			('i' x scalar(@$fds)),
			CMSG_LEN(scalar(@$fds) * SIZEOF_int), # cmsg_len
			SOL_SOCKET, SCM_RIGHTS, # cmsg_{level,type}
			@$fds); # CMSG_DATA
	my $mh = pack('PL' . # msg_name, msg_namelen (socklen_t (U32))
			BYTES_4_hole . # 4-byte padding on 64-bit
			'P'.TMPL_size_t . # msg_iov, msg_iovlen,
			'P'.TMPL_size_t . # msg_control, msg_controllen,
			'i', # msg_flags
			NUL, 0, # msg_name, msg_namelen (unused)
			@BYTES_4_hole,
			$iov, 1, # msg_iov, msg_iovlen
			$cmsghdr, # msg_control
			CMSG_SPACE(scalar(@$fds) * SIZEOF_int), # msg_controllen
			0); # msg_flags
	my $sent;
	my $try = 0;
	do {
		$sent = syscall($SYS_sendmsg, fileno($sock), $mh, $flags);
	} while ($sent < 0 &&
			($!{ENOBUFS} || $!{ENOMEM} || $!{ETOOMANYREFS}) &&
			(++$try < 50) &&
			warn "sleeping on sendmsg: $! (#$try)\n" &&
			select(undef, undef, undef, 0.1) == 0);
	$sent >= 0 ? $sent : undef;
};

*recv_cmd4 = sub ($$$) {
	my ($sock, undef, $len) = @_;
	vec($_[1], ($len + 1) * 8, 1) = 0;
	my $cmsghdr = "\0" x msg_controllen; # 10 * sizeof(int)
	my $iov = pack('P'.TMPL_size_t, $_[1], $len);
	my $mh = pack('PL' . # msg_name, msg_namelen (socklen_t (U32))
			BYTES_4_hole . # 4-byte padding on 64-bit
			'P'.TMPL_size_t . # msg_iov, msg_iovlen,
			'P'.TMPL_size_t . # msg_control, msg_controllen,
			'i', # msg_flags
			NUL, 0, # msg_name, msg_namelen (unused)
			@BYTES_4_hole,
			$iov, 1, # msg_iov, msg_iovlen
			$cmsghdr, # msg_control
			msg_controllen,
			0); # msg_flags
	my $r = syscall($SYS_recvmsg, fileno($sock), $mh, 0);
	return (undef) if $r < 0; # $! set
	substr($_[1], $r, length($_[1]), '');
	my @ret;
	if ($r > 0) {
		my ($len, $lvl, $type, @fds) = unpack(TMPL_size_t . # cmsg_len
					'LLi*', # cmsg_level, cmsg_type, @fds
					$cmsghdr);
		if ($lvl == SOL_SOCKET && $type == SCM_RIGHTS) {
			$len -= CMSG_ALIGN_SIZEOF_cmsghdr;
			@ret = @fds[0..(($len / SIZEOF_int) - 1)];
		}
	}
	@ret;
};
}

1;

=head1 WARRANTY

This is free software. IT COMES WITHOUT WARRANTY OF ANY KIND.

=head1 AUTHORS

Brad Fitzpatrick 
public-inbox-1.9.0/lib/PublicInbox/TLS.pm000066400000000000000000000024411430031475700201050ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# IO::Socket::SSL support code
package PublicInbox::TLS;
use strict;
use IO::Socket::SSL;
use PublicInbox::Syscall qw(EPOLLIN EPOLLOUT);
use Carp qw(carp croak);

sub err () { $SSL_ERROR }

# returns the EPOLL event bit which matches the existing SSL error
sub epollbit () {
	return EPOLLIN if $SSL_ERROR == SSL_WANT_READ;
	return EPOLLOUT if $SSL_ERROR == SSL_WANT_WRITE;
	carp "unexpected SSL error: $SSL_ERROR";
	undef;
}

sub _ctx_new ($) {
	my ($tlsd) = @_;
	my $ctx = IO::Socket::SSL::SSL_Context->new(
				@{$tlsd->{ssl_ctx_opt}}, SSL_server => 1) or
		croak "SSL_Context->new: $SSL_ERROR";

	# save ~34K per idle connection (cf. SSL_CTX_set_mode(3ssl))
	# RSS goes from 346MB to 171MB with 10K idle NNTPS clients on amd64
	# cf. https://rt.cpan.org/Ticket/Display.html?id=129463
	my $mode = eval { Net::SSLeay::MODE_RELEASE_BUFFERS() };
	if ($mode && $ctx->{context}) {
		eval { Net::SSLeay::CTX_set_mode($ctx->{context}, $mode) };
		warn "W: $@ (setting SSL_MODE_RELEASE_BUFFERS)\n" if $@;
	}
	$ctx;
}

sub start {
	my ($io, $tlsd) = @_;
	IO::Socket::SSL->start_SSL($io, SSL_server => 1,
		SSL_reuse_ctx => ($tlsd->{ssl_ctx} //= _ctx_new($tlsd)),
		SSL_startHandshake => 0);
}

1;
public-inbox-1.9.0/lib/PublicInbox/TestCommon.pm000066400000000000000000000516511430031475700215420ustar00rootroot00000000000000# Copyright (C) all contributors 
# License: AGPL-3.0+ 

# internal APIs used only for tests
package PublicInbox::TestCommon;
use strict;
use parent qw(Exporter);
use v5.10.1;
use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD :seek);
use POSIX qw(dup2);
use IO::Socket::INET;
use File::Spec;
our @EXPORT;
my $lei_loud = $ENV{TEST_LEI_ERR_LOUD};
my $tail_cmd = $ENV{TAIL};
our ($lei_opt, $lei_out, $lei_err, $lei_cwdfh);
BEGIN {
	@EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods
		run_script start_script key2sub xsys xsys_e xqx eml_load tick
		have_xapian_compact json_utf8 setup_public_inboxes create_inbox
		tcp_host_port test_lei lei lei_ok $lei_out $lei_err $lei_opt
		test_httpd xbail require_cmd is_xdeeply tail_f
		ignore_inline_c_missing);
	require Test::More;
	my @methods = grep(!/\W/, @Test::More::EXPORT);
	eval(join('', map { "*$_=\\&Test::More::$_;" } @methods));
	die $@ if $@;
	push @EXPORT, @methods;
}

sub xbail (@) { BAIL_OUT join(' ', map { ref() ? (explain($_)) : ($_) } @_) }

sub eml_load ($) {
	my ($path, $cb) = @_;
	open(my $fh, '<', $path) or die "open $path: $!";
	require PublicInbox::Eml;
	PublicInbox::Eml->new(\(do { local $/; <$fh> }));
}

sub tmpdir (;$) {
	my ($base) = @_;
	require File::Temp;
	unless (defined $base) {
		($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
	}
	my $tmpdir = File::Temp->newdir("pi-$base-$$-XXXX", TMPDIR => 1);
	($tmpdir->dirname, $tmpdir);
}

sub tcp_server () {
	my %opt = (
		ReuseAddr => 1,
		Proto => 'tcp',
		Type => Socket::SOCK_STREAM(),
		Listen => 1024,
		Blocking => 0,
	);
	eval {
		die 'IPv4-only' if $ENV{TEST_IPV4_ONLY};
		require IO::Socket::INET6;
		IO::Socket::INET6->new(%opt, LocalAddr => '[::1]')
	} || eval {
		die 'IPv6-only' if $ENV{TEST_IPV6_ONLY};
		IO::Socket::INET->new(%opt, LocalAddr => '127.0.0.1')
	} || BAIL_OUT "failed to create TCP server: $! ($@)";
}

sub tcp_host_port ($) {
	my ($s) = @_;
	my ($h, $p) = ($s->sockhost, $s->sockport);
	my $ipv4 = $s->sockdomain == Socket::AF_INET();
	if (wantarray) {
		$ipv4 ? ($h, $p) : ("[$h]", $p);
	} else {
		$ipv4 ? "$h:$p" : "[$h]:$p";
	}
}

sub tcp_connect {
	my ($dest, %opt) = @_;
	my $addr = tcp_host_port($dest);
	my $s = ref($dest)->new(
		Proto => 'tcp',
		Type => Socket::SOCK_STREAM(),
		PeerAddr => $addr,
		%opt,
	) or BAIL_OUT "failed to connect to $addr: $!";
	$s->autoflush(1);
	$s;
}

sub require_cmd ($;$) {
	my ($cmd, $maybe) = @_;
	require PublicInbox::Spawn;
	my $bin = PublicInbox::Spawn::which($cmd);
	return $bin if $bin;
	$maybe ? 0 : plan(skip_all => "$cmd missing from PATH for $0");
}

sub have_xapian_compact () {
	require_cmd($ENV{XAPIAN_COMPACT} || 'xapian-compact', 1);
}

sub require_git ($;$) {
	my ($req, $maybe) = @_;
	my ($req_maj, $req_min, $req_sub) = split(/\./, $req);
	my ($cur_maj, $cur_min, $cur_sub) = (xqx([qw(git --version)])
			=~ /version (\d+)\.(\d+)(?:\.(\d+))?/);

	my $req_int = ($req_maj << 24) | ($req_min << 16) | ($req_sub // 0);
	my $cur_int = ($cur_maj << 24) | ($cur_min << 16) | ($cur_sub // 0);
	if ($cur_int < $req_int) {
		return 0 if $maybe;
		plan skip_all =>
			"git $req+ required, have $cur_maj.$cur_min.$cur_sub";
	}
	1;
}

sub require_mods {
	my @mods = @_;
	my $maybe = pop @mods if $mods[-1] =~ /\A[0-9]+\z/;
	my @need;
	while (my $mod = shift(@mods)) {
		if ($mod eq 'lei') {
			require_git(2.6, $maybe ? $maybe : ());
			push @mods, qw(DBD::SQLite Search::Xapian);
			$mod = 'json'; # fall-through
		}
		if ($mod eq 'json') {
			$mod = 'Cpanel::JSON::XS||JSON::MaybeXS||JSON||JSON::PP'
		} elsif ($mod eq '-httpd') {
			push @mods, qw(Plack::Builder Plack::Util);
			next;
		} elsif ($mod eq '-imapd') {
			push @mods, qw(Parse::RecDescent DBD::SQLite
					Email::Address::XS||Mail::Address);
			next;
		} elsif ($mod eq '-nntpd' || $mod eq 'v2') {
			push @mods, qw(DBD::SQLite);
			next;
		}
		if ($mod eq 'Search::Xapian') {
			if (eval { require PublicInbox::Search } &&
				PublicInbox::Search::load_xapian()) {
				next;
			}
		} elsif (index($mod, '||') >= 0) { # "Foo||Bar"
			my $ok;
			for my $m (split(/\Q||\E/, $mod)) {
				eval "require $m";
				next if $@;
				$ok = $m;
				last;
			}
			next if $ok;
		} else {
			eval "require $mod";
		}
		if ($@) {
			diag "require $mod: $@" if $mod =~ /Gcf2/;
			push @need, $mod;
		} elsif ($mod eq 'IO::Socket::SSL' &&
			# old versions of IO::Socket::SSL aren't supported
			# by libnet, at least:
			# https://rt.cpan.org/Ticket/Display.html?id=100529
				!eval{ IO::Socket::SSL->VERSION(2.007); 1 }) {
			push @need, $@;
		}
	}
	return unless @need;
	my $m = join(', ', @need)." missing for $0";
	skip($m, $maybe) if $maybe;
	plan(skip_all => $m)
}

sub key2script ($) {
	my ($key) = @_;
	return $key if ($key eq 'git' || index($key, '/') >= 0);
	# n.b. we may have scripts which don't start with "public-inbox" in
	# the future:
	$key =~ s/\A([-\.])/public-inbox$1/;
	'blib/script/'.$key;
}

my @io_mode = ([ *STDIN{IO}, '+<&' ], [ *STDOUT{IO}, '+>&' ],
		[ *STDERR{IO}, '+>&' ]);

sub _prepare_redirects ($) {
	my ($fhref) = @_;
	my $orig_io = [];
	for (my $fd = 0; $fd <= $#io_mode; $fd++) {
		my $fh = $fhref->[$fd] or next;
		my ($oldfh, $mode) = @{$io_mode[$fd]};
		open my $orig, $mode, $oldfh or die "$oldfh $mode stash: $!";
		$orig_io->[$fd] = $orig;
		open $oldfh, $mode, $fh or die "$oldfh $mode redirect: $!";
	}
	$orig_io;
}

sub _undo_redirects ($) {
	my ($orig_io) = @_;
	for (my $fd = 0; $fd <= $#io_mode; $fd++) {
		my $fh = $orig_io->[$fd] or next;
		my ($oldfh, $mode) = @{$io_mode[$fd]};
		open $oldfh, $mode, $fh or die "$$oldfh $mode redirect: $!";
	}
}

# $opt->{run_mode} (or $ENV{TEST_RUN_MODE}) allows choosing between
# three ways to spawn our own short-lived Perl scripts for testing:
#
# 0 - (fork|vfork) + execve, the most realistic but slowest
# 1 - (not currently implemented)
# 2 - preloading and running in current process (slightly faster than 1)
#
# 2 is not compatible with scripts which use "exit" (which we'll try to
# avoid in the future).
# The default is 2.
our $run_script_exit_code;
sub RUN_SCRIPT_EXIT () { "RUN_SCRIPT_EXIT\n" };
sub run_script_exit {
	$run_script_exit_code = $_[0] // 0;
	die RUN_SCRIPT_EXIT;
}

our %cached_scripts;
sub key2sub ($) {
	my ($key) = @_;
	$cached_scripts{$key} //= do {
		my $f = key2script($key);
		open my $fh, '<', $f or die "open $f: $!";
		my $str = do { local $/; <$fh> };
		my $pkg = (split(m!/!, $f))[-1];
		$pkg =~ s/([a-z])([a-z0-9]+)(\.t)?\z/\U$1\E$2/;
		$pkg .= "_T" if $3;
		$pkg =~ tr/-.//d;
		$pkg = "PublicInbox::TestScript::$pkg";
		eval <can('main');
	}
}

sub _run_sub ($$$) {
	my ($sub, $key, $argv) = @_;
	local @ARGV = @$argv;
	$run_script_exit_code = undef;
	my $exit_code = eval { $sub->(@$argv) };
	if ($@ eq RUN_SCRIPT_EXIT) {
		$@ = '';
		$exit_code = $run_script_exit_code;
		$? = ($exit_code << 8);
	} elsif (defined($exit_code)) {
		$? = ($exit_code << 8);
	} elsif ($@) { # mimic die() behavior when uncaught
		warn "E: eval-ed $key: $@\n";
		$? = ($! << 8) if $!;
		$? = (255 << 8) if $? == 0;
	} else {
		die "BUG: eval-ed $key: no exit code or \$@\n";
	}
}

sub run_script ($;$$) {
	my ($cmd, $env, $opt) = @_;
	my ($key, @argv) = @$cmd;
	my $run_mode = $ENV{TEST_RUN_MODE} // $opt->{run_mode} // 1;
	my $sub = $run_mode == 0 ? undef : key2sub($key);
	my $fhref = [];
	my $spawn_opt = {};
	my @tail_paths;
	for my $fd (0..2) {
		my $redir = $opt->{$fd};
		my $ref = ref($redir);
		if ($ref eq 'SCALAR') {
			my $fh;
			if ($tail_cmd && $ENV{TAIL_ALL} && $fd > 0) {
				require File::Temp;
				$fh = File::Temp->new("fd.$fd-XXXX", TMPDIR=>1);
				push @tail_paths, $fh->filename;
			} else {
				open $fh, '+>', undef;
			}
			$fh or xbail $!;
			$fhref->[$fd] = $fh;
			$spawn_opt->{$fd} = $fh;
			next if $fd > 0;
			$fh->autoflush(1);
			print $fh $$redir or die "print: $!";
			seek($fh, 0, SEEK_SET) or die "seek: $!";
		} elsif ($ref eq 'GLOB') {
			$spawn_opt->{$fd} = $fhref->[$fd] = $redir;
		} elsif ($ref) {
			die "unable to deal with $ref $redir";
		}
	}
	my $tail = @tail_paths ? tail_f(@tail_paths) : undef;
	if ($key =~ /-(index|convert|extindex|convert|xcpdb)\z/) {
		unshift @argv, '--no-fsync';
	}
	if ($run_mode == 0) {
		# spawn an independent new process, like real-world use cases:
		require PublicInbox::Spawn;
		my $cmd = [ key2script($key), @argv ];
		if (my $d = $opt->{'-C'}) {
			$cmd->[0] = File::Spec->rel2abs($cmd->[0]);
			$spawn_opt->{'-C'} = $d;
		}
		my $pid = PublicInbox::Spawn::spawn($cmd, $env, $spawn_opt);
		if (defined $pid) {
			my $r = waitpid($pid, 0) // die "waitpid: $!";
			$r == $pid or die "waitpid: expected $pid, got $r";
		}
	} else { # localize and run everything in the same process:
		# note: "local *STDIN = *STDIN;" and so forth did not work in
		# old versions of perl
		my $umask = umask;
		local %ENV = $env ? (%ENV, %$env) : %ENV;
		local @SIG{keys %SIG} = map { undef } values %SIG;
		local $SIG{FPE} = 'IGNORE'; # Perl default
		local $0 = join(' ', @$cmd);
		my $orig_io = _prepare_redirects($fhref);
		my $cwdfh = $lei_cwdfh;
		if (my $d = $opt->{'-C'}) {
			unless ($cwdfh) {
				opendir $cwdfh, '.' or die "opendir .: $!";
			}
			chdir $d or die "chdir $d: $!";
		}
		_run_sub($sub, $key, \@argv);
		eval { PublicInbox::Inbox::cleanup_task() };
		die "fchdir(restore): $!" if $cwdfh && !chdir($cwdfh);
		_undo_redirects($orig_io);
		select STDOUT;
		umask($umask);
	}

	{ local $?; undef $tail };
	# slurp the redirects back into user-supplied strings
	for my $fd (1..2) {
		my $fh = $fhref->[$fd] or next;
		next unless -f $fh;
		seek($fh, 0, SEEK_SET) or die "seek: $!";
		my $redir = $opt->{$fd};
		local $/;
		$$redir = <$fh>;
	}
	$? == 0;
}

sub tick (;$) {
	my $tick = shift // 0.1;
	select undef, undef, undef, $tick;
	1;
}

sub wait_for_tail {
	my ($tail_pid, $want) = @_;
	my $wait = 2; # "tail -F" sleeps 1.0s at-a-time w/o inotify/kevent
	if ($^O eq 'linux') { # GNU tail may use inotify
		state $tail_has_inotify;
		return tick if !$want && $tail_has_inotify; # before TERM
		my $end = time + $wait; # wait for startup:
		my @ino;
		do {
			@ino = grep {
				(readlink($_) // '') =~ /\binotify\b/
			} glob("/proc/$tail_pid/fd/*");
		} while (!@ino && time <= $end and tick);
		return if !@ino;
		$tail_has_inotify = 1;
		$ino[0] =~ s!/fd/!/fdinfo/!;
		my @info;
		do {
			if (open my $fh, '<', $ino[0]) {
				local $/ = "\n";
				@info = grep(/^inotify wd:/, <$fh>);
			}
		} while (scalar(@info) < $want && time <= $end and tick);
	} else {
		sleep($wait);
	}
}

# like system() built-in, but uses spawn() for env/rdr + vfork
sub xsys {
	my ($cmd, $env, $rdr) = @_;
	if (ref($cmd)) {
		$rdr ||= {};
	} else {
		$cmd = [ @_ ];
		$env = undef;
		$rdr = {};
	}
	run_script($cmd, $env, { %$rdr, run_mode => 0 });
	$? >> 8
}

sub xsys_e { # like "/bin/sh -e"
	xsys(@_) == 0 or
		BAIL_OUT (ref $_[0] ? "@{$_[0]}" : "@_"). " failed \$?=$?"
}

# like `backtick` or qx{} op, but uses spawn() for env/rdr + vfork
sub xqx {
	my ($cmd, $env, $rdr) = @_;
	$rdr //= {};
	run_script($cmd, $env, { %$rdr, run_mode => 0, 1 => \(my $out) });
	wantarray ? split(/^/m, $out) : $out;
}

sub tail_f (@) {
	$tail_cmd or return; # "tail -F" or "tail -f"
	for (@_) { open(my $fh, '>>', $_) or die $! };
	my $cmd = [ split(/ /, $tail_cmd), @_ ];
	require PublicInbox::Spawn;
	my $pid = PublicInbox::Spawn::spawn($cmd, undef, { 1 => 2 });
	wait_for_tail($pid, scalar @_);
	require PublicInbox::AutoReap;
	PublicInbox::AutoReap->new($pid, \&wait_for_tail);
}

sub start_script {
	my ($cmd, $env, $opt) = @_;
	my ($key, @argv) = @$cmd;
	my $run_mode = $ENV{TEST_RUN_MODE} // $opt->{run_mode} // 2;
	my $sub = $run_mode == 0 ? undef : key2sub($key);
	my $tail;
	if ($tail_cmd) {
		my @paths;
		for (@argv) {
			next unless /\A--std(?:err|out)=(.+)\z/;
			push @paths, $1;
		}
		if ($opt) {
			for (1, 2) {
				my $f = $opt->{$_} or next;
				if (!ref($f)) {
					push @paths, $f;
				} elsif (ref($f) eq 'GLOB' && $^O eq 'linux') {
					my $fd = fileno($f);
					my $f = readlink "/proc/$$/fd/$fd";
					push @paths, $f if -e $f;
				}
			}
		}
		$tail = tail_f(@paths);
	}
	my $pid = fork // die "fork: $!\n";
	if ($pid == 0) {
		eval { PublicInbox::DS->Reset };
		# pretend to be systemd (cf. sd_listen_fds(3))
		# 3 == SD_LISTEN_FDS_START
		my $fd;
		for ($fd = 0; 1; $fd++) {
			my $s = $opt->{$fd};
			last if $fd >= 3 && !defined($s);
			next unless $s;
			my $fl = fcntl($s, F_GETFD, 0);
			if (($fl & FD_CLOEXEC) != FD_CLOEXEC) {
				warn "got FD:".fileno($s)." w/o CLOEXEC\n";
			}
			fcntl($s, F_SETFD, $fl &= ~FD_CLOEXEC);
			dup2(fileno($s), $fd) or die "dup2 failed: $!\n";
		}
		%ENV = (%ENV, %$env) if $env;
		my $fds = $fd - 3;
		if ($fds > 0) {
			$ENV{LISTEN_PID} = $$;
			$ENV{LISTEN_FDS} = $fds;
		}
		if ($opt->{-C}) { chdir($opt->{-C}) or die "chdir: $!" }
		$0 = join(' ', @$cmd);
		if ($sub) {
			eval { PublicInbox::DS->Reset };
			_run_sub($sub, $key, \@argv);
			POSIX::_exit($? >> 8);
		} else {
			exec(key2script($key), @argv);
			die "FAIL: ",join(' ', $key, @argv), ": $!\n";
		}
	}
	require PublicInbox::AutoReap;
	my $td = PublicInbox::AutoReap->new($pid);
	$td->{-extra} = $tail;
	$td;
}

# favor lei() or lei_ok() over $lei for new code
sub lei (@) {
	my ($cmd, $env, $xopt) = @_;
	$lei_out = $lei_err = '';
	if (!ref($cmd)) {
		($env, $xopt) = grep { (!defined) || ref } @_;
		$cmd = [ grep { defined && !ref } @_ ];
	}
	my $res = run_script(['lei', @$cmd], $env, $xopt // $lei_opt);
	if ($lei_err ne '') {
		if ($lei_err =~ /Use of uninitialized/ ||
			$lei_err =~ m!\bArgument .*? isn't numeric in !) {
			fail "lei_err=$lei_err";
		} else {
			diag "lei_err=$lei_err" if $lei_loud;
		}
	}
	$res;
};

sub lei_ok (@) {
	state $PWD = $ENV{PWD} // Cwd::getcwd();
	my $msg = ref($_[-1]) eq 'SCALAR' ? pop(@_) : undef;
	my $tmpdir = quotemeta(File::Spec->tmpdir);
	# filter out anything that looks like a path name for consistent logs
	my @msg = ref($_[0]) eq 'ARRAY' ? @{$_[0]} : @_;
	if (!$lei_loud) {
		for (@msg) {
			s!\A([a-z0-9]+://)[^/]+/!$1\$HOST_PORT/!;
			s!$tmpdir\b/(?:[^/]+/)?!\$TMPDIR/!g;
			s!\Q$PWD\E\b!\$PWD!g;
		}
	}
	ok(lei(@_), "lei @msg". ($msg ? " ($$msg)" : '')) or
		diag "\$?=$? err=$lei_err";
}

sub json_utf8 () {
	state $x = ref(PublicInbox::Config->json)->new->utf8->canonical;
}

sub is_xdeeply ($$$) {
	my ($x, $y, $desc) = @_;
	my $ok = is_deeply($x, $y, $desc);
	diag explain([$x, '!=', $y]) if !$ok;
	$ok;
}

sub ignore_inline_c_missing {
	$_[0] = join('', grep(/\S/, grep(!/compilation aborted/,
		grep(!/\bInline\b/, split(/^/m, $_[0])))));
}

sub test_lei {
SKIP: {
	my ($cb) = pop @_;
	my $test_opt = shift // {};
	local $lei_cwdfh;
	opendir $lei_cwdfh, '.' or xbail "opendir .: $!";
	require_git(2.6, 1) or skip('git 2.6+ required for lei test', 2);
	my $mods = $test_opt->{mods} // [ 'lei' ];
	require_mods(@$mods, 2);

	# set PERL_INLINE_DIRECTORY before clobbering XDG_CACHE_HOME
	require PublicInbox::Spawn;
	require PublicInbox::Config;
	require File::Path;

	local %ENV = %ENV;
	delete $ENV{XDG_DATA_HOME};
	delete $ENV{XDG_CONFIG_HOME};
	delete $ENV{XDG_CACHE_HOME};
	$ENV{GIT_COMMITTER_EMAIL} = 'lei@example.com';
	$ENV{GIT_COMMITTER_NAME} = 'lei user';
	$ENV{LANG} = $ENV{LC_ALL} = 'C';
	my (undef, $fn, $lineno) = caller(0);
	my $t = "$fn:$lineno";
	state $lei_daemon = PublicInbox::Spawn->can('send_cmd4') || do {
			require PublicInbox::Syscall;
			PublicInbox::Syscall->can('send_cmd4');
		} || eval { require Socket::MsgHdr; 1 };
	unless ($lei_daemon) {
		skip('Inline::C unconfigured/missing '.
'(mkdir -p ~/.cache/public-inbox/inline-c) OR Socket::MsgHdr missing',
			1);
	}
	$lei_opt = { 1 => \$lei_out, 2 => \$lei_err };
	my ($daemon_pid, $for_destroy, $daemon_xrd);
	my $tmpdir = $test_opt->{tmpdir};
	File::Path::mkpath($tmpdir) if (defined $tmpdir && !-d $tmpdir);
	($tmpdir, $for_destroy) = tmpdir unless $tmpdir;
	state $persist_xrd = $ENV{TEST_LEI_DAEMON_PERSIST_DIR};
	SKIP: {
		$ENV{TEST_LEI_ONESHOT} and
			xbail 'TEST_LEI_ONESHOT no longer supported';
		my $home = "$tmpdir/lei-daemon";
		mkdir($home, 0700) or BAIL_OUT "mkdir: $!";
		local $ENV{HOME} = $home;
		my $persist;
		if ($persist_xrd && !$test_opt->{daemon_only}) {
			$persist = $daemon_xrd = $persist_xrd;
		} else {
			$daemon_xrd = "$home/xdg_run";
			mkdir($daemon_xrd, 0700) or BAIL_OUT "mkdir: $!";
		}
		local $ENV{XDG_RUNTIME_DIR} = $daemon_xrd;
		$cb->();
		if ($persist) { # remove before ~/.local gets removed
			File::Path::rmtree([glob("$home/*")]);
			File::Path::rmtree("$home/.config");
		} else {
			lei_ok(qw(daemon-pid), \"daemon-pid after $t");
			chomp($daemon_pid = $lei_out);
			if (!$daemon_pid) {
				fail("daemon not running after $t");
				skip 'daemon died unexpectedly', 2;
			}
			ok(kill(0, $daemon_pid), "daemon running after $t");
			lei_ok(qw(daemon-kill), \"daemon-kill after $t");
		}
	}; # SKIP for lei_daemon
	if ($daemon_pid) {
		for (0..10) {
			kill(0, $daemon_pid) or last;
			tick;
		}
		ok(!kill(0, $daemon_pid), "$t daemon stopped");
		my $f = "$daemon_xrd/lei/errors.log";
		open my $fh, '<', $f or BAIL_OUT "$f: $!";
		my @l = <$fh>;
		is_xdeeply(\@l, [],
			"$t daemon XDG_RUNTIME_DIR/lei/errors.log empty");
	}
}; # SKIP if missing git 2.6+ || Xapian || SQLite || json
} # /test_lei

# returns the pathname to a ~/.public-inbox/config in scalar context,
# ($test_home, $pi_config_pathname) in list context
sub setup_public_inboxes () {
	my $test_home = "t/home2";
	my $pi_config = "$test_home/.public-inbox/config";
	my $stamp = "$test_home/setup-stamp";
	my @ret = ($test_home, $pi_config);
	return @ret if -f $stamp;

	require PublicInbox::Lock;
	my $lk = bless { lock_path => "$test_home/setup.lock" },
			'PublicInbox::Lock';
	my $end = $lk->lock_for_scope;
	return @ret if -f $stamp;

	local $ENV{PI_CONFIG} = $pi_config;
	for my $V (1, 2) {
		run_script([qw(-init --skip-docdata), "-V$V",
				'--newsgroup', "t.v$V", "t$V",
				"$test_home/t$V", "http://example.com/t$V",
				"t$V\@example.com" ]) or xbail "init v$V";
		unlink "$test_home/t$V/description" or xbail "unlink $!";
	}
	require PublicInbox::Config;
	require PublicInbox::InboxWritable;
	my $cfg = PublicInbox::Config->new;
	my $seen = 0;
	$cfg->each_inbox(sub {
		my ($ibx) = @_;
		$ibx->{-no_fsync} = 1;
		my $im = PublicInbox::InboxWritable->new($ibx)->importer(0);
		my $V = $ibx->version;
		my @eml = (glob('t/*.eml'), 't/data/0001.patch');
		for (@eml) {
			next if $_ eq 't/psgi_v2-old.eml'; # dup mid
			$im->add(eml_load($_)) or BAIL_OUT "v$V add $_";
			$seen++;
		}
		$im->done;
	});
	$seen or BAIL_OUT 'no imports';
	open my $fh, '>', $stamp or BAIL_OUT "open $stamp: $!";
	@ret;
}

sub create_inbox ($$;@) {
	my $ident = shift;
	my $cb = pop;
	my %opt = @_;
	require PublicInbox::Lock;
	require PublicInbox::InboxWritable;
	require PublicInbox::Import;
	my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
	my ($db) = (PublicInbox::Import::default_branch() =~ m!([^/]+)\z!);
	my $dir = "t/data-gen/$base.$ident-$db";
	my $new = !-d $dir;
	if ($new) {
		mkdir $dir; # may race
		-d $dir or BAIL_OUT "$dir could not be created: $!";
	}
	my $lk = bless { lock_path => "$dir/creat.lock" }, 'PublicInbox::Lock';
	$opt{inboxdir} = File::Spec->rel2abs($dir);
	$opt{name} //= $ident;
	my $scope = $lk->lock_for_scope;
	my $pre_cb = delete $opt{pre_cb};
	$pre_cb->($dir) if $pre_cb && $new;
	$opt{-no_fsync} = 1;
	my $no_gc = delete $opt{-no_gc};
	my $tmpdir = delete $opt{tmpdir};
	my $addr = $opt{address} // [];
	$opt{-primary_address} //= $addr->[0] // "$ident\@example.com";
	my $parallel = delete($opt{importer_parallel}) // 0;
	my $creat_opt = { nproc => delete($opt{nproc}) // 1 };
	my $ibx = PublicInbox::InboxWritable->new({ %opt }, $creat_opt);
	if (!-f "$dir/creat.stamp") {
		my $im = $ibx->importer($parallel);
		$cb->($im, $ibx);
		$im->done if $im;
		unless ($no_gc) {
			my @to_gc = $ibx->version == 1 ? ($ibx->{inboxdir}) :
					glob("$ibx->{inboxdir}/git/*.git");
			for my $dir (@to_gc) {
				xsys_e([ qw(git gc -q) ], { GIT_DIR => $dir });
			}
		}
		open my $s, '>', "$dir/creat.stamp" or
			BAIL_OUT "error creating $dir/creat.stamp: $!";
	}
	if ($tmpdir) {
		undef $ibx;
		xsys([qw(/bin/cp -Rp), $dir, $tmpdir]) == 0 or
			BAIL_OUT "cp $dir $tmpdir";
		$opt{inboxdir} = $tmpdir;
		$ibx = PublicInbox::InboxWritable->new(\%opt);
	}
	$ibx;
}

sub test_httpd ($$;$) {
	my ($env, $client, $skip) = @_;
	for (qw(PI_CONFIG TMPDIR)) {
		$env->{$_} or BAIL_OUT "$_ unset";
	}
	SKIP: {
		require_mods(qw(Plack::Test::ExternalServer), $skip // 1);
		my $sock = tcp_server() or die;
		my ($out, $err) = map { "$env->{TMPDIR}/std$_.log" } qw(out err);
		my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ];
		my $td = start_script($cmd, $env, { 3 => $sock });
		my ($h, $p) = tcp_host_port($sock);
		local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p";
		Plack::Test::ExternalServer::test_psgi(client => $client);
		$td->join('TERM');
		open my $fh, '<', $err or BAIL_OUT $!;
		my $e = do { local $/; <$fh> };
		if ($e =~ s/^Plack::Middleware::ReverseProxy missing,\n//gms) {
			$e =~ s/^URL generation for redirects .*\n//gms;
		}
		is($e, '', 'no errors');
	}
};


package PublicInbox::TestCommon::InboxWakeup;
use strict;
sub on_inbox_unlock { ${$_[0]}->($_[1]) }

1;
public-inbox-1.9.0/lib/PublicInbox/Tmpfile.pm000066400000000000000000000022621430031475700210440ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors 
# License: AGPL-3.0+ 
package PublicInbox::Tmpfile;
use strict;
use v5.10.1;
use parent qw(Exporter);
our @EXPORT = qw(tmpfile);
use Fcntl qw(:DEFAULT);
use Errno qw(EEXIST);
use File::Spec;

# use tmpfile instead of open(..., '+>', undef) so we can get an
# unlinked filename which makes sense when viewed with lsof
# (at least on Linux)
# And if we ever stop caring to have debuggable filenames, O_TMPFILE :)
#
# This is also for Perl <5.32 which lacks: open(..., '+>>', undef)
# 
sub tmpfile ($;$$) {
	my ($id, $sock, $append) = @_;
	if (defined $sock) {
		# add the socket inode number so we can figure out which
		# socket it belongs to
		my @st = stat($sock);
		$id .= '-ino:'.$st[1];
	}
	$id =~ tr!/!^!;

	my $fl = O_RDWR | O_CREAT | O_EXCL;
	$fl |= O_APPEND if $append;
	do {
		my $fn = File::Spec->tmpdir . "/$id-".time.'-'.rand;
		if (sysopen(my $fh, $fn, $fl, 0600)) { # likely
			unlink($fn) or warn "unlink($fn): $!"; # FS broken
			return $fh; # success
		}
	} while ($! == EEXIST);
	undef  # EMFILE/ENFILE/ENOSPC/ENOMEM
}

1;
public-inbox-1.9.0/lib/PublicInbox/URIimap.pm000066400000000000000000000122701430031475700207520ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors 
# License: AGPL-3.0+ 
# cf. RFC 5092, which the `URI' package doesn't support
#
# This depends only on the documented public API of the `URI' dist,
# not on internal `_'-prefixed subclasses such as `URI::_server'
#
#  exists, but it appears
# unmaintained, isn't in common distros, nor does it support
# ';FOO=BAR' parameters such as UIDVALIDITY
#
# RFC 2192 also describes ";TYPE="
package PublicInbox::URIimap;
use strict;
use v5.10.1;
use URI::Split qw(uri_split uri_join); # part of URI
use URI::Escape qw(uri_unescape uri_escape);
use overload '""' => \&as_string;

my %default_ports = (imap => 143, imaps => 993);
# for enc-auth-type and enc-user in RFC 5092
my $achar = qr/[A-Za-z0-9%\-_\.\!\$'\(\)\+\,\&\=\*]+/;

sub new {
	my ($class, $url) = @_;
	$url =~ m!\Aimaps?://! ? bless \$url, $class : undef;
}

sub canonical {
	my ($self) = @_;

	# no #frag in RFC 5092 from what I can tell
	my ($scheme, $auth, $path, $query, $_frag) = uri_split($$self);
	$path =~ s!\A/+!/!; # excessive leading slash

	# upper-case uidvalidity= and uid= parameter names
	$path =~ s/;([^=]+)=([^;]*)/;\U$1\E=$2/g;

	# lowercase the host portion
	$auth =~ s#\A(.*@)?(.*?)(?::([0-9]+))?\z#
		my $ret = ($1//'').lc($2);
		if (defined(my $port = $3)) {
			if ($default_ports{lc($scheme)} != $port) {
				$ret .= ":$port";
			}
		}
		$ret#ei;

	ref($self)->new(uri_join(lc($scheme), $auth, $path, $query));
}

sub host {
	my ($self) = @_;
	my (undef, $auth) = uri_split($$self);
	$auth =~ s!\A.*?@!!;
	$auth =~ s!:[0-9]+\z!!;
	$auth =~ s!\A\[(.*)\]\z!$1!; # IPv6
	uri_unescape($auth);
}

# unescaped, may be used for globbing
sub path {
	my ($self) = @_;
	my (undef, undef, $path) = uri_split($$self);
	$path =~ s!\A/+!!;
	$path =~ s!/?;.*\z!!; # [;UIDVALIDITY=nz-number]/;UID=nz-number
	$path eq '' ? undef : $path;
}

sub mailbox {
	my ($self) = @_;
	my $path = path($self);
	defined($path) ? uri_unescape($path) : undef;
}

sub uidvalidity { # read/write
	my ($self, $val) = @_;
	my ($scheme, $auth, $path, $query, $frag) = uri_split($$self);
	if (defined $val) {
		if ($path =~ s!;UIDVALIDITY=[^;/]*\b!;UIDVALIDITY=$val!i or
				$path =~ s!/;!;UIDVALIDITY=$val/;!i) {
			# s// already changed it
		} else { # both s// failed, so just append
			$path .= ";UIDVALIDITY=$val";
		}
		$$self = uri_join($scheme, $auth, $path, $query, $frag);
	}
	$path =~ s!\A/+!!;
	$path =~ m!\A[^;]+;UIDVALIDITY=([1-9][0-9]*)\b!i ?
		($1 + 0) : undef;
}

sub uid {
	my ($self, $val) = @_;
	my ($scheme, $auth, $path, $query, $frag) = uri_split($$self);
	if (scalar(@_) == 2) {
		if (!defined $val) {
			$path =~ s!/;UID=[^;/]*\b!!i;
		} else {
			$path =~ s!/;UID=[^;/]*\b!/;UID=$val!i or
				$path .= "/;UID=$val";
		}
		$$self = uri_join($scheme, $auth, $path, $query);
	}
	$path =~ m!\A/[^;]+(?:;UIDVALIDITY=[^;/]+)?/;UID=([1-9][0-9]*)\b!i ?
		($1 + 0) : undef;
}

sub port {
	my ($self) = @_;
	my ($scheme, $auth) = uri_split($$self);
	$auth =~ /:([0-9]+)\z/ ? $1 + 0 : $default_ports{lc($scheme)};
}

sub authority {
	my ($self) = @_;
	my (undef, $auth) = uri_split($$self);
	$auth
}

sub user {
	my ($self, $val) = @_;
	my ($scheme, $auth, $path, $query) = uri_split($$self);
	my $at_host_port;
	$auth =~ s/(@.*)\z// and $at_host_port = $1; # stash host:port for now
	if (scalar(@_) == 2) { # set, this clobbers password, too
		if (defined $val) {
			my $uval = uri_escape($val);
			if (defined($at_host_port)) {
				$auth =~ s!\A.*?(;AUTH=$achar).*!$uval$1!ix
					or $auth = $uval;
			} else {
				substr($auth, 0, 0) = "$uval@";
			}
		} elsif (defined($at_host_port)) { # clobber
			$auth =~ s!\A.*?(;AUTH=$achar).*!$1!i or $auth = '';
			if ($at_host_port && $auth eq '') {
				$at_host_port =~ s/\A\@//;
			}
		}
		$at_host_port //= '';
		$$self = uri_join($scheme, $auth.$at_host_port, $path, $query);
		$val;
	} else { # read-only
		$at_host_port // return undef; # explicit undef for scalar
		$auth =~ s/;.*\z//; # drop ;AUTH=...
		$auth =~ s/:.*\z//; # drop password
		$auth eq '' ? undef : uri_unescape($auth);
	}
}

sub password {
	my ($self) = @_;
	my (undef, $auth) = uri_split($$self);
	$auth =~ s/@.*\z// or return undef; # drop host:port
	$auth =~ s/;.*\z//; # drop ;AUTH=...
	$auth =~ s/\A[^:]+:// ? uri_unescape($auth) : undef; # drop ->user
}

sub auth {
	my ($self, $val) = @_;
	my ($scheme, $auth, $path, $query) = uri_split($$self);
	my $at_host_port;
	$auth =~ s/(@.*)\z// and $at_host_port = $1; # stash host:port for now
	if (scalar(@_) == 2) {
		if (defined $val) {
			my $uval = uri_escape($val);
			if ($auth =~ s!;AUTH=$achar!;AUTH=$uval!ix) {
				# replaced existing
			} elsif (defined($at_host_port)) {
				$auth .= ";AUTH=$uval";
			} else {
				substr($auth, 0, 0) = ";AUTH=$uval@";
			}
		} else { # clobber
			$auth =~ s!;AUTH=$achar!!i;
			if ($at_host_port && $auth eq '') {
				$at_host_port =~ s/\A\@//;
			}
		}
		$at_host_port //= '';
		$$self = uri_join($scheme, $auth.$at_host_port, $path, $query);
		$val;
	} else { # read-only
		$auth =~ /;AUTH=(.+)\z/i ? uri_unescape($1) : undef;
	}
}

sub scheme {
	my ($self) = @_;
	(uri_split($$self))[0];
}

sub as_string { ${$_[0]} }

sub clone { ref($_[0])->new(as_string($_[0])) }

1;
public-inbox-1.9.0/lib/PublicInbox/URInntps.pm000066400000000000000000000010501430031475700211600ustar00rootroot00000000000000# Copyright (C) 2021 all contributors 
# License: AGPL-3.0+ 

# deal with the lack of URI::nntps in upstream URI.
# nntps is IANA registered, snews is deprecated
# cf. https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=983419
# Fixed in URI 5.08, we can drop this by 2035 when LTS distros all have it
package PublicInbox::URInntps;
use strict;
use parent qw(URI::snews);
use URI;

sub new {
	my ($class, $url) = @_;
	$url =~ m!\Anntps://!i ? bless(\$url, $class) : URI->new($url);
}

1;
public-inbox-1.9.0/lib/PublicInbox/Unsubscribe.pm000066400000000000000000000126411430031475700217320ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors 
# License: AGPL-3.0+ 
#
# Standalone PSGI app to handle HTTP(s) unsubscribe links generated
# by milters like examples/unsubscribe.milter to mailing lists.
#
# This does not depend on any other modules in the PublicInbox::*
# and ought to be usable with any mailing list software.
package PublicInbox::Unsubscribe;
use strict;
use warnings;
use Crypt::CBC;
use Plack::Util;
use MIME::Base64 qw(decode_base64url);
my @CODE_URL = qw(http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git
	https://public-inbox.org/public-inbox.git);
my @CT_HTML = ('Content-Type', 'text/html; charset=UTF-8');

sub new {
	my ($class, %opt) = @_;
	my $key_file = $opt{key_file};
	defined $key_file or die "`key_file' needed";
	open my $fh, '<', $key_file or die
		"failed to open key_file=$key_file: $!\n";
	my ($key, $iv);
	if (read($fh, $key, 8) != 8 || read($fh, $iv, 8) != 8 ||
				read($fh, my $end, 8) != 0) {
		die "key_file must be 16 bytes\n";
	}

	# these parameters were chosen to generate shorter parameters
	# to reduce the possibility of copy+paste errors
	my $cipher = Crypt::CBC->new(-key => $key,
			-iv => $iv,
			-header => 'none',
			-cipher => 'Blowfish');

	my $e = $opt{owner_email} or die "`owner_email' not specified\n";
	my $unsubscribe = $opt{unsubscribe} or
		die "`unsubscribe' callback not given\n";

	my $code_url = $opt{code_url} || \@CODE_URL;
	$code_url = [ $code_url ] if ref($code_url) ne 'ARRAY';
	bless {
		pi_cfg => $opt{pi_config}, # PublicInbox::Config
		owner_email => $opt{owner_email},
		cipher => $cipher,
		unsubscribe => $unsubscribe,
		contact => qq($e),
		code_url => $code_url,
		confirm => $opt{confirm},
	}, $class;
}

# entry point for PSGI
sub call {
	my ($self, $env) = @_;
	my $m = $env->{REQUEST_METHOD};
	if ($m eq 'GET' || $m eq 'HEAD') {
		$self->{confirm} ? confirm_prompt($self, $env)
				 : finalize_unsub($self, $env);
	} elsif ($m eq 'POST') {
		finalize_unsub($self, $env);
	} else {
		r($self, 405,
			Plack::Util::encode_html($m).' method not allowed');
	}
}

sub _user_list_addr {
	my ($self, $env) = @_;
	my ($blank, $u, $list) = split('/', $env->{PATH_INFO});

	if (!defined $u || $u eq '') {
		return r($self, 400, 'Bad request',
			'Missing encrypted email address in path component');
	}
	if (!defined $list && $list eq '') {
		return r($self, 400, 'Bad request',
			'Missing mailing list name in path component');
	}
	my $user = eval { $self->{cipher}->decrypt(decode_base64url($u)) };
	if (!defined $user || index($user, '@') < 1) {
		warn "error decrypting: $u: ", ($@ ? quotemeta($@) : ());
		$u = Plack::Util::encode_html($u);
		return r($self, 400, 'Bad request', "Failed to decrypt: $u");
	}

	# The URLs are too damn long if we have the encrypted domain
	# name in the PATH_INFO
	if (index($list, '@') < 0) {
		my $host = (split(':', $env->{HTTP_HOST}))[0];
		$list .= '@'.$host;
	}
	($user, $list);
}

sub confirm_prompt { # on GET
	my ($self, $env) = @_;
	my ($user_addr, $list_addr) = _user_list_addr($self, $env);
	return $user_addr if ref $user_addr;

	my $xl = Plack::Util::encode_html($list_addr);
	my $xu = Plack::Util::encode_html($user_addr);
	my @body = (
		"Confirmation required to remove", '',
		"\t$xu", '',
		"from the mailing list at", '',
		"\t$xl", '',
		'You will get one last email once you hit "Confirm" below:',
		qq(
) . qq() . '
');

	push @body, archive_info($self, $env, $list_addr);

	r($self, 200, "Confirm unsubscribe for $xl", @body);
}

sub finalize_unsub { # on POST
	my ($self, $env) = @_;
	my ($user_addr, $list_addr) = _user_list_addr($self, $env);
	return $user_addr if ref $user_addr;

	my @archive = archive_info($self, $env, $list_addr);
	if (my $err = $self->{unsubscribe}->($user_addr, $list_addr)) {
		return r($self, 500, Plack::Util::encode_html($err), @archive);
	}

	my $xl = Plack::Util::encode_html($list_addr);
	r($self, 200, "Unsubscribed from $xl",
		'You may get one final goodbye message', @archive);
}

sub r {
	my ($self, $code, $title, @body) = @_;
	[ $code, [ @CT_HTML ], [
		"$title
".
		join("\n", "$title\n", @body) . '

'. "
This page is available under AGPL-3.0+\n" .
		join('', map { "git clone $_\n" } @{$self->{code_url}}) .
		qq(Email $self->{contact} if you have any questions).
		'
' ] ]; } sub archive_info { my ($self, $env, $list_addr) = @_; my $archive_url = $self->{archive_urls}->{$list_addr}; unless ($archive_url) { if (my $cfg = $self->{pi_cfg}) { # PublicInbox::Config::lookup my $ibx = $cfg->lookup($list_addr); # PublicInbox::Inbox::base_url $archive_url = $ibx->base_url if $ibx; } } # protocol-relative URL: "//example.com/" => "https://example.com/" if ($archive_url && $archive_url =~ m!\A//!) { $archive_url = "$env->{'psgi.url_scheme'}:$archive_url"; } # maybe there are other places where we could map # list_addr => archive_url without ~/.public-inbox/config if ($archive_url) { $archive_url = Plack::Util::encode_html($archive_url); ('', 'HTML and git clone-able archives are available at:', qq($archive_url)) } else { ('', 'There ought to be archives for this list,', 'but unfortunately the admin did not configure '. __PACKAGE__. ' to show you the URL'); } } 1; public-inbox-1.9.0/lib/PublicInbox/UserContent.pm000066400000000000000000000073601430031475700217210ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # Self-updating module containing a sample CSS for client-side # customization by users of public-inbox. Used by Makefile.PL package PublicInbox::UserContent; use strict; use warnings; # this sub is updated automatically: sub CSS () { <<'_' /* * CC0-1.0 * Dark color scheme using 216 web-safe colors, inspired * somewhat by the default color scheme in mutt. * It reduces eyestrain for me, and energy usage for all: * https://en.wikipedia.org/wiki/Light-on-dark_color_scheme */ * { font-size: 100% !important; font-family: monospace !important; background:#000 !important; color:#ccc !important } pre { white-space: pre-wrap !important } /* * Underlined links add visual noise which make them hard-to-read. * Use colors to make them stand out, instead. */ a:link { color:#69f !important; text-decoration:none !important } a:visited { color:#96f !important } /* quoted text in emails gets a different color */ *.q { color:#09f !important } /* * these may be used with cgit , too. * (cgit uses
, public-inbox uses ) */ *.add { color:#0ff !important } /* diff post-image lines */ *.del { color:#f0f !important } /* diff pre-image lines */ *.head { color:#fff !important } /* diff header (metainformation) */ *.hunk { color:#c93 !important } /* diff hunk-header */ /* * highlight 3.x colors (tested 3.18) for displaying blobs. * This doesn't use most of the colors available, as I find too * many colors overwhelming, so the default is commented out. */ .hl.num { color:#f30 !important } /* number */ .hl.esc { color:#f0f !important } /* escape character */ .hl.str { color:#f30 !important } /* string */ .hl.ppc { color:#f0f !important } /* preprocessor */ .hl.pps { color:#f30 !important } /* preprocessor string */ .hl.slc { color:#09f !important } /* single-line comment */ .hl.com { color:#09f !important } /* multi-line comment */ /* .hl.opt { color:#ccc !important } */ /* operator */ /* .hl.ipl { color:#ccc !important } */ /* interpolation */ /* keyword groups kw[a-z] */ .hl.kwa { color:#ff0 !important } .hl.kwb { color:#0f0 !important } .hl.kwc { color:#ff0 !important } /* .hl.kwd { color:#ccc !important } */ /* line-number (unused by public-inbox) */ /* .hl.lin { color:#ccc !important } */ _ } # end of auto-updated sub # return a sample CSS sub sample ($$) { my ($ibx, $env) = @_; my $url_prefix = $ibx->base_url($env); my $preamble = <<""; /* * Firefox users: this goes in \$PROFILE_FOLDER/chrome/userContent.css * where \$PROFILE_FOLDER is platform-specific * * cf. http://kb.mozillazine.org/UserContent.css * http://kb.mozillazine.org/Profile_folder_-_Firefox * * Users of dillo can remove the entire lines with "moz-only" * in them and place the resulting file in ~/.dillo/style.css */ \@-moz-document url-prefix($url_prefix) { /* moz-only */ $preamble . CSS() . "\n} /* moz-only */\n"; } # Auto-update this file based on the contents of a CSS file: # usage: perl -I lib __FILE__ contrib/css/216dark.css # (See Makefile.PL) if (scalar(@ARGV) == 1 && -r __FILE__) { open my $ro, '<', $ARGV[0] or die $!; my $css = do { local $/; <$ro> } or die $!; # indent one level: $css =~ s/^([ \t]*\S)/\t$1/smg; # "!important" overrides whatever the BOFH sets: $css =~ s/;/ !important;/sg; $css =~ s/(\w) \}/$1 !important }/msg; open my $rw, '+<', __FILE__ or die $!; my $out = do { local $/; <$rw> } or die $!; $css =~ s/; /;\n\t\t/g; $out =~ s/^sub CSS.*^_\n\}/sub CSS () {\n\t<<'_'\n${css}_\n}/sm; seek $rw, 0, 0; print $rw $out or die $!; close $rw or die $!; } 1; public-inbox-1.9.0/lib/PublicInbox/V2Writable.pm000066400000000000000000001153321430031475700214300ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ # This interface wraps and mimics PublicInbox::Import # Used to write to V2 inboxes (see L). package PublicInbox::V2Writable; use strict; use v5.10.1; use parent qw(PublicInbox::Lock PublicInbox::IPC); use PublicInbox::SearchIdxShard; use PublicInbox::IPC; use PublicInbox::Eml; use PublicInbox::Git; use PublicInbox::Import; use PublicInbox::MultiGit; use PublicInbox::MID qw(mids references); use PublicInbox::ContentHash qw(content_hash content_digest git_sha); use PublicInbox::InboxWritable; use PublicInbox::OverIdx; use PublicInbox::Msgmap; use PublicInbox::Spawn qw(spawn popen_rd run_die); use PublicInbox::Search; use PublicInbox::SearchIdx qw(log2stack is_ancestor check_size is_bad_blob); use IO::Handle; # ->autoflush use File::Temp (); use POSIX (); my $OID = qr/[a-f0-9]{40,}/; # an estimate of the post-packed size to the raw uncompressed size our $PACKING_FACTOR = 0.4; # SATA storage lags behind what CPUs are capable of, so relying on # nproc(1) can be misleading and having extra Xapian shards is a # waste of FDs and space. It can also lead to excessive IO latency # and slow things down. Users on NVME or other fast storage can # use the NPROC env or switches in our script/public-inbox-* programs # to increase Xapian shards our $NPROC_MAX_DEFAULT = 4; sub nproc_shards ($) { my ($creat_opt) = @_; my $n = $creat_opt->{nproc} if ref($creat_opt) eq 'HASH'; $n //= $ENV{NPROC}; if (!$n) { # assume 2 cores if not detectable or zero state $NPROC_DETECTED = PublicInbox::IPC::detect_nproc() || 2; $n = $NPROC_DETECTED; $n = $NPROC_MAX_DEFAULT if $n > $NPROC_MAX_DEFAULT; } # subtract for the main process and git-fast-import $n -= 1; $n < 1 ? 1 : $n; } sub count_shards ($) { my ($self) = @_; # always load existing shards in case core count changes: # Also, shard count may change while -watch is running if (my $ibx = $self->{ibx}) { my $srch = $ibx->search or return 0; delete $ibx->{search}; $srch->{nshard} // 0 } else { # ExtSearchIdx $self->{nshard} = scalar($self->xdb_shards_flat); } } sub new { # $creat may be any true value, or 0/undef. A hashref is true, # and $creat->{nproc} may be set to an integer my ($class, $v2ibx, $creat) = @_; $v2ibx = PublicInbox::InboxWritable->new($v2ibx); my $dir = $v2ibx->assert_usable_dir; unless (-d $dir) { die "$dir does not exist\n" if !$creat; require File::Path; File::Path::mkpath($dir); } my $xpfx = "$dir/xap" . PublicInbox::Search::SCHEMA_VERSION; my $self = { ibx => $v2ibx, mg => PublicInbox::MultiGit->new($dir, 'all.git', 'git'), im => undef, # PublicInbox::Import parallel => 1, transact_bytes => 0, total_bytes => 0, current_info => '', xpfx => $xpfx, oidx => PublicInbox::OverIdx->new("$xpfx/over.sqlite3"), lock_path => "$dir/inbox.lock", # limit each git repo (epoch) to 1GB or so rotate_bytes => int((1024 * 1024 * 1024) / $PACKING_FACTOR), last_commit => [], # git epoch -> commit }; $self->{oidx}->{-no_fsync} = 1 if $v2ibx->{-no_fsync}; $self->{shards} = count_shards($self) || nproc_shards($creat); bless $self, $class; } # public (for now?) sub init_inbox { my ($self, $shards, $skip_epoch, $skip_artnum) = @_; if (defined $shards) { $self->{parallel} = 0 if $shards == 0; $self->{shards} = $shards if $shards > 0; } $self->idx_init; $self->{mm}->skip_artnum($skip_artnum) if defined $skip_artnum; my $max = $self->{ibx}->max_git_epoch; $max = $skip_epoch if (defined($skip_epoch) && !defined($max)); $self->{mg}->add_epoch($max // 0); $self->done; } # returns undef on duplicate or spam # mimics Import::add and wraps it for v2 sub add { my ($self, $eml, $check_cb) = @_; $self->{ibx}->with_umask(\&_add, $self, $eml, $check_cb); } sub idx_shard ($$) { my ($self, $num) = @_; $self->{idx_shards}->[$num % scalar(@{$self->{idx_shards}})]; } # indexes a message, returns true if checkpointing is needed sub do_idx ($$$) { my ($self, $eml, $smsg) = @_; $self->{oidx}->add_overview($eml, $smsg); if ($self->{-need_xapian}) { my $idx = idx_shard($self, $smsg->{num}); $idx->index_eml($eml, $smsg); } my $n = $self->{transact_bytes} += $smsg->{bytes}; $n >= $self->{batch_bytes}; } sub _add { my ($self, $mime, $check_cb) = @_; # spam check: if ($check_cb) { $mime = $check_cb->($mime, $self->{ibx}) or return; } # All pipes (> $^F) known to Perl 5.6+ have FD_CLOEXEC set, # as does SQLite 3.4.1+ (released in 2007-07-20), and # Xapian 1.3.2+ (released 2015-03-15). # For the most part, we can spawn git-fast-import without # leaking FDs to it... $self->idx_init; my ($num, $mid0) = v2_num_for($self, $mime); defined $num or return; # duplicate defined $mid0 or die "BUG: \$mid0 undefined\n"; my $im = $self->importer; my $smsg = bless { mid => $mid0, num => $num }, 'PublicInbox::Smsg'; my $cmt = $im->add($mime, undef, $smsg); # sets $smsg->{ds|ts|blob} $cmt = $im->get_mark($cmt); $self->{last_commit}->[$self->{epoch_max}] = $cmt; if (do_idx($self, $mime, $smsg)) { $self->checkpoint; } $cmt; } sub v2_num_for { my ($self, $mime) = @_; my $mids = mids($mime); if (@$mids) { my $mid = $mids->[0]; my $num = $self->{mm}->mid_insert($mid); if (defined $num) { # common case return ($num, $mid); } # crap, Message-ID is already known, hope somebody just resent: foreach my $m (@$mids) { # read-only lookup now safe to do after above barrier # easy, don't store duplicates # note: do not add more diagnostic info here since # it gets noisy on public-inbox-watch restarts return () if content_exists($self, $mime, $m); } # AltId may pre-populate article numbers (e.g. X-Mail-Count # or NNTP article number), use that article number if it's # not in Over. my $altid = $self->{ibx}->{altid}; if ($altid && grep(/:file=msgmap\.sqlite3\z/, @$altid)) { my $num = $self->{mm}->num_for($mid); if (defined $num && !$self->{oidx}->get_art($num)) { return ($num, $mid); } } # very unlikely: warn "<$mid> reused for mismatched content\n"; # try the rest of the mids for(my $i = $#$mids; $i >= 1; $i--) { my $m = $mids->[$i]; $num = $self->{mm}->mid_insert($m); if (defined $num) { warn "alternative <$m> for <$mid> found\n"; return ($num, $m); } } } # none of the existing Message-IDs are good, generate a new one: v2_num_for_harder($self, $mime); } sub v2_num_for_harder { my ($self, $eml) = @_; my $dig = content_digest($eml); my $mid0 = PublicInbox::Import::digest2mid($dig, $eml); my $num = $self->{mm}->mid_insert($mid0); unless (defined $num) { # it's hard to spoof the last Received: header my @recvd = $eml->header_raw('Received'); $dig->add("Received: $_") foreach (@recvd); $mid0 = PublicInbox::Import::digest2mid($dig, $eml); $num = $self->{mm}->mid_insert($mid0); # fall back to a random Message-ID and give up determinism: until (defined($num)) { $dig->add(rand); $mid0 = PublicInbox::Import::digest2mid($dig, $eml); warn "using random Message-ID <$mid0> as fallback\n"; $num = $self->{mm}->mid_insert($mid0); } } PublicInbox::Import::append_mid($eml, $mid0); ($num, $mid0); } sub _idx_init { # with_umask callback my ($self, $opt) = @_; $self->lock_acquire unless $opt && $opt->{-skip_lock}; $self->{oidx}->create; # xcpdb can change shard count while -watch is idle my $nshards = count_shards($self); $self->{shards} = $nshards if $nshards && $nshards != $self->{shards}; $self->{batch_bytes} = $opt->{batch_size} // $PublicInbox::SearchIdx::BATCH_BYTES; # need to create all shards before initializing msgmap FD # idx_shards must be visible to all forked processes my $max = $self->{shards} - 1; my $idx = $self->{idx_shards} = []; push @$idx, PublicInbox::SearchIdxShard->new($self, $_) for (0..$max); $self->{-need_xapian} = $idx->[0]->need_xapian; # SearchIdxShard may do their own flushing, so don't scale # until after forking $self->{batch_bytes} *= $self->{shards} if $self->{parallel}; my $ibx = $self->{ibx} or return; # ExtIdxSearch # Now that all subprocesses are up, we can open the FDs # for SQLite: my $mm = $self->{mm} = PublicInbox::Msgmap->new_file($ibx, 1); $mm->{dbh}->begin_work; } sub parallel_init ($$) { my ($self, $indexlevel) = @_; $self->{parallel} = 0 if ($indexlevel // 'full') eq 'basic'; } # idempotent sub idx_init { my ($self, $opt) = @_; return if $self->{idx_shards}; my $ibx = $self->{ibx}; # do not leak read-only FDs to child processes, we only have these # FDs for duplicate detection so they should not be # frequently activated. delete @$ibx{qw(mm search)}; $ibx->git->cleanup; parallel_init($self, $ibx->{indexlevel}); $ibx->with_umask(\&_idx_init, $self, $opt); } # returns an array mapping [ epoch => latest_commit ] # latest_commit may be undef if nothing was done to that epoch # $replace_map = { $object_id => $strref, ... } sub _replace_oids ($$$) { my ($self, $mime, $replace_map) = @_; $self->done; my $ibx = $self->{ibx}; my $pfx = "$ibx->{inboxdir}/git"; my $rewrites = []; # epoch => commit my $max = $self->{epoch_max} //= $ibx->max_git_epoch // return; foreach my $i (0..$max) { my $git_dir = "$pfx/$i.git"; -d $git_dir or next; my $git = PublicInbox::Git->new($git_dir); my $im = $self->import_init($git, 0, 1); $rewrites->[$i] = $im->replace_oids($mime, $replace_map); $im->done; } $rewrites; } sub content_hashes ($) { my ($mime) = @_; my @chashes = ( content_hash($mime) ); # We still support Email::MIME, here, and # Email::MIME->as_string doesn't always round-trip, so we may # use a second content_hash my $rt = content_hash(PublicInbox::Eml->new(\($mime->as_string))); push @chashes, $rt if $chashes[0] ne $rt; \@chashes; } sub content_matches ($$) { my ($chashes, $existing) = @_; my $chash = content_hash($existing); foreach (@$chashes) { return 1 if $_ eq $chash } 0 } # used for removing or replacing (purging) sub rewrite_internal ($$;$$$) { my ($self, $old_eml, $cmt_msg, $new_eml, $sref) = @_; $self->idx_init; my ($im, $need_reindex, $replace_map); if ($sref) { $replace_map = {}; # oid => sref $need_reindex = [] if $new_eml; } else { $im = $self->importer; } my $oidx = $self->{oidx}; my $chashes = content_hashes($old_eml); my $removed = []; my $mids = mids($old_eml); # We avoid introducing new blobs into git since the raw content # can be slightly different, so we do not need the user-supplied # message now that we have the mids and content_hash $old_eml = undef; my $mark; foreach my $mid (@$mids) { my %gone; # num => [ smsg, $mime, raw ] my ($id, $prev); while (my $smsg = $oidx->next_by_mid($mid, \$id, \$prev)) { my $msg = get_blob($self, $smsg); if (!defined($msg)) { warn "broken smsg for $mid\n"; next; # continue } my $orig = $$msg; my $cur = PublicInbox::Eml->new($msg); if (content_matches($chashes, $cur)) { $gone{$smsg->{num}} = [ $smsg, $cur, \$orig ]; } } my $n = scalar keys %gone; next unless $n; if ($n > 1) { warn "BUG: multiple articles linked to <$mid>\n", join(',', sort keys %gone), "\n"; } foreach my $num (keys %gone) { my ($smsg, $mime, $orig) = @{$gone{$num}}; # $removed should only be set once assuming # no bugs in our deduplication code: $removed = [ undef, $mime, $smsg ]; my $oid = $smsg->{blob}; if ($replace_map) { $replace_map->{$oid} = $sref; } else { ($mark, undef) = $im->remove($orig, $cmt_msg); $removed->[0] = $mark; } $orig = undef; if ($need_reindex) { # ->replace push @$need_reindex, $smsg; } else { # ->purge or ->remove $self->{mm}->num_delete($num); } unindex_oid_aux($self, $oid, $mid); } } if (defined $mark) { my $cmt = $im->get_mark($mark); $self->{last_commit}->[$self->{epoch_max}] = $cmt; } if ($replace_map && scalar keys %$replace_map) { my $rewrites = _replace_oids($self, $new_eml, $replace_map); return { rewrites => $rewrites, need_reindex => $need_reindex }; } defined($mark) ? $removed : undef; } # public (see PublicInbox::Import->remove), but note the 3rd element # (retval[2]) is not part of the stable API shared with Import->remove sub remove { my ($self, $eml, $cmt_msg) = @_; my $r = $self->{ibx}->with_umask(\&rewrite_internal, $self, $eml, $cmt_msg); defined($r) && defined($r->[0]) ? @$r: undef; } sub _replace ($$;$$) { my ($self, $old_eml, $new_eml, $sref) = @_; my $arg = [ $self, $old_eml, undef, $new_eml, $sref ]; my $rewritten = $self->{ibx}->with_umask(\&rewrite_internal, $self, $old_eml, undef, $new_eml, $sref) or return; my $rewrites = $rewritten->{rewrites}; # ->done is called if there are rewrites since we gc+prune from git $self->idx_init if @$rewrites; for my $i (0..$#$rewrites) { defined(my $cmt = $rewrites->[$i]) or next; $self->{last_commit}->[$i] = $cmt; } $rewritten; } # public sub purge { my ($self, $mime) = @_; my $rewritten = _replace($self, $mime, undef, \'') or return; $rewritten->{rewrites} } sub _check_mids_match ($$$) { my ($old_list, $new_list, $hdrs) = @_; my %old_mids = map { $_ => 1 } @$old_list; my %new_mids = map { $_ => 1 } @$new_list; my @old = keys %old_mids; my @new = keys %new_mids; my $err = "$hdrs may not be changed when replacing\n"; die $err if scalar(@old) != scalar(@new); delete @new_mids{@old}; delete @old_mids{@new}; die $err if (scalar(keys %old_mids) || scalar(keys %new_mids)); } # Changing Message-IDs or References with ->replace isn't supported. # The rules for dealing with messages with multiple or conflicting # Message-IDs are pretty complex and rethreading hasn't been fully # implemented, yet. sub check_mids_match ($$) { my ($old, $new) = @_; _check_mids_match(mids($old), mids($new), 'Message-ID(s)'); _check_mids_match(references($old), references($new), 'References/In-Reply-To'); } # public sub replace ($$$) { my ($self, $old_mime, $new_mime) = @_; check_mids_match($old_mime, $new_mime); # mutt will always add Content-Length:, Status:, Lines: when editing PublicInbox::Import::drop_unwanted_headers($new_mime); my $raw = $new_mime->as_string; my $expect_oid = git_sha(1, \$raw)->hexdigest; my $rewritten = _replace($self, $old_mime, $new_mime, \$raw) or return; my $need_reindex = $rewritten->{need_reindex}; # just in case we have bugs in deduplication code: my $n = scalar(@$need_reindex); if ($n > 1) { my $list = join(', ', map { "$_->{num}: <$_->{mid}>" } @$need_reindex); warn <<""; W: rewritten $n messages matching content of original message (expected: 1). W: possible bug in public-inbox, NNTP article IDs and Message-IDs follow: W: $list } # make sure we really got the OID: my ($blob, $type, $bytes) = $self->git->check($expect_oid); $blob eq $expect_oid or die "BUG: $expect_oid not found after replace"; # don't leak FDs to Xapian: $self->git->cleanup; # reindex modified messages: for my $smsg (@$need_reindex) { my $new_smsg = bless { blob => $blob, num => $smsg->{num}, mid => $smsg->{mid}, }, 'PublicInbox::Smsg'; my $sync = { autime => $smsg->{ds}, cotime => $smsg->{ts} }; $new_smsg->populate($new_mime, $sync); $new_smsg->set_bytes($raw, $bytes); do_idx($self, $new_mime, $new_smsg); } $rewritten->{rewrites}; } sub last_epoch_commit ($$;$) { my ($self, $i, $cmt) = @_; my $v = PublicInbox::Search::SCHEMA_VERSION(); $self->{mm}->last_commit_xap($v, $i, $cmt); } sub set_last_commits ($) { # this is NOT for ExtSearchIdx my ($self) = @_; defined(my $epoch_max = $self->{epoch_max}) or return; my $last_commit = $self->{last_commit}; foreach my $i (0..$epoch_max) { defined(my $cmt = $last_commit->[$i]) or next; $last_commit->[$i] = undef; last_epoch_commit($self, $i, $cmt); } } # public sub checkpoint ($;$) { my ($self, $wait) = @_; if (my $im = $self->{im}) { if ($wait) { $im->barrier; } else { $im->checkpoint; } } my $shards = $self->{idx_shards}; if ($shards) { my $dbh = $self->{mm}->{dbh} if $self->{mm}; # SQLite msgmap data is second in importance $dbh->commit if $dbh; eval { $dbh->do('PRAGMA optimize') }; # SQLite overview is third $self->{oidx}->commit_lazy; # Now deal with Xapian # start commit_txn_lazy asynchronously on all parallel shards # (non-parallel waits here) $_->ipc_do('commit_txn_lazy') for @$shards; # transactions started on parallel shards, # wait for them by issuing an echo command (echo can only # run after commit_txn_lazy is done) if ($wait && $self->{parallel}) { my $i = 0; for my $shard (@$shards) { my $echo = $shard->ipc_do('echo', $i); $echo == $i or die <<""; shard[$i] bad echo:$echo != $i waiting for txn commit ++$i; } } my $midx = $self->{midx}; # misc index if ($midx) { $midx->commit_txn; $PublicInbox::Search::X{CLOEXEC_UNSET} and $self->git->cleanup; } # last_commit is special, don't commit these until # Xapian shards are done: $dbh->begin_work if $dbh; set_last_commits($self); if ($dbh) { $dbh->commit; $dbh->begin_work; } } $self->{total_bytes} += $self->{transact_bytes}; $self->{transact_bytes} = 0; } # issue a write barrier to ensure all data is visible to other processes # and read-only ops. Order of data importance is: git > SQLite > Xapian # public sub barrier { checkpoint($_[0], 1) }; # true if locked and active sub active { !!$_[0]->{im} } # public sub done { my ($self) = @_; my $err = ''; if (my $im = delete $self->{im}) { eval { $im->done }; # PublicInbox::Import::done $err .= "import done: $@\n" if $@; } if (!$err) { eval { checkpoint($self) }; $err .= "checkpoint: $@\n" if $@; } if (my $mm = delete $self->{mm}) { my $m = $err ? 'rollback' : 'commit'; eval { $mm->{dbh}->$m }; $err .= "msgmap $m: $@\n" if $@; } if ($self->{oidx} && $self->{oidx}->{dbh} && $err) { eval { $self->{oidx}->rollback_lazy }; $err .= "overview rollback: $@\n" if $@; } my $shards = delete $self->{idx_shards}; if ($shards) { for (@$shards) { eval { $_->shard_close }; $err .= "shard close: $@\n" if $@; } } eval { $self->{oidx}->dbh_close }; $err .= "over close: $@\n" if $@; delete $self->{midx}; my $nbytes = $self->{total_bytes}; $self->{total_bytes} = 0; $self->lock_release(!!$nbytes) if $shards; $self->git->cleanup; die $err if $err; } sub importer { my ($self) = @_; my $im = $self->{im}; if ($im) { if ($im->{bytes_added} < $self->{rotate_bytes}) { return $im; } else { $self->{im} = undef; $im->done; $im = undef; $self->checkpoint; my $dir = $self->{mg}->add_epoch(++$self->{epoch_max}); my $git = PublicInbox::Git->new($dir); return $self->import_init($git, 0); } } my $epoch = 0; my $max; my $latest = $self->{ibx}->git_dir_latest(\$max); if (defined $latest) { my $git = PublicInbox::Git->new($latest); my $packed_bytes = $git->packed_bytes; my $unpacked_bytes = $packed_bytes / $PACKING_FACTOR; if ($unpacked_bytes >= $self->{rotate_bytes}) { $epoch = $max + 1; } else { $self->{epoch_max} = $max; return $self->import_init($git, $packed_bytes); } } $self->{epoch_max} = $epoch; my $dir = $self->{mg}->add_epoch($epoch); $self->import_init(PublicInbox::Git->new($dir), 0); } sub import_init { my ($self, $git, $packed_bytes, $tmp) = @_; my $im = PublicInbox::Import->new($git, undef, undef, $self->{ibx}); $im->{bytes_added} = int($packed_bytes / $PACKING_FACTOR); $im->{lock_path} = undef; $im->{path_type} = 'v2'; $self->{im} = $im unless $tmp; $im; } # XXX experimental sub diff ($$$) { my ($mid, $cur, $new) = @_; my $ah = File::Temp->new(TEMPLATE => 'email-cur-XXXX', TMPDIR => 1); print $ah $cur->as_string or die "print: $!"; $ah->flush or die "flush: $!"; PublicInbox::Import::drop_unwanted_headers($new); my $bh = File::Temp->new(TEMPLATE => 'email-new-XXXX', TMPDIR => 1); print $bh $new->as_string or die "print: $!"; $bh->flush or die "flush: $!"; my $cmd = [ qw(diff -u), $ah->filename, $bh->filename ]; print STDERR "# MID conflict <$mid>\n"; my $pid = spawn($cmd, undef, { 1 => 2 }); waitpid($pid, 0) == $pid or die "diff did not finish"; } sub get_blob ($$) { my ($self, $smsg) = @_; if (my $im = $self->{im}) { my $msg = $im->cat_blob($smsg->{blob}); return $msg if $msg; } # older message, should be in alternates $self->{ibx}->msg_by_smsg($smsg); } sub content_exists ($$$) { my ($self, $mime, $mid) = @_; my $oidx = $self->{oidx}; my $chashes = content_hashes($mime); my ($id, $prev); while (my $smsg = $oidx->next_by_mid($mid, \$id, \$prev)) { my $msg = get_blob($self, $smsg); if (!defined($msg)) { warn "broken smsg for $mid\n"; next; } my $cur = PublicInbox::Eml->new($msg); return 1 if content_matches($chashes, $cur); # XXX DEBUG_DIFF is experimental and may be removed diff($mid, $cur, $mime) if $ENV{DEBUG_DIFF}; } undef; } sub atfork_child { my ($self) = @_; if (my $older_siblings = $self->{idx_shards}) { $_->ipc_sibling_atfork_child for @$older_siblings; } if (my $im = $self->{im}) { $im->atfork_child; } die "BUG: unexpected mm" if $self->{mm}; } sub reindex_checkpoint ($$) { my ($self, $sync) = @_; $self->git->async_wait_all; $self->update_last_commit($sync); ${$sync->{need_checkpoint}} = 0; my $mm_tmp = $sync->{mm_tmp}; $mm_tmp->atfork_prepare if $mm_tmp; die 'BUG: {im} during reindex' if $self->{im}; if ($self->{ibx_map} && !$sync->{checkpoint_unlocks}) { checkpoint($self, 1); # no need to release lock on pure index } else { $self->done; # release lock } if (my $pr = $sync->{-regen_fmt} ? $sync->{-opt}->{-progress} : undef) { $pr->(sprintf($sync->{-regen_fmt}, ${$sync->{nr}})); } # allow -watch or -mda to write... $self->idx_init($sync->{-opt}); # reacquire lock if (my $intvl = $sync->{check_intvl}) { # eidx $sync->{next_check} = PublicInbox::DS::now() + $intvl; } $mm_tmp->atfork_parent if $mm_tmp; } sub index_finalize ($$) { my ($arg, $index) = @_; ++$arg->{self}->{nidx}; if (defined(my $cur = $arg->{cur_cmt})) { ${$arg->{latest_cmt}} = $cur; } elsif ($index) { die 'BUG: {cur_cmt} missing'; } # else { unindexing @leftovers doesn't set {cur_cmt} } sub index_oid { # cat_async callback my ($bref, $oid, $type, $size, $arg) = @_; is_bad_blob($oid, $type, $size, $arg->{oid}) and return index_finalize($arg, 1); # size == 0 purged returns here my $self = $arg->{self}; local $self->{current_info} = "$self->{current_info} $oid"; my ($num, $mid0); my $eml = PublicInbox::Eml->new($$bref); my $mids = mids($eml); my $chash = content_hash($eml); if (scalar(@$mids) == 0) { warn "E: $oid has no Message-ID, skipping\n"; return; } # {unindexed} is unlikely if (my $unindexed = $arg->{unindexed}) { my $oidbin = pack('H*', $oid); my $u = $unindexed->{$oidbin}; ($num, $mid0) = splice(@$u, 0, 2) if $u; if (defined $num) { $self->{mm}->mid_set($num, $mid0); if (scalar(@$u) == 0) { # done with current OID delete $unindexed->{$oidbin}; delete($arg->{unindexed}) if !keys(%$unindexed); } } } my $oidx = $self->{oidx}; if (!defined($num)) { # reuse if reindexing (or duplicates) for my $mid (@$mids) { ($num, $mid0) = $oidx->num_mid0_for_oid($oid, $mid); last if defined $num; } } $mid0 //= do { # is this a number we got before? $num = $arg->{mm_tmp}->num_for($mids->[0]); # don't clobber existing if Message-ID is reused: if (my $x = defined($num) ? $oidx->get_art($num) : undef) { undef($num) if $x->{blob} ne $oid; } defined($num) ? $mids->[0] : undef; }; if (!defined($num)) { for (my $i = $#$mids; $i >= 1; $i--) { $num = $arg->{mm_tmp}->num_for($mids->[$i]); if (defined($num)) { $mid0 = $mids->[$i]; last; } } } if (defined($num)) { $arg->{mm_tmp}->num_delete($num); } else { # never seen $num = $self->{mm}->mid_insert($mids->[0]); if (defined($num)) { $mid0 = $mids->[0]; } else { # rare, try the rest of them, backwards for (my $i = $#$mids; $i >= 1; $i--) { $num = $self->{mm}->mid_insert($mids->[$i]); if (defined($num)) { $mid0 = $mids->[$i]; last; } } } } if (!defined($num)) { warn "E: $oid <", join('> <', @$mids), "> is a duplicate\n"; return; } ++${$arg->{nr}}; my $smsg = bless { num => $num, blob => $oid, mid => $mid0, }, 'PublicInbox::Smsg'; $smsg->populate($eml, $arg); $smsg->set_bytes($$bref, $size); if (do_idx($self, $eml, $smsg)) { ${$arg->{need_checkpoint}} = 1; } index_finalize($arg, 1); } # only update last_commit for $i on reindex iff newer than current sub update_last_commit { my ($self, $sync, $stk) = @_; my $unit = $sync->{unit} // return; my $latest_cmt = $stk ? $stk->{latest_cmt} : ${$sync->{latest_cmt}}; defined($latest_cmt) or return; my $last = last_epoch_commit($self, $unit->{epoch}); if (defined $last && is_ancestor($self->git, $last, $latest_cmt)) { my @cmd = (qw(rev-list --count), "$last..$latest_cmt"); chomp(my $n = $unit->{git}->qx(@cmd)); return if $n ne '' && $n == 0; } # don't rewind if --{since,until,before,after} are in use return if (defined($last) && grep(defined, @{$sync->{-opt}}{qw(since until)}) && is_ancestor($self->git, $latest_cmt, $last)); last_epoch_commit($self, $unit->{epoch}, $latest_cmt); } sub last_commits { my ($self, $sync) = @_; my $heads = []; for (my $i = $sync->{epoch_max}; $i >= 0; $i--) { $heads->[$i] = last_epoch_commit($self, $i); } $heads; } # returns a revision range for git-log(1) sub log_range ($$$) { my ($sync, $unit, $tip) = @_; my $opt = $sync->{-opt}; my $pr = $opt->{-progress} if (($opt->{verbose} || 0) > 1); my $i = $unit->{epoch}; my $cur = $sync->{ranges}->[$i] or do { $pr->("$i.git indexing all of $tip\n") if $pr; return $tip; # all of it }; # fast equality check to avoid (v)fork+execve overhead if ($cur eq $tip) { $sync->{ranges}->[$i] = undef; return; } my $range = "$cur..$tip"; $pr->("$i.git checking contiguity... ") if $pr; my $git = $unit->{git}; if (is_ancestor($sync->{self}->git, $cur, $tip)) { # common case $pr->("OK\n") if $pr; my $n = $git->qx(qw(rev-list --count), $range); chomp($n); if ($n == 0) { $sync->{ranges}->[$i] = undef; $pr->("$i.git has nothing new\n") if $pr; return; # nothing to do } $pr->("$i.git has $n changes since $cur\n") if $pr; } else { $pr->("FAIL\n") if $pr; warn <<""; discontiguous range: $range Rewritten history? (in $git->{git_dir}) chomp(my $base = $git->qx('merge-base', $tip, $cur)); if ($base) { $range = "$base..$tip"; warn "found merge-base: $base\n" } else { $range = $tip; warn "discarding history at $cur\n"; } warn <<""; reindexing $git->{git_dir} starting at $range # $cur^0 may no longer exist if pruned by git if ($git->qx(qw(rev-parse -q --verify), "$cur^0")) { $unit->{unindex_range} = "$base..$cur"; } elsif ($base && $git->qx(qw(rev-parse -q --verify), $base)) { $unit->{unindex_range} = "$base.."; } else { warn "W: unable to unindex before $range\n"; } } $range; } # overridden by ExtSearchIdx sub artnum_max { $_[0]->{mm}->num_highwater } sub sync_prepare ($$) { my ($self, $sync) = @_; $sync->{ranges} = sync_ranges($self, $sync); my $pr = $sync->{-opt}->{-progress}; my $regen_max = 0; my $head = $sync->{ibx}->{ref_head} || 'HEAD'; my $pfx; if ($pr) { ($pfx) = ($sync->{ibx}->{inboxdir} =~ m!([^/]+)\z!g); $pfx //= $sync->{ibx}->{inboxdir}; } my $reindex_heads; if ($self->{ibx_map}) { # ExtSearchIdx won't index messages unless they're in # over.sqlite3 for a given inbox, so don't read beyond # what's in the per-inbox index. $reindex_heads = []; my $v = PublicInbox::Search::SCHEMA_VERSION; my $mm = $sync->{ibx}->mm; for my $i (0..$sync->{epoch_max}) { $reindex_heads->[$i] = $mm->last_commit_xap($v, $i); } } elsif ($sync->{reindex}) { # V2 inbox # reindex stops at the current heads and we later # rerun index_sync without {reindex} $reindex_heads = $self->last_commits($sync); } if ($sync->{max_size} = $sync->{-opt}->{max_size}) { $sync->{index_oid} = $self->can('index_oid'); } my $git_pfx = "$sync->{ibx}->{inboxdir}/git"; for (my $i = $sync->{epoch_max}; $i >= 0; $i--) { my $git_dir = "$git_pfx/$i.git"; -d $git_dir or next; # missing epochs are fine my $git = PublicInbox::Git->new($git_dir); my $unit = { git => $git, epoch => $i }; my $tip; if ($reindex_heads) { $tip = $head = $reindex_heads->[$i] or next; } else { $tip = $git->qx(qw(rev-parse -q --verify), $head); next if $?; # new repo chomp $tip; } my $range = log_range($sync, $unit, $tip) or next; # can't use 'rev-list --count' if we use --diff-filter $pr->("$pfx $i.git counting $range ... ") if $pr; # Don't bump num_highwater on --reindex by using {D}. # We intentionally do NOT use {D} in the non-reindex case # because we want NNTP article number gaps from unindexed # messages to show up in mirrors, too. $sync->{D} //= $sync->{reindex} ? {} : undef; # OID_BIN => NR my $stk = log2stack($sync, $git, $range); return 0 if $sync->{quit}; my $nr = $stk ? $stk->num_records : 0; $pr->("$nr\n") if $pr; $unit->{stack} = $stk; # may be undef unshift @{$sync->{todo}}, $unit; $regen_max += $nr; } return 0 if $sync->{quit}; # XXX this should not happen unless somebody bypasses checks in # our code and blindly injects "d" file history into git repos if (my @leftovers = keys %{delete($sync->{D}) // {}}) { warn('W: unindexing '.scalar(@leftovers)." leftovers\n"); local $self->{current_info} = 'leftover '; my $unindex_oid = $self->can('unindex_oid'); for my $oid (@leftovers) { last if $sync->{quit}; $oid = unpack('H*', $oid); my $req = { %$sync, oid => $oid }; $self->git->cat_async($oid, $unindex_oid, $req); } $self->git->async_wait_all; } return 0 if $sync->{quit}; if (!$regen_max) { $sync->{-regen_fmt} = "%u/?\n"; return 0; } # reindex should NOT see new commits anymore, if we do, # it's a problem and we need to notice it via die() my $pad = length($regen_max) + 1; $sync->{-regen_fmt} = "% ${pad}u/$regen_max\n"; $sync->{nr} = \(my $nr = 0); return -1 if $sync->{reindex}; $regen_max + $self->artnum_max || 0; } sub unindex_oid_aux ($$$) { my ($self, $oid, $mid) = @_; my @removed = $self->{oidx}->remove_oid($oid, $mid); return unless $self->{-need_xapian}; for my $num (@removed) { idx_shard($self, $num)->ipc_do('xdb_remove', $num); } } sub unindex_oid ($$;$) { # git->cat_async callback my ($bref, $oid, $type, $size, $arg) = @_; is_bad_blob($oid, $type, $size, $arg->{oid}) and return index_finalize($arg, 0); my $self = $arg->{self}; local $self->{current_info} = "$self->{current_info} $oid"; my $unindexed = $arg->{in_unindex} ? $arg->{unindexed} : undef; my $mm = $self->{mm}; my $mids = mids(PublicInbox::Eml->new($bref)); undef $$bref; my $oidx = $self->{oidx}; foreach my $mid (@$mids) { my %gone; my ($id, $prev); while (my $smsg = $oidx->next_by_mid($mid, \$id, \$prev)) { $gone{$smsg->{num}} = 1 if $oid eq $smsg->{blob}; } my $n = scalar(keys(%gone)) or next; if ($n > 1) { warn "BUG: multiple articles linked to $oid\n", join(',',sort keys %gone), "\n"; } # reuse (num => mid) mapping in ascending numeric order for my $num (sort { $a <=> $b } keys %gone) { $num += 0; if ($unindexed) { my $mid0 = $mm->mid_for($num); my $oidbin = pack('H*', $oid); push @{$unindexed->{$oidbin}}, $num, $mid0; } $mm->num_delete($num); } unindex_oid_aux($self, $oid, $mid); } index_finalize($arg, 0); } sub git { $_[0]->{ibx}->git } # this is rare, it only happens when we get discontiguous history in # a mirror because the source used -purge or -edit sub unindex_todo ($$$) { my ($self, $sync, $unit) = @_; my $unindex_range = delete($unit->{unindex_range}) // return; my $unindexed = $sync->{unindexed} //= {}; # $oidbin => [$num, $mid0] my $before = scalar keys %$unindexed; # order does not matter, here: my $fh = $unit->{git}->popen(qw(log --raw -r --no-notes --no-color --no-abbrev --no-renames), $unindex_range); local $sync->{in_unindex} = 1; my $unindex_oid = $self->can('unindex_oid'); while (<$fh>) { /\A:\d{6} 100644 $OID ($OID) [AM]\tm$/o or next; $self->git->cat_async($1, $unindex_oid, { %$sync, oid => $1 }); } close $fh or die "git log failed: \$?=$?"; $self->git->async_wait_all; return unless $sync->{-opt}->{prune}; my $after = scalar keys %$unindexed; return if $before == $after; # ensure any blob can not longer be accessed via dumb HTTP run_die(['git', "--git-dir=$unit->{git}->{git_dir}", qw(-c gc.reflogExpire=now gc --prune=all --quiet)]); } sub sync_ranges ($$) { my ($self, $sync) = @_; my $reindex = $sync->{reindex}; return $self->last_commits($sync) unless $reindex; return [] if ref($reindex) ne 'HASH'; my $ranges = $reindex->{from}; # arrayref; if (ref($ranges) ne 'ARRAY') { die 'BUG: $reindex->{from} not an ARRAY'; } $ranges; } sub index_xap_only { # git->cat_async callback my ($bref, $oid, $type, $size, $smsg) = @_; my $self = delete $smsg->{self}; my $idx = idx_shard($self, $smsg->{num}); $idx->index_eml(PublicInbox::Eml->new($bref), $smsg); $self->{transact_bytes} += $smsg->{bytes}; } sub index_xap_step ($$$;$) { my ($self, $sync, $beg, $step) = @_; my $end = $sync->{art_end}; return if $beg > $end; # nothing to do $step //= $self->{shards}; my $ibx = $self->{ibx}; if (my $pr = $sync->{-opt}->{-progress}) { $pr->("Xapian indexlevel=$ibx->{indexlevel} ". "$beg..$end (% $step)\n"); } for (my $num = $beg; $num <= $end; $num += $step) { last if $sync->{quit}; my $smsg = $ibx->over->get_art($num) or next; $smsg->{self} = $self; $ibx->git->cat_async($smsg->{blob}, \&index_xap_only, $smsg); if ($self->{transact_bytes} >= $self->{batch_bytes}) { ${$sync->{nr}} = $num; reindex_checkpoint($self, $sync); } } } sub index_todo ($$$) { my ($self, $sync, $unit) = @_; return if $sync->{quit}; unindex_todo($self, $sync, $unit); my $stk = delete($unit->{stack}) or return; my $all = $self->git; my $index_oid = $self->can('index_oid'); my $unindex_oid = $self->can('unindex_oid'); my $pfx; if ($unit->{git}->{git_dir} =~ m!/([^/]+)/git/([0-9]+\.git)\z!) { $pfx = "$1 $2"; # v2 } else { # v1 ($pfx) = ($unit->{git}->{git_dir} =~ m!/([^/]+)\z!g); $pfx //= $unit->{git}->{git_dir}; } local $self->{current_info} = "$pfx "; local $sync->{latest_cmt} = \(my $latest_cmt); local $sync->{unit} = $unit; while (my ($f, $at, $ct, $oid, $cmt) = $stk->pop_rec) { if ($sync->{quit}) { warn "waiting to quit...\n"; $all->async_wait_all; $self->update_last_commit($sync); return; } my $req = { %$sync, autime => $at, cotime => $ct, oid => $oid, cur_cmt => $cmt }; if ($f eq 'm') { if ($sync->{max_size}) { $all->check_async($oid, \&check_size, $req); } else { $all->cat_async($oid, $index_oid, $req); } } elsif ($f eq 'd') { $all->cat_async($oid, $unindex_oid, $req); } if (${$sync->{need_checkpoint}}) { reindex_checkpoint($self, $sync); } } $all->async_wait_all; $self->update_last_commit($sync, $stk); } sub xapian_only { my ($self, $opt, $sync, $art_beg) = @_; my $seq = $opt->{'sequential-shard'}; $art_beg //= 0; local $self->{parallel} = 0 if $seq; $self->idx_init($opt); # acquire lock if (my $art_end = $self->{ibx}->mm->max) { $sync //= { need_checkpoint => \(my $bool = 0), -opt => $opt, self => $self, nr => \(my $nr = 0), -regen_fmt => "%u/?\n", }; $sync->{art_end} = $art_end; if ($seq || !$self->{parallel}) { my $shard_end = $self->{shards} - 1; for my $i (0..$shard_end) { last if $sync->{quit}; index_xap_step($self, $sync, $art_beg + $i); if ($i != $shard_end) { reindex_checkpoint($self, $sync); } } } else { # parallel (maybe) index_xap_step($self, $sync, $art_beg, 1); } } $self->git->async_wait_all; $self->{ibx}->cleanup; $self->done; } # public, called by public-inbox-index sub index_sync { my ($self, $opt) = @_; $opt //= {}; return xapian_only($self, $opt) if $opt->{xapian_only}; my $epoch_max; my $latest = $self->{ibx}->git_dir_latest(\$epoch_max) // return; if ($opt->{'fast-noop'}) { # nanosecond (st_ctim) comparison use Time::HiRes qw(stat); if (my @mm = stat("$self->{ibx}->{inboxdir}/msgmap.sqlite3")) { my $c = $mm[10]; # 10 = ctime (nsec NV) my @hd = stat("$latest/refs/heads"); my @pr = stat("$latest/packed-refs"); return if $c > ($hd[10] // 0) && $c > ($pr[10] // 0); } } my $pr = $opt->{-progress}; my $seq = $opt->{'sequential-shard'}; my $art_beg; # the NNTP article number we start xapian_only at my $idxlevel = $self->{ibx}->{indexlevel}; local $self->{ibx}->{indexlevel} = 'basic' if $seq; $self->idx_init($opt); # acquire lock $self->{mg}->fill_alternates; $self->{oidx}->rethread_prepare($opt); my $sync = { need_checkpoint => \(my $bool = 0), reindex => $opt->{reindex}, -opt => $opt, self => $self, ibx => $self->{ibx}, epoch_max => $epoch_max, }; my $quit = PublicInbox::SearchIdx::quit_cb($sync); local $SIG{QUIT} = $quit; local $SIG{INT} = $quit; local $SIG{TERM} = $quit; if (sync_prepare($self, $sync)) { # tmp_clone seems to fail if inside a transaction, so # we rollback here (because we opened {mm} for reading) # Note: we do NOT rely on DBI transactions for atomicity; # only for batch performance. $self->{mm}->{dbh}->rollback; $self->{mm}->{dbh}->begin_work; $sync->{mm_tmp} = $self->{mm}->tmp_clone($self->{ibx}->{inboxdir}); # xapian_only works incrementally w/o --reindex if ($seq && !$opt->{reindex}) { $art_beg = $sync->{mm_tmp}->max || -1; $art_beg++; } } # work forwards through history index_todo($self, $sync, $_) for @{delete($sync->{todo}) // []}; $self->{oidx}->rethread_done($opt) unless $sync->{quit}; $self->done; if (my $nr = $sync->{nr}) { my $pr = $sync->{-opt}->{-progress}; $pr->('all.git '.sprintf($sync->{-regen_fmt}, $$nr)) if $pr; } my $quit_warn; # deal with Xapian shards sequentially if ($seq && delete($sync->{mm_tmp})) { if ($sync->{quit}) { $quit_warn = 1; } else { $self->{ibx}->{indexlevel} = $idxlevel; xapian_only($self, $opt, $sync, $art_beg); $quit_warn = 1 if $sync->{quit}; } } # --reindex on the command-line if (!$sync->{quit} && $opt->{reindex} && !ref($opt->{reindex}) && $idxlevel ne 'basic') { $self->lock_acquire; my $s0 = PublicInbox::SearchIdx->new($self->{ibx}, 0, 0); if (my $xdb = $s0->idx_acquire) { my $n = $xdb->get_metadata('has_threadid'); $xdb->set_metadata('has_threadid', '1') if $n ne '1'; } $s0->idx_release; $self->lock_release; } # reindex does not pick up new changes, so we rerun w/o it: if ($opt->{reindex} && !$sync->{quit} && !grep(defined, @$opt{qw(since until)})) { my %again = %$opt; $sync = undef; delete @again{qw(rethread reindex -skip_lock)}; index_sync($self, \%again); $opt->{quit} = $again{quit}; # propagate to caller } warn <{lei}) { $lei->_lei_atfork_child; my $pkt_op_p = delete $lei->{pkt_op_p}; close($pkt_op_p->{op_p}); } $self->SUPER::ipc_atfork_child; } 1; public-inbox-1.9.0/lib/PublicInbox/View.pm000066400000000000000000001065421430031475700203640ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # # Used for displaying the HTML web interface. # See Documentation/design_www.txt for this. package PublicInbox::View; use strict; use v5.10.1; use List::Util qw(max); use PublicInbox::MsgTime qw(msg_datestamp); use PublicInbox::Hval qw(ascii_html obfuscate_addrs prurl mid_href ts2str fmt_ts); use PublicInbox::Linkify; use PublicInbox::MID qw(id_compress mids mids_for_index references $MID_EXTRACT); use PublicInbox::MsgIter; use PublicInbox::Address; use PublicInbox::WwwStream qw(html_oneshot); use PublicInbox::Reply; use PublicInbox::ViewDiff qw(flush_diff); use PublicInbox::Eml; use Time::Local qw(timegm); use PublicInbox::Smsg qw(subject_normalized); use PublicInbox::ContentHash qw(content_hash); use constant COLS => 72; use constant INDENT => ' '; use constant TCHILD => '` '; sub th_pfx ($) { $_[0] == 0 ? '' : TCHILD }; sub msg_page_i { my ($ctx, $eml) = @_; if ($eml) { # called by WwwStream::async_eml or getline my $smsg = $ctx->{smsg}; my $over = $ctx->{ibx}->over; $ctx->{smsg} = $over ? $over->next_by_mid(@{$ctx->{next_arg}}) : $ctx->gone('over'); $ctx->{mhref} = ($ctx->{nr} || $ctx->{smsg}) ? "../${\mid_href($smsg->{mid})}/" : ''; my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($eml, $ctx); if (length($$obuf)) { multipart_text_as_html($eml, $ctx); $$obuf .= '

'; } delete $ctx->{obuf}; $$obuf .= html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg}; $$obuf; } else { # called by WwwStream::async_next or getline $ctx->{smsg}; # may be undef } } # /$INBOX/$MSGID/ for unindexed v1 inboxes sub no_over_html ($) { my ($ctx) = @_; my $bref = $ctx->{ibx}->msg_by_mid($ctx->{mid}) or return; # 404 my $eml = PublicInbox::Eml->new($bref); $ctx->{mhref} = ''; PublicInbox::WwwStream::init($ctx); my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($eml, $ctx); if (length($$obuf)) { multipart_text_as_html($eml, $ctx); $$obuf .= '

'; } delete $ctx->{obuf}; eval { $$obuf .= html_footer($ctx, $eml) }; html_oneshot($ctx, 200, $obuf); } # public functions: (unstable) sub msg_page { my ($ctx) = @_; my $ibx = $ctx->{ibx}; $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef; my $over = $ibx->over or return no_over_html($ctx); my ($id, $prev); my $next_arg = $ctx->{next_arg} = [ $ctx->{mid}, \$id, \$prev ]; my $smsg = $ctx->{smsg} = $over->next_by_mid(@$next_arg) or return; # undef == 404 # allow user to easily browse the range around this message if # they have ->over $ctx->{-t_max} = $smsg->{ts}; PublicInbox::WwwStream::aresponse($ctx, 200, \&msg_page_i); } # /$INBOX/$MESSAGE_ID/#R sub msg_reply ($$) { my ($ctx, $hdr) = @_; my $se_url = 'https://kernel.org/pub/software/scm/git/docs/git-send-email.html'; my $p_url = 'https://en.wikipedia.org/wiki/Posting_style#Interleaved_style'; my $info = ''; my $ibx = $ctx->{ibx}; if (my $url = $ibx->{infourl}) { $url = prurl($ctx->{env}, $url); $info = qq(\n List information: $url\n); } my ($arg, $link, $reply_to_all) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); if (ref($arg) eq 'SCALAR') { return '
'.ascii_html($$arg).'
'; } # mailto: link only works if address obfuscation is disabled if ($link) { $link = <In-Reply-To header via mailto: links, try the mailto: link EOF } push @$arg, '/path/to/YOUR_REPLY'; $arg = ascii_html(join(" \\\n ", '', @$arg)); <
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and $reply_to_all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  $p_url
$info
* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email$arg

  $se_url
$link
Be sure your reply has a Subject: header at the top and a blank line before the message body. EOF } sub in_reply_to { my ($hdr) = @_; my $refs = references($hdr); $refs->[-1]; } sub fold_addresses ($) { return $_[0] if length($_[0]) <= COLS; # try to fold on commas after non-word chars before $lim chars, # Try to get the "," preceded by ">" or ")", but avoid folding # on the comma where somebody uses "Lastname, Firstname". # We also try to keep the last and penultimate addresses in # the list on the same line if possible, hence the extra \z # Fall back to folding on spaces at $lim + 1 chars my $lim = COLS - 8; # 8 = "\t" display width my $too_long = $lim + 1; $_[0] =~ s/\s*\z//s; # Email::Simple doesn't strip trailing spaces $_[0] = join("\n\t", ($_[0] =~ /(.{0,$lim}\W(?:,|\z)| .{1,$lim}(?:,|\z)| .{1,$lim}| .{$too_long,}?)(?:\s|\z)/xgo)); } sub _hdr_names_html ($$) { my ($hdr, $field) = @_; my @vals = $hdr->header($field) or return ''; ascii_html(join(', ', PublicInbox::Address::names(join(',', @vals)))); } sub nr_to_s ($$$) { my ($nr, $singular, $plural) = @_; return "0 $plural" if $nr == 0; $nr == 1 ? "$nr $singular" : "$nr $plural"; } # Displays the text of of the message for /$INBOX/$MSGID/[Tt]/ endpoint # this is already inside a
sub eml_entry {
	my ($ctx, $eml) = @_;
	my $smsg = delete $ctx->{smsg};
	my $subj = delete $smsg->{subject};
	my $mid_raw = $smsg->{mid};
	my $id = id_compress($mid_raw, 1);
	my $id_m = 'm'.$id;
	my $root_anchor = $ctx->{root_anchor} || '';
	my $irt;
	my $obfs_ibx = $ctx->{-obfs_ibx};

	$subj = '(no subject)' if $subj eq '';
	my $rv = "* ";
	$subj = ''.ascii_html($subj).'';
	obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx;
	$subj = "$subj" if $root_anchor eq $id_m;
	$rv .= $subj . "\n";
	$rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
	my @tocc;
	my $ds = delete $smsg->{ds}; # for v1 non-Xapian/SQLite users

	# Deleting these fields saves about 400K as we iterate across 1K msgs
	delete @$smsg{qw(ts blob)};

	my $from = _hdr_names_html($eml, 'From');
	obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
	$rv .= "From: $from @ ".fmt_ts($ds)." UTC";
	my $upfx = $ctx->{-upfx};
	my $mhref = $upfx . mid_href($mid_raw) . '/';
	$rv .= qq{ (permalink / };
	$rv .= qq{raw)\n};
	my $to = fold_addresses(_hdr_names_html($eml, 'To'));
	my $cc = fold_addresses(_hdr_names_html($eml, 'Cc'));
	my ($tlen, $clen) = (length($to), length($cc));
	my $to_cc = '';
	if (($tlen + $clen) > COLS) {
		$to_cc .= '  To: '.$to."\n" if $tlen;
		$to_cc .= '  Cc: '.$cc."\n" if $clen;
	} else {
		if ($tlen) {
			$to_cc .= '  To: '.$to;
			$to_cc .= '; +Cc: '.$cc if $clen;
		} else {
			$to_cc .= '  Cc: '.$cc if $clen;
		}
		$to_cc .= "\n";
	}
	obfuscate_addrs($obfs_ibx, $to_cc) if $obfs_ibx;
	$rv .= $to_cc;

	my $mapping = $ctx->{mapping};
	if (!$mapping && (defined($irt) || defined($irt = in_reply_to($eml)))) {
		my $href = $upfx . mid_href($irt) . '/';
		my $html = ascii_html($irt);
		$rv .= qq(In-Reply-To: <$html>\n)
	}
	$rv .= "\n";

	# scan through all parts, looking for displayable text
	$ctx->{mhref} = $mhref;
	$ctx->{obuf} = \$rv;
	$eml->each_part(\&add_text_body, $ctx, 1);
	delete $ctx->{obuf};

	# add the footer
	$rv .= "\n^ ".
		"permalink" .
		" raw" .
		" reply";

	my $hr;
	if (defined(my $pct = $smsg->{pct})) { # used by SearchView.pm
		$rv .= "\t[relevance $pct%]";
		$hr = 1;
	} elsif ($mapping) {
		my $nested = 'nested';
		my $flat = 'flat';
		if ($ctx->{flat}) {
			$hr = 1;
			$flat = "$flat";
		} else {
			$nested = "$nested";
		}
		$rv .= "\t[$flat";
		$rv .= "|$nested]";
		$rv .= " $ctx->{s_nr}";
	} else {
		$hr = $ctx->{-hr};
	}

	# do we have more messages? start a new 
 if so
	$rv .= scalar(@{$ctx->{msgs}}) ? '

' : '
' if $hr; $rv; } sub pad_link ($$;$) { my ($mid, $level, $s) = @_; $s ||= '...'; my $href = defined($mid) ? ("($s)\n") : "($s)\n"; (' 'x19).indent_for($level).th_pfx($level).$href; } sub _skel_hdr { # my ($mapping, $mid) = @_; ($_[0]->{$_[1] // \'bogus'} // [ "(?)\n" ])->[0]; } sub _th_index_lite { my ($mid_raw, $irt, $id, $ctx) = @_; my $rv = ''; my $mapping = $ctx->{mapping} or return $rv; my $pad = ' '; my $mid_map = $mapping->{$mid_raw}; defined $mid_map or return 'public-inbox BUG: '.ascii_html($mid_raw).' not mapped'; my ($attr, $node, $idx, $level) = @$mid_map; my $children = $node->{children}; my $nr_c = scalar @$children; my $nr_s = 0; my $siblings; # delete saves about 200KB on a 1K message thread if (my $refs = delete $node->{references}) { ($$irt) = ($refs =~ m/$MID_EXTRACT\z/o); } my $irt_map = $mapping->{$$irt} if defined $$irt; if (defined $irt_map) { $siblings = $irt_map->[1]->{children}; $nr_s = scalar(@$siblings) - 1; $rv .= $pad . $irt_map->[0]; if ($idx > 0) { my $prev = $siblings->[$idx - 1]; my $pmid = $prev->{mid}; if ($idx > 2) { my $s = ($idx - 1). ' preceding siblings ...'; $rv .= pad_link($pmid, $level, $s); } elsif ($idx == 2) { $rv .= $pad . _skel_hdr($mapping, $siblings->[0] ? $siblings->[0]->{mid} : undef); } $rv .= $pad . _skel_hdr($mapping, $pmid); } } my $s_s = nr_to_s($nr_s, 'sibling', 'siblings'); my $s_c = nr_to_s($nr_c, 'reply', 'replies'); $attr =~ s!\n\z!\n!s; $attr =~ s! (?:" )?!!s; # no point in dup subject $attr =~ s!]+>([^<]+)!$1!s; # no point linking to self $rv .= "@ $attr"; if ($nr_c) { my $cmid = $children->[0] ? $children->[0]->{mid} : undef; $rv .= $pad . _skel_hdr($mapping, $cmid); if ($nr_c > 2) { my $s = ($nr_c - 1). ' more replies'; $rv .= pad_link($cmid, $level + 1, $s); } elsif (my $cn = $children->[1]) { $rv .= $pad . _skel_hdr($mapping, $cn->{mid}); } } my $next = $siblings->[$idx+1] if $siblings && $idx >= 0; if ($next) { my $nmid = $next->{mid}; $rv .= $pad . _skel_hdr($mapping, $nmid); my $nnext = $nr_s - $idx; if ($nnext > 2) { my $s = ($nnext - 1).' subsequent siblings'; $rv .= pad_link($nmid, $level, $s); } elsif (my $nn = $siblings->[$idx + 2]) { $rv .= $pad . _skel_hdr($mapping, $nn->{mid}); } } $rv .= $pad ."$s_s, $s_c; $ctx->{s_nr}\n"; } # non-recursive thread walker sub walk_thread ($$$) { my ($rootset, $ctx, $cb) = @_; my @q = map { (0, $_, -1) } @$rootset; while (@q) { my ($level, $node, $i) = splice(@q, 0, 3); defined $node or next; $cb->($ctx, $level, $node, $i) or return; ++$level; $i = 0; unshift @q, map { ($level, $_, $i++) } @{$node->{children}}; } } sub pre_thread { # walk_thread callback my ($ctx, $level, $node, $idx) = @_; $ctx->{mapping}->{$node->{mid}} = [ '', $node, $idx, $level ]; skel_dump($ctx, $level, $node); } sub thread_eml_entry { my ($ctx, $eml) = @_; my ($beg, $end) = thread_adj_level($ctx, $ctx->{level}); $beg . '
' . eml_entry($ctx, $eml) . '
' . $end; } sub next_in_queue ($$) { my ($q, $ghost_ok) = @_; while (@$q) { my ($level, $smsg) = splice(@$q, 0, 2); my $cl = $level + 1; unshift @$q, map { ($cl, $_) } @{$smsg->{children}}; return ($level, $smsg) if $ghost_ok || exists($smsg->{blob}); } undef; } sub stream_thread_i { # PublicInbox::WwwStream::getline callback my ($ctx, $eml) = @_; return thread_eml_entry($ctx, $eml) if $eml; return unless exists($ctx->{skel}); my $ghost_ok = $ctx->{nr}++; while (1) { my ($lvl, $smsg) = next_in_queue($ctx->{-queue}, $ghost_ok); if ($smsg) { if (exists $smsg->{blob}) { # next message for cat-file $ctx->{level} = $lvl; if (!$ghost_ok) { # first non-ghost $ctx->{-title_html} = ascii_html($smsg->{subject}); $ctx->zmore($ctx->html_top); } return $smsg; } # buffer the ghost entry and loop $ctx->zmore(ghost_index_entry($ctx, $lvl, $smsg)); } else { # all done $ctx->zmore(join('', thread_adj_level($ctx, 0))); $ctx->zmore(${delete($ctx->{skel})}); return; } } } sub stream_thread ($$) { my ($rootset, $ctx) = @_; @{$ctx->{-queue}} = map { (0, $_) } @$rootset; PublicInbox::WwwStream::aresponse($ctx, 200, \&stream_thread_i); } # /$INBOX/$MSGID/t/ and /$INBOX/$MSGID/T/ sub thread_html { my ($ctx) = @_; $ctx->{-upfx} = '../../'; my $mid = $ctx->{mid}; my $ibx = $ctx->{ibx}; my ($nr, $msgs) = $ibx->over->get_thread($mid); return missing_thread($ctx) if $nr == 0; # link $INBOX_DIR/description text to "index_topics" view around # the newest message in this thread my $t = ts2str($ctx->{-t_max} = max(map { delete $_->{ts} } @$msgs)); my $t_fmt = fmt_ts($ctx->{-t_max}); my $skel = '
';
	$skel .= $nr == 1 ? 'only message in thread' : 'end of thread';
	$skel .= <~$t_fmt UTC | newest]

EOF
	$skel .= "Thread overview: ";
	$skel .= $nr == 1 ? '(only message)' : "$nr+ messages";
	$skel .= " (download: mbox.gz";
	$skel .= " / follow: Atom feed)\n";
	$skel .= "-- links below jump to the message on this page --\n";
	$ctx->{cur_level} = 0;
	$ctx->{skel} = \$skel;
	$ctx->{prev_attr} = '';
	$ctx->{prev_level} = 0;
	$ctx->{root_anchor} = 'm' . id_compress($mid, 1);
	$ctx->{mapping} = {}; # mid -> [ header_summary, node, idx, level ]
	$ctx->{s_nr} = ($nr > 1 ? "$nr+ messages" : 'only message')
	               .' in thread';

	my $rootset = thread_results($ctx, $msgs);

	# reduce hash lookups in pre_thread->skel_dump
	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
	walk_thread($rootset, $ctx, \&pre_thread);

	$skel .= '
'; return stream_thread($rootset, $ctx) unless $ctx->{flat}; # flat display: lazy load the full message from smsg $ctx->{msgs} = $msgs; $ctx->{-html_tip} = '
';
	PublicInbox::WwwStream::aresponse($ctx, 200, \&thread_html_i);
}

sub thread_html_i { # PublicInbox::WwwStream::getline callback
	my ($ctx, $eml) = @_;
	if ($eml) {
		my $smsg = $ctx->{smsg};
		if (exists $ctx->{-html_tip}) {
			$ctx->{-title_html} = ascii_html($smsg->{subject});
			$ctx->zmore($ctx->html_top);
		}
		return eml_entry($ctx, $eml);
	} else {
		while (my $smsg = shift @{$ctx->{msgs}}) {
			return $smsg if exists($smsg->{blob});
		}
		my $skel = delete($ctx->{skel}) or return; # all done
		$ctx->zmore($$skel);
		undef;
	}
}

sub multipart_text_as_html {
	# ($mime, $ctx) = @_; # each_part may do "$_[0] = undef"

	# scan through all parts, looking for displayable text
	$_[0]->each_part(\&add_text_body, $_[1], 1);
}

sub submsg_hdr ($$) {
	my ($ctx, $eml) = @_;
	my $obfs_ibx = $ctx->{-obfs_ibx};
	my $rv = $ctx->{obuf};
	$$rv .= "\n";
	for my $h (qw(From To Cc Subject Date Message-ID X-Alt-Message-ID)) {
		my @v = $eml->header($h);
		for my $v (@v) {
			obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
			$v = ascii_html($v);
			$$rv .= "$h: $v\n";
		}
	}
}

sub attach_link ($$$$;$) {
	my ($ctx, $ct, $p, $fn, $err) = @_;
	my ($part, $depth, $idx) = @$p;

	# Eml iteration clobbers multipart ->{bdy}, so do not offer
	# downloads for 0-byte multipart attachments
	return unless $part->{bdy};

	my $nl = $idx eq '1' ? '' : "\n"; # like join("\n", ...)
	my $size = length($part->body);
	delete $part->{bdy}; # save memory

	# hide attributes normally, unless we want to aid users in
	# spotting MUA problems:
	$ct =~ s/;.*// unless $err;
	$ct = ascii_html($ct);
	my $sfn;
	if (defined $fn && $fn =~ /\A$PublicInbox::Hval::FN\z/o) {
		$sfn = $fn;
	} elsif ($ct eq 'text/plain') {
		$sfn = 'a.txt';
	} else {
		$sfn = 'a.bin';
	}
	my $rv = $ctx->{obuf};
	$$rv .= qq($nl{mhref}$idx-$sfn">);
	if ($err) {
		$$rv .= <header('Content-Description') // $fn // '';
	$desc = ascii_html($desc);
	$$rv .= ($desc eq '') ? "$ts --]" : "$desc --]\n[-- $ts --]";
	$$rv .= "\n";

	submsg_hdr($ctx, $part) if $part->{is_submsg};

	undef;
}

sub add_text_body { # callback for each_part
	my ($p, $ctx) = @_;
	my $upfx = $ctx->{mhref};
	my $ibx = $ctx->{ibx};
	my $l = $ctx->{-linkify} //= PublicInbox::Linkify->new;
	# $p - from each_part: [ Email::MIME-like, depth, $idx ]
	my ($part, $depth, $idx) = @$p;
	my $ct = $part->content_type || 'text/plain';
	my $fn = $part->filename;
	my ($s, $err) = msg_part_text($part, $ct);
	return attach_link($ctx, $ct, $p, $fn) unless defined $s;

	my $rv = $ctx->{obuf};
	if ($part->{is_submsg}) {
		submsg_hdr($ctx, $part);
		$$rv .= "\n";
	}

	# makes no difference to browsers, and don't screw up filename
	# link generation in diffs with the extra '%0D'
	$s =~ s/\r+\n/\n/sg;

	# will be escaped to `•' in HTML
	obfuscate_addrs($ibx, $s, "\x{2022}") if $ibx->{obfuscate};

	# always support diff-highlighting, but we can't linkify hunk
	# headers for solver unless some coderepo are configured:
	my $diff;
	if ($s =~ /^--- [^\n]+\n\+{3} [^\n]+\n@@ /ms) {
		# diffstat anchors do not link across attachments or messages,
		# -apfx is just a stable prefix for making diffstat anchors
		# linkable to the first diff hunk w/o crossing attachments
		$idx =~ tr!.!/!; # compatibility with previous versions
		$ctx->{-apfx} = $upfx . $idx;

		# do attr => filename mappings for diffstats in git diffs:
		$ctx->{-anchors} = {} if $s =~ /^diff --git /sm;
		$diff = 1;
		delete $ctx->{-long_path};
		my $spfx;
		# absolute URL (Atom feeds)
		if ($ibx->{coderepo}) {
			if (index($upfx, '//') >= 0) {
				$spfx = $upfx;
				$spfx =~ s!/([^/]*)/\z!/!;
			} else {
				my $n_slash = $upfx =~ tr!/!/!;
				if ($n_slash == 0) {
					$spfx = '../';
				} elsif ($n_slash == 1) {
					$spfx = '';
				} else { # nslash == 2
					$spfx = '../../';
				}
			}
		}
		$ctx->{-spfx} = $spfx;
	};

	# split off quoted and unquoted blocks:
	my @sections = PublicInbox::MsgIter::split_quotes($s);
	undef $s; # free memory
	if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) {
		# badly-encoded message with $err? tell the world about it!
		attach_link($ctx, $ct, $p, $fn, $err);
		$$rv .= "\n";
	}
	delete $part->{bdy}; # save memory
	foreach my $cur (@sections) {
		if ($cur =~ /\A>/) {
			# we use a  here to allow users to specify
			# their own color for quoted text
			$$rv .= qq();
			$$rv .= $l->to_html($cur);
			$$rv .= '';
		} elsif ($diff) {
			flush_diff($ctx, \$cur);
		} else {
			# regular lines, OK
			$$rv .= $l->to_html($cur);
		}
		undef $cur; # free memory
	}
}

sub _msg_page_prepare_obuf {
	my ($eml, $ctx) = @_;
	my $over = $ctx->{ibx}->over;
	my $obfs_ibx = $ctx->{-obfs_ibx};
	my $rv = '';
	my $mids = mids_for_index($eml);
	my $nr = $ctx->{nr}++;
	if ($nr) { # unlikely
		if ($ctx->{chash} eq content_hash($eml)) {
			warn "W: BUG? @$mids not deduplicated properly\n";
			return \$rv;
		}
		$rv .=
"
WARNING: multiple messages have this Message-ID\n
"; $rv .= '
';
	} else {
		$ctx->{first_hdr} = $eml->header_obj;
		$ctx->{chash} = content_hash($eml) if $ctx->{smsg}; # reused MID
		$rv .= ""; # anchor for body start
	}
	$ctx->{-upfx} = '../' if $over;
	my @title; # (Subject[0], From[0])
	for my $v ($eml->header('From')) {
		my @n = PublicInbox::Address::names($v);
		$v = ascii_html($v);
		$title[1] //= ascii_html(join(', ', @n));
		if ($obfs_ibx) {
			obfuscate_addrs($obfs_ibx, $v);
			obfuscate_addrs($obfs_ibx, $title[1]);
		}
		$rv .= "From: $v\n" if $v ne '';
	}
	foreach my $h (qw(To Cc)) {
		for my $v ($eml->header($h)) {
			fold_addresses($v);
			$v = ascii_html($v);
			obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
			$rv .= "$h: $v\n" if $v ne '';
		}
	}
	my @subj = $eml->header('Subject');
	if (@subj) {
		my $v = ascii_html(shift @subj);
		obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
		$rv .= 'Subject: ';
		$rv .= $over ? qq($v\n) : "$v\n";
		$title[0] = $v;
		for $v (@subj) { # multi-Subject message :<
			$v = ascii_html($v);
			obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
			$rv .= "Subject: $v\n";
		}
	} else { # dummy anchor for thread skeleton at bottom of page
		$rv .= qq() if $over;
		$title[0] = '(no subject)';
	}
	for my $v ($eml->header('Date')) {
		$v = ascii_html($v);
		obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; # possible :P
		$rv .= qq{Date: $v\t[thread overview]\n};
	}
	if (!$nr) { # first (and only) message, common case
		$ctx->{-title_html} = join(' - ', @title);
		$rv = $ctx->html_top . $rv;
	}
	if (scalar(@$mids) == 1) { # common case
		my $mhtml = ascii_html($mids->[0]);
		$rv .= "Message-ID: <$mhtml> ";
		$rv .= "(raw)\n";
	} else {
		# X-Alt-Message-ID can happen if a message is injected from
		# public-inbox-nntpd because of multiple Message-ID headers.
		my $lnk = PublicInbox::Linkify->new;
		my $s = '';
		for my $h (qw(Message-ID X-Alt-Message-ID)) {
			$s .= "$h: $_\n" for ($eml->header_raw($h));
		}
		$lnk->linkify_mids('..', \$s, 1);
		$rv .= $s;
	}
	$rv .= _parent_headers($eml, $over);
	$rv .= "\n";
	\$rv;
}

sub SKEL_EXPAND () {
	qq(expand[flat) .
		qq(|nested]  ) .
		qq(mbox.gz  ) .
		qq(Atom feed);
}

sub thread_skel ($$$) {
	my ($skel, $ctx, $hdr) = @_;
	my $mid = mids($hdr)->[0];
	my $ibx = $ctx->{ibx};
	my ($nr, $msgs) = $ibx->over->get_thread($mid);
	my $parent = in_reply_to($hdr);
	$$skel .= "\nThread overview: ";
	if ($nr <= 1) {
		if (defined $parent) {
			$$skel .= SKEL_EXPAND."\n ";
			$$skel .= ghost_parent('../', $parent) . "\n";
		} else {
			$$skel .= "[no followups] ".
					SKEL_EXPAND."\n";
		}
		$ctx->{next_msg} = undef;
		$ctx->{parent_msg} = $parent;
		return;
	}

	$$skel .= $nr;
	$$skel .= '+ messages / '.SKEL_EXPAND.qq!  top\n!;

	# nb: mutt only shows the first Subject in the index pane
	# when multiple Subject: headers are present, so we follow suit:
	my $subj = $hdr->header('Subject') // '';
	$subj = '(no subject)' if $subj eq '';
	$ctx->{prev_subj} = [ split(/ /, subject_normalized($subj)) ];
	$ctx->{cur} = $mid;
	$ctx->{prev_attr} = '';
	$ctx->{prev_level} = 0;
	$ctx->{skel} = $skel;

	# reduce hash lookups in skel_dump
	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
	walk_thread(thread_results($ctx, $msgs), $ctx, \&skel_dump);

	$ctx->{parent_msg} = $parent;
}

sub _parent_headers {
	my ($hdr, $over) = @_;
	my $rv = '';
	my @irt = $hdr->header_raw('In-Reply-To');
	my $refs;
	if (@irt) {
		my $lnk = PublicInbox::Linkify->new;
		$rv .= "In-Reply-To: $_\n" for @irt;
		$lnk->linkify_mids('..', \$rv);
	} else {
		$refs = references($hdr);
		my $irt = pop @$refs;
		if (defined $irt) {
			my $html = ascii_html($irt);
			my $href = mid_href($irt);
			$rv .= "In-Reply-To: <";
			$rv .= "$html>\n";
		}
	}

	# do not display References: if search is present,
	# we show the thread skeleton at the bottom, instead.
	return $rv if $over;

	$refs //= references($hdr);
	if (@$refs) {
		@$refs = map { linkify_ref_no_over($_) } @$refs;
		$rv .= 'References: '. join("\n\t", @$refs) . "\n";
	}
	$rv;
}

# returns a string buffer
sub html_footer {
	my ($ctx, $hdr) = @_;
	my $ibx = $ctx->{ibx};
	my $upfx = '../';
	my $skel;
	my $rv = '
';
	if ($ibx->over) {
		my $t = ts2str($ctx->{-t_max});
		my $t_fmt = fmt_ts($ctx->{-t_max});
		$skel .= <~$t_fmt UTC|newest]
EOF

		thread_skel(\$skel, $ctx, $hdr);
		my ($next, $prev);
		my $parent = '       ';
		$next = $prev = '    ';

		if (my $n = $ctx->{next_msg}) {
			$n = mid_href($n);
			$next = "next";
		}
		my $u;
		my $par = $ctx->{parent_msg};
		if ($par) {
			$u = mid_href($par);
			$u = "$upfx$u/";
		}
		if (my $p = $ctx->{prev_msg}) {
			$prev = mid_href($p);
			if ($p && $par && $p eq $par) {
				$prev = "prev parent';
				$parent = '';
			} else {
				$prev = "prev';
				$parent = " parent" if $u;
			}
		} elsif ($u) { # unlikely
			$parent = " parent";
		}
		$rv .= "$next $prev$parent ";
	} else { # unindexed inboxes w/o over
		$skel = qq( latest);
	}
	$rv .= qq(reply);
	$rv .= $skel;
	$rv .= '
'; $rv .= msg_reply($ctx, $hdr); } sub linkify_ref_no_over { my ($mid) = @_; my $href = mid_href($mid); my $html = ascii_html($mid); "<$html>"; } sub ghost_parent { my ($upfx, $mid) = @_; my $href = mid_href($mid); my $html = ascii_html($mid); qq{[parent not found: <$html>]}; } sub indent_for { my ($level) = @_; $level ? INDENT x ($level - 1) : ''; } sub find_mid_root { my ($ctx, $level, $node, $idx) = @_; ++$ctx->{root_idx} if $level == 0; if ($node->{mid} eq $ctx->{mid}) { $ctx->{found_mid_at} = $ctx->{root_idx}; return 0; # stop iterating } 1; } sub strict_loose_note ($) { my ($nr) = @_; my $msg = " -- strict thread matches above, loose matches on Subject: below --\n"; if ($nr > PublicInbox::Over::DEFAULT_LIMIT()) { $msg .= " -- use mbox.gz link to download all $nr messages --\n"; } $msg; } sub thread_results { my ($ctx, $msgs) = @_; require PublicInbox::SearchThread; my $rootset = PublicInbox::SearchThread::thread($msgs, \&sort_ds, $ctx); # FIXME: `tid' is broken on --reindex, so that needs to be fixed # and preserved in the future. This bug is hidden by `sid' matches # in get_thread, so we never noticed it until now. And even when # reindexing is fixed, we'll keep this code until a SCHEMA_VERSION # bump since reindexing is expensive and users may not do it # loose threading could've returned too many results, # put the root the message we care about at the top: my $mid = $ctx->{mid}; if (defined($mid) && scalar(@$rootset) > 1) { $ctx->{root_idx} = -1; my $nr = scalar @$msgs; walk_thread($rootset, $ctx, \&find_mid_root); my $idx = $ctx->{found_mid_at}; if (defined($idx) && $idx != 0) { my $tip = splice(@$rootset, $idx, 1); @$rootset = reverse @$rootset; unshift @$rootset, $tip; $ctx->{sl_note} = strict_loose_note($nr); } } $rootset } sub missing_thread { my ($ctx) = @_; require PublicInbox::ExtMsg; PublicInbox::ExtMsg::ext_msg($ctx); } sub dedupe_subject { my ($prev_subj, $subj, $val) = @_; my $omit; # '"' denotes identical text omitted my (@prev_pop, @curr_pop); while (@$prev_subj && @$subj && $subj->[-1] eq $prev_subj->[-1]) { push(@prev_pop, pop(@$prev_subj)); push(@curr_pop, pop(@$subj)); $omit //= $val; } pop @$subj if @$subj && $subj->[-1] =~ /^re:\s*/i; if (scalar(@curr_pop) == 1) { $omit = undef; push @$prev_subj, @prev_pop; push @$subj, @curr_pop; } $omit // ''; } sub skel_dump { # walk_thread callback my ($ctx, $level, $smsg) = @_; $smsg->{blob} or return _skel_ghost($ctx, $level, $smsg); my $skel = $ctx->{skel}; my $cur = $ctx->{cur}; my $mid = $smsg->{mid}; if ($level == 0 && $ctx->{skel_dump_roots}++) { $$skel .= delete($ctx->{sl_note}) || ''; } my $f = ascii_html(delete $smsg->{from_name}); my $obfs_ibx = $ctx->{-obfs_ibx}; obfuscate_addrs($obfs_ibx, $f) if $obfs_ibx; my $d = fmt_ts($smsg->{ds}); my $unmatched; # if lazy-loaded by SearchThread::Msg::visible() if (exists $ctx->{searchview}) { if (defined(my $pct = $smsg->{pct})) { $d .= (sprintf(' % 2u', $pct) . '%'); } else { $unmatched = 1; $d .= ' '; } } $d .= ' ' . indent_for($level) . th_pfx($level); my $attr = $f; $ctx->{first_level} ||= $level; if ($attr ne $ctx->{prev_attr} || $ctx->{prev_level} > $level) { $ctx->{prev_attr} = $attr; } $ctx->{prev_level} = $level; if ($cur) { if ($cur eq $mid) { delete $ctx->{cur}; $$skel .= "$d". "$attr [this message]\n"; return 1; } else { $ctx->{prev_msg} = $mid; } } else { $ctx->{next_msg} ||= $mid; } # Subject is never undef, this mail was loaded from # our Xapian which would've resulted in '' if it were # really missing (and Filter rejects empty subjects) my @subj = split(/ /, subject_normalized($smsg->{subject})); # remove common suffixes from the subject if it matches the previous, # so we do not show redundant text at the end. my $prev_subj = $ctx->{prev_subj} || []; $ctx->{prev_subj} = [ @subj ]; my $omit = dedupe_subject($prev_subj, \@subj, '" '); my $end; if (@subj) { my $subj = join(' ', @subj); $subj = ascii_html($subj); obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx; $end = "$subj $omit$f\n" } else { $end = "$f\n"; } my $m; my $id = ''; my $mapping = $unmatched ? undef : $ctx->{mapping}; if ($mapping) { my $map = $mapping->{$mid}; $id = id_compress($mid, 1); $m = '#m'.$id; $map->[0] = "$d$end"; $id = "\nid=r".$id; } else { $m = $ctx->{-upfx}.mid_href($mid).'/'; } $$skel .= $d . "" . $end; 1; } sub _skel_ghost { my ($ctx, $level, $node) = @_; my $mid = $node->{mid}; my $d = ' [not found] '; $d .= ' ' if exists $ctx->{searchview}; $d .= indent_for($level) . th_pfx($level); my $upfx = $ctx->{-upfx}; my $href = $upfx . mid_href($mid) . '/'; my $html = ascii_html($mid); my $mapping = $ctx->{mapping}; my $map = $mapping->{$mid} if $mapping; if ($map) { my $id = id_compress($mid, 1); $map->[0] = $d . qq{<$html>\n}; $d .= qq{<$html>\n}; } else { $d .= qq{<$html>\n}; } ${$ctx->{skel}} .= $d; 1; } sub sort_ds { @{$_[0]} = sort { (eval { $a->topmost->{ds} } || 0) <=> (eval { $b->topmost->{ds} } || 0) } @{$_[0]}; } # accumulate recent topics if search is supported # returns 200 if done, 404 if not sub acc_topic { # walk_thread callback my ($ctx, $level, $smsg) = @_; my $mid = $smsg->{mid}; my $has_blob = $smsg->{blob} // do { if (my $by_mid = $ctx->{ibx}->smsg_by_mid($mid)) { %$smsg = (%$smsg, %$by_mid); 1; } }; if ($has_blob) { my $subj = subject_normalized($smsg->{subject}); $subj = '(no subject)' if $subj eq ''; my $ds = $smsg->{ds}; if ($level == 0) { # new, top-level topic my $topic = [ $ds, 1, { $subj => $mid }, $subj ]; $ctx->{-cur_topic} = $topic; push @{$ctx->{order}}, $topic; return 1; } # continue existing topic my $topic = $ctx->{-cur_topic}; # should never be undef $topic->[0] = $ds if $ds > $topic->[0]; $topic->[1]++; # bump N+ message counter my $seen = $topic->[2]; if (scalar(@$topic) == 3) { # parent was a ghost push @$topic, $subj; } elsif (!defined($seen->{$subj})) { push @$topic, $level, $subj; # @extra messages } $seen->{$subj} = $mid; # latest for subject } else { # ghost message return 1 if $level != 0; # ignore child ghosts my $topic = $ctx->{-cur_topic} = [ -666, 0, {} ]; push @{$ctx->{order}}, $topic; } 1; } sub dump_topics { my ($ctx) = @_; my $order = delete $ctx->{order}; # [ ds, subj1, subj2, subj3, ... ] unless ($order) { $ctx->{-html_tip} = '
[No topics in range]
'; return 404; } my @out; my $ibx = $ctx->{ibx}; my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef; # sort by recency, this allows new posts to "bump" old topics... foreach my $topic (sort { $b->[0] <=> $a->[0] } @$order) { my ($ds, $n, $seen, $top_subj, @extra) = @$topic; @$topic = (); next unless defined $top_subj; # ghost topic my $mid = delete $seen->{$top_subj}; my $href = mid_href($mid); my $prev_subj = [ split(/ /, $top_subj) ]; $top_subj = ascii_html($top_subj); $ds = fmt_ts($ds); # $n isn't the total number of posts on the topic, # just the number of posts in the current results window my $anchor; if ($n == 1) { $n = ''; $anchor = '#u'; # top of only message } else { $n = " ($n+ messages)"; $anchor = '#t'; # thread skeleton } my $s = "$top_subj\n" . " $ds UTC $n\n"; for (my $i = 0; $i < scalar(@extra); $i += 2) { my $level = $extra[$i]; my $subj = $extra[$i + 1]; # already normalized $mid = delete $seen->{$subj}; my @subj = split(/ /, $subj); my @next_prev = @subj; # full copy my $omit = dedupe_subject($prev_subj, \@subj, ' "'); $prev_subj = \@next_prev; $subj = join(' ', @subj); $subj = ascii_html($subj); obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx; $href = mid_href($mid); $s .= indent_for($level) . TCHILD; $s .= qq($subj$omit\n); } push @out, $s; } $ctx->{-html_tip} = '
' . join("\n", @out) . '
'; 200; } sub str2ts ($) { my ($yyyy, $mon, $dd, $hh, $mm, $ss) = unpack('A4A2A2A2A2A2', $_[0]); timegm($ss || 0, $mm || 0, $hh || 0, $dd, $mon - 1, $yyyy); } sub pagination_footer ($$) { my ($ctx, $latest) = @_; my $next = $ctx->{next_page} || ''; my $prev = $ctx->{prev_page} || ''; if ($prev) { # aligned padding for: 'next (older) | ' $next = $next ? "$next | " : ' | '; $prev .= qq[ | latest]; } ($next || $prev) ? \"
page: $next$prev
" : \''; } sub paginate_recent ($$) { my ($ctx, $lim) = @_; my $t = $ctx->{qp}->{t} || ''; my $opts = { limit => $lim }; my ($after, $before); # Xapian uses '..' but '-' is perhaps friendier to URL linkifiers # if only $after exists "YYYYMMDD.." because "." could be skipped # if interpreted as an end-of-sentence $t =~ s/\A([0-9]{8,14})-// and $after = str2ts($1); $t =~ /\A([0-9]{8,14})\z/ and $before = str2ts($1); my $ibx = $ctx->{ibx}; my $msgs = $ibx->recent($opts, $after, $before); my $nr = scalar @$msgs; if ($nr < $lim && defined($after)) { $after = $before = undef; $msgs = $ibx->recent($opts); $nr = scalar @$msgs; } my $more = $nr == $lim; my ($newest, $oldest); if ($nr) { $newest = $msgs->[0]->{ts}; $oldest = $msgs->[-1]->{ts}; # if we only had $after, our SQL query in ->recent ordered if ($newest < $oldest) { ($oldest, $newest) = ($newest, $oldest); $more = 0 if defined($after) && $after < $oldest; } } if (defined($oldest) && $more) { my $s = ts2str($oldest); $ctx->{next_page} = qq[] . 'next (older)'; } if (defined($newest) && (defined($before) || defined($after))) { my $s = ts2str($newest); $ctx->{prev_page} = qq[] . 'prev (newer)'; } $msgs; } # GET /$INBOX - top-level inbox view for indexed inboxes sub index_topics { my ($ctx) = @_; my $msgs = paginate_recent($ctx, 200); # 200 is our window walk_thread(thread_results($ctx, $msgs), $ctx, \&acc_topic) if @$msgs; html_oneshot($ctx, dump_topics($ctx), pagination_footer($ctx, '.')); } sub thread_adj_level { my ($ctx, $level) = @_; my $max = $ctx->{cur_level}; if ($level <= 0) { return ('', '') if $max == 0; # flat output # reset existing lists my $beg = $max > 1 ? ('' x ($max - 1)) : ''; $ctx->{cur_level} = 0; ("$beg", ''); } elsif ($level == $max) { # continue existing list qw(
  • ); } elsif ($level < $max) { my $beg = $max > 1 ? ('' x ($max - $level)) : ''; $ctx->{cur_level} = $level; ("$beg
  • ", '
  • '); } else { # ($level > $max) # start a new level $ctx->{cur_level} = $level; my $beg = ($max ? '
  • ' : '') . '
    • '; ($beg, '
    • '); } } sub ghost_index_entry { my ($ctx, $level, $node) = @_; my ($beg, $end) = thread_adj_level($ctx, $level); $beg . '
      '. ghost_parent($ctx->{-upfx}, $node->{mid} // '?')
      		. '
      ' . $end; } 1; public-inbox-1.9.0/lib/PublicInbox/ViewDiff.pm000066400000000000000000000166651430031475700211630ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # # used by PublicInbox::View # This adds CSS spans for diff highlighting. # It also generates links for ViewVCS + SolverGit to show # (or reconstruct) blobs. package PublicInbox::ViewDiff; use strict; use v5.10.1; use parent qw(Exporter); our @EXPORT_OK = qw(flush_diff); use URI::Escape qw(uri_escape_utf8); use PublicInbox::Hval qw(ascii_html to_attr); use PublicInbox::Git qw(git_unquote); my $UNSAFE = "^A-Za-z0-9\-\._~/"; # '/' + $URI::Escape::Unsafe{RFC3986} my $OID_NULL = '0{7,}'; my $OID_BLOB = '[a-f0-9]{7,}'; my $LF = qr!\n!; my $ANY = qr![^\n]!; my $FN = qr!(?:"?[^/\n]+/[^\n]+|/dev/null)!; # cf. git diff.c :: get_compact_summary my $DIFFSTAT_COMMENT = qr/(?: *\((?:new|gone|(?:(?:new|mode) [\+\-][lx]))\))? *\z/s; my $NULL_TO_BLOB = qr/^(index $OID_NULL\.\.)($OID_BLOB)\b/ms; my $BLOB_TO_NULL = qr/^index ($OID_BLOB)(\.\.$OID_NULL)\b/ms; my $BLOB_TO_BLOB = qr/^index ($OID_BLOB)\.\.($OID_BLOB)/ms; our $EXTRACT_DIFFS = qr/( (?: # begin header stuff, don't capture filenames, here, # but instead wait for the --- and +++ lines. (?:^diff\x20--git\x20$FN\x20$FN$LF) # old mode || new mode || copy|rename|deleted|... (?:^[a-z]$ANY+$LF)* )? # end of optional stuff, everything below is required ^index\x20($OID_BLOB)\.\.($OID_BLOB)$ANY*$LF ^---\x20($FN)$LF ^\+{3}\x20($FN)$LF)/msx; our $IS_OID = qr/\A$OID_BLOB\z/s; # link to line numbers in blobs sub diff_hunk ($$$$) { my ($dst, $dctx, $ca, $cb) = @_; my ($oid_a, $oid_b, $spfx) = @$dctx{qw(oid_a oid_b spfx)}; if (defined($spfx) && defined($oid_a) && defined($oid_b)) { my ($n) = ($ca =~ /^-([0-9]+)/); $n = defined($n) ? "#n$n" : ''; $$dst .= qq(@@ {Q}$n">$ca); ($n) = ($cb =~ /^\+([0-9]+)/); $n = defined($n) ? "#n$n" : ''; $$dst .= qq( {Q}$n">$cb @@); } else { $$dst .= "@@ $ca $cb @@"; } } sub oid ($$$) { my ($dctx, $spfx, $oid) = @_; defined($spfx) ? qq({Q}">$oid) : $oid; } # returns true if diffstat anchor written, false otherwise sub anchor0 ($$$$) { my ($dst, $ctx, $fn, $rest) = @_; my $orig = $fn; # normal git diffstat output is impossible to parse reliably # without --numstat, and that isn't the default for format-patch. # So only do best-effort handling of renames for common cases; # which works well in practice. If projects put "=>", or trailing # spaces in filenames, oh well :P $fn =~ s/$DIFFSTAT_COMMENT//; $fn =~ s/\{(?:.+) => (.+)\}/$1/ or $fn =~ s/.* => (.+)/$1/; $fn = git_unquote($fn); # long filenames will require us to check in anchor1() push(@{$ctx->{-long_path}}, $fn) if $fn =~ s!\A\.\.\./?!!; if (defined(my $attr = to_attr($ctx->{-apfx}.$fn))) { $ctx->{-anchors}->{$attr} = 1; my $spaces = ($orig =~ s/( +)\z//) ? $1 : ''; $$dst .= " " . ascii_html($orig) . '' . $spaces . $ctx->{-linkify}->to_html($rest); return 1; } undef; } # returns "diff --git" anchor destination, undef otherwise sub anchor1 ($$) { my ($ctx, $pb) = @_; my $attr = to_attr($ctx->{-apfx}.$pb) // return; my $ok = delete $ctx->{-anchors}->{$attr}; # unlikely, check the end of long path names we captured, # assume diffstat and diff output follow the same order, # and ignore different ordering (could be malicious input) unless ($ok) { my $fn = shift(@{$ctx->{-long_path}}) // return; $pb =~ /\Q$fn\E\z/s or return; $attr = to_attr($ctx->{-apfx}.$fn) // return; $ok = delete $ctx->{-anchors}->{$attr} // return; } $ok ? "diff --git" : undef } sub diff_header ($$$) { my ($x, $ctx, $top) = @_; my (undef, undef, $pa, $pb) = splice(@$top, 0, 4); # ignore oid_{a,b} my $spfx = $ctx->{-spfx}; my $dctx = { spfx => $spfx }; # get rid of leading "a/" or "b/" (or whatever --{src,dst}-prefix are) $pa = (split(m'/', git_unquote($pa), 2))[1] if $pa ne '/dev/null'; $pb = (split(m'/', git_unquote($pb), 2))[1] if $pb ne '/dev/null'; if ($pa eq $pb && $pb ne '/dev/null') { $dctx->{Q} = "?b=".uri_escape_utf8($pb, $UNSAFE); } else { my @q; if ($pb ne '/dev/null') { push @q, 'b='.uri_escape_utf8($pb, $UNSAFE); } if ($pa ne '/dev/null') { push @q, 'a='.uri_escape_utf8($pa, $UNSAFE); } $dctx->{Q} = '?'.join('&', @q); } # linkify early and all at once, since we know the following # subst ops on $$x won't need further escaping: $$x = $ctx->{-linkify}->to_html($$x); # no need to capture oid_a and oid_b on add/delete, # we just linkify OIDs directly via s///e in conditional if (($$x =~ s/$NULL_TO_BLOB/$1 . oid($dctx, $spfx, $2)/e) || ($$x =~ s/$BLOB_TO_NULL/ 'index ' . oid($dctx, $spfx, $1) . $2/e)) { } elsif ($$x =~ $BLOB_TO_BLOB) { # modification-only, not add/delete: # linkify hunk headers later using oid_a and oid_b @$dctx{qw(oid_a oid_b)} = ($1, $2); } else { warn "BUG? <$$x> had no ^index line"; } $$x =~ s!^diff --git!anchor1($ctx, $pb) // 'diff --git'!ems; my $dst = $ctx->{obuf}; $$dst .= qq(); $$dst .= $$x; $$dst .= ''; $dctx; } sub diff_before_or_after ($$) { my ($ctx, $x) = @_; my $linkify = $ctx->{-linkify}; my $dst = $ctx->{obuf}; my $anchors = exists($ctx->{-anchors}) ? 1 : 0; for my $y (split(/(^---\n)/sm, $$x)) { if ($y =~ /\A---\n\z/s) { $$dst .= "---\n"; # all HTML is "\r\n" => "\n" $anchors |= 2; } elsif ($anchors == 3 && $y =~ /^ [0-9]+ files? changed, /sm) { # ok, looks like a diffstat, go line-by-line: for my $l (split(/^/m, $y)) { if ($l =~ /^ (.+)( +\| .*\z)/s) { anchor0($dst, $ctx, $1, $2) and next; } $$dst .= $linkify->to_html($l); } } else { # commit message, notes, etc $$dst .= $linkify->to_html($y); } } } # callers must do CRLF => LF conversion before calling this sub flush_diff ($$) { my ($ctx, $cur) = @_; my @top = split($EXTRACT_DIFFS, $$cur); undef $$cur; # free memory my $linkify = $ctx->{-linkify}; my $dst = $ctx->{obuf}; my $dctx; # {}, keys: Q, oid_a, oid_b while (defined(my $x = shift @top)) { if (scalar(@top) >= 4 && $top[1] =~ $IS_OID && $top[0] =~ $IS_OID) { $dctx = diff_header(\$x, $ctx, \@top); } elsif ($dctx) { my $after = ''; # Quiet "Complex regular subexpression recursion limit" # warning. Perl will truncate matches upon hitting # that limit, giving us more (and shorter) scalars than # would be ideal, but otherwise it's harmless. # # We could replace the `+' metacharacter with `{1,100}' # to limit the matches ourselves to 100, but we can # let Perl do it for us, quietly. no warnings 'regexp'; for my $s (split(/((?:(?:^\+[^\n]*\n)+)| (?:(?:^-[^\n]*\n)+)| (?:^@@ [^\n]+\n))/xsm, $x)) { if (!defined($dctx)) { $after .= $s; } elsif ($s =~ s/\A@@ (\S+) (\S+) @@//) { $$dst .= qq(); diff_hunk($dst, $dctx, $1, $2); $$dst .= $linkify->to_html($s); $$dst .= ''; } elsif ($s =~ /\A\+/) { $$dst .= qq(); $$dst .= $linkify->to_html($s); $$dst .= ''; } elsif ($s =~ /\A-- $/sm) { # email sig starts $dctx = undef; $after .= $s; } elsif ($s =~ /\A-/) { $$dst .= qq(); $$dst .= $linkify->to_html($s); $$dst .= ''; } else { $$dst .= $linkify->to_html($s); } } diff_before_or_after($ctx, \$after) unless $dctx; } else { diff_before_or_after($ctx, \$x); } } } 1; public-inbox-1.9.0/lib/PublicInbox/ViewVCS.pm000066400000000000000000000144231430031475700207340ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # show any VCS object, similar to "git show" # FIXME: we only show blobs for now # # This can use a "solver" to reconstruct blobs based on git # patches (with abbreviated OIDs in the header). However, the # abbreviated OIDs must match exactly what's in the original # email (unless a normal code repo already has the blob). # # In other words, we can only reliably reconstruct blobs based # on links generated by ViewDiff (and only if the emailed # patches apply 100% cleanly to published blobs). package PublicInbox::ViewVCS; use strict; use v5.10.1; use PublicInbox::SolverGit; use PublicInbox::WwwStream qw(html_oneshot); use PublicInbox::Linkify; use PublicInbox::Tmpfile; use PublicInbox::Hval qw(ascii_html to_filename); my $hl = eval { require PublicInbox::HlMod; PublicInbox::HlMod->new; }; my %QP_MAP = ( A => 'oid_a', a => 'path_a', b => 'path_b' ); our $MAX_SIZE = 1024 * 1024; # TODO: configurable my $BIN_DETECT = 8000; # same as git sub html_page ($$$) { my ($ctx, $code, $strref) = @_; my $wcb = delete $ctx->{-wcb}; $ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/" my $res = html_oneshot($ctx, $code, $strref); $wcb ? $wcb->($res) : $res; } sub stream_blob_parse_hdr { # {parse_hdr} for Qspawn my ($r, $bref, $ctx) = @_; my ($res, $logref) = delete @$ctx{qw(-res -logref)}; my ($git, $oid, $type, $size, $di) = @$res; my @cl = ('Content-Length', $size); if (!defined $r) { # error html_page($ctx, 500, $logref); } elsif (index($$bref, "\0") >= 0) { [200, [qw(Content-Type application/octet-stream), @cl] ]; } else { my $n = length($$bref); if ($n >= $BIN_DETECT || $n == $size) { return [200, [ 'Content-Type', 'text/plain; charset=UTF-8', @cl ] ]; } if ($r == 0) { warn "premature EOF on $oid $$logref"; return html_page($ctx, 500, $logref); } @$ctx{qw(-res -logref)} = ($res, $logref); undef; # bref keeps growing } } sub stream_large_blob ($$$$) { my ($ctx, $res, $logref, $fn) = @_; $ctx->{-logref} = $logref; $ctx->{-res} = $res; my ($git, $oid, $type, $size, $di) = @$res; my $cmd = ['git', "--git-dir=$git->{git_dir}", 'cat-file', $type, $oid]; my $qsp = PublicInbox::Qspawn->new($cmd); my $env = $ctx->{env}; $env->{'qspawn.wcb'} = delete $ctx->{-wcb}; $qsp->psgi_return($env, undef, \&stream_blob_parse_hdr, $ctx); } sub show_other_result ($$) { my ($bref, $ctx) = @_; my ($qsp, $logref) = delete @$ctx{qw(-qsp -logref)}; if (my $err = $qsp->{err}) { utf8::decode($$err); $$logref .= "git show error: $err"; return html_page($ctx, 500, $logref); } my $l = PublicInbox::Linkify->new; utf8::decode($$bref); $$bref = '
      '. $l->to_html($$bref);
      	$$bref .= '

      ' . $$logref; html_page($ctx, 200, $bref); } sub show_other ($$$$) { my ($ctx, $res, $logref, $fn) = @_; my ($git, $oid, $type, $size) = @$res; if ($size > $MAX_SIZE) { $$logref = "$oid is too big to show\n" . $$logref; return html_page($ctx, 200, $logref); } my $cmd = ['git', "--git-dir=$git->{git_dir}", qw(show --encoding=UTF-8 --no-color --no-abbrev), $oid ]; my $qsp = PublicInbox::Qspawn->new($cmd); my $env = $ctx->{env}; $ctx->{-qsp} = $qsp; $ctx->{-logref} = $logref; $qsp->psgi_qx($env, undef, \&show_other_result, $ctx); } # user_cb for SolverGit, called as: user_cb->($result_or_error, $uarg) sub solve_result { my ($res, $ctx) = @_; my ($log, $hints, $fn) = delete @$ctx{qw(log hints fn)}; unless (seek($log, 0, 0)) { warn "seek(log): $!"; return html_page($ctx, 500, \'seek error'); } $log = do { local $/; <$log> }; my $ref = ref($res); my $l = PublicInbox::Linkify->new; $log = '
      debug log:

      ' .
      		$l->to_html($log) . '
      '; $res or return html_page($ctx, 404, \$log); $ref eq 'ARRAY' or return html_page($ctx, 500, \$log); my ($git, $oid, $type, $size, $di) = @$res; return show_other($ctx, $res, \$log, $fn) if $type ne 'blob'; my $path = to_filename($di->{path_b} // $hints->{path_b} // 'blob'); my $raw_link = "(raw)"; if ($size > $MAX_SIZE) { return stream_large_blob($ctx, $res, \$log, $fn) if defined $fn; $log = "
      Too big to show, download available\n" .
      			"$oid $type $size bytes $raw_link
      " . $log; return html_page($ctx, 200, \$log); } my $blob = $git->cat_file($oid); if (!$blob) { # WTF? my $e = "Failed to retrieve generated blob ($oid)"; warn "$e ($git->{git_dir})"; $log = "
      $e
      " . $log; return html_page($ctx, 500, \$log); } my $bin = index(substr($$blob, 0, $BIN_DETECT), "\0") >= 0; if (defined $fn) { my $h = [ 'Content-Length', $size, 'Content-Type' ]; push(@$h, ($bin ? 'application/octet-stream' : 'text/plain')); return delete($ctx->{-wcb})->([200, $h, [ $$blob ]]); } if ($bin) { $log = "
      $oid $type $size bytes (binary)" .
      			" $raw_link
      " . $log; return html_page($ctx, 200, \$log); } # TODO: detect + convert to ensure validity utf8::decode($$blob); my $nl = ($$blob =~ s/\r?\n/\n/sg); my $pad = length($nl); $l->linkify_1($$blob); my $ok = $hl->do_hl($blob, $path) if $hl; if ($ok) { $blob = $ok; } else { $$blob = ascii_html($$blob); } # using some of the same CSS class names and ids as cgit $log = "
      $oid $type $size bytes $raw_link
      " . "
      ". "
      " . join('', map {
      			sprintf("% ${pad}u\n", $_)
      		} (1..$nl)) . '
      ' . '
       
      '. # pad for non-CSS users "" . $l->linkify_2($$blob) . '
  • ' . $log; html_page($ctx, 200, \$log); } # GET /$INBOX/$GIT_OBJECT_ID/s/ # GET /$INBOX/$GIT_OBJECT_ID/s/$FILENAME sub show ($$;$) { my ($ctx, $oid_b, $fn) = @_; my $qp = $ctx->{qp}; my $hints = $ctx->{hints} = {}; while (my ($from, $to) = each %QP_MAP) { defined(my $v = $qp->{$from}) or next; $hints->{$to} = $v if $v ne ''; } $ctx->{'log'} = tmpfile("solve.$oid_b") // die "tmpfile: $!"; $ctx->{fn} = $fn; my $solver = PublicInbox::SolverGit->new($ctx->{ibx}, \&solve_result, $ctx); # PSGI server will call this immediately and give us a callback (-wcb) sub { $ctx->{-wcb} = $_[0]; # HTTP write callback $solver->solve($ctx->{env}, $ctx->{log}, $oid_b, $hints); }; } 1; public-inbox-1.9.0/lib/PublicInbox/WQBlocked.pm000066400000000000000000000024311430031475700212550ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # non-blocking workqueues, currently used by LeiNoteEvent to track renames package PublicInbox::WQBlocked; use v5.12; use parent qw(PublicInbox::DS); use PublicInbox::Syscall qw(EPOLLOUT EPOLLONESHOT); use PublicInbox::IPC; use Carp (); use Socket qw(MSG_EOR); sub new { my ($cls, $wq, $buf) = @_; my $self = bless { msgq => [$buf], }, $cls; $wq->{wqb} = $self->SUPER::new($wq->{-wq_s1}, EPOLLOUT|EPOLLONESHOT); } sub flush_send { my ($self) = @_; push(@{$self->{msgq}}, $_[1]) if defined($_[1]); while (defined(my $buf = shift @{$self->{msgq}})) { if (ref($buf) eq 'CODE') { $buf->($self); # could be \&PublicInbox::DS::close } else { my $wq_s1 = $self->{sock}; my $n = $PublicInbox::IPC::send_cmd->($wq_s1, [], $buf, MSG_EOR); next if defined($n); Carp::croak("sendmsg: $!") unless $!{EAGAIN}; PublicInbox::DS::epwait($wq_s1, EPOLLOUT|EPOLLONESHOT); unshift @{$self->{msgq}}, $buf; last; # wait for ->event_step } } } sub enq_close { flush_send($_[0], $_[0]->can('close')) } sub event_step { # called on EPOLLOUT wakeup my ($self) = @_; eval { flush_send($self) } if $self->{sock}; if ($@) { warn $@; $self->close; } } 1; public-inbox-1.9.0/lib/PublicInbox/WQWorker.pm000066400000000000000000000015471430031475700211720ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ # for PublicInbox::IPC wq_* (work queue) workers package PublicInbox::WQWorker; use strict; use v5.10.1; use parent qw(PublicInbox::DS); use PublicInbox::Syscall qw(EPOLLIN EPOLLEXCLUSIVE); use Errno qw(EAGAIN ECONNRESET); use IO::Handle (); # blocking sub new { my ($cls, $wq, $sock) = @_; $sock->blocking(0); my $self = bless { sock => $sock, wq => $wq }, $cls; $self->SUPER::new($sock, EPOLLEXCLUSIVE|EPOLLIN); $self; } sub event_step { my ($self) = @_; my $n = $self->{wq}->recv_and_run($self->{sock}) and return; unless (defined $n) { return if $! == EAGAIN; warn "recvmsg: $!" if $! != ECONNRESET; } $self->{sock} == $self->{wq}->{-wq_s2} and $self->{wq}->wq_atexit_child; $self->close; # PublicInbox::DS::close } 1; public-inbox-1.9.0/lib/PublicInbox/WWW.pm000066400000000000000000000462441430031475700201400ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # # Main web interface for mailing list archives # # We focus on the lowest common denominators here: # - targeted at text-only console browsers (w3m, links, etc..) # - Only basic HTML, CSS only for line-wrapping
     text content for GUIs
    #   and diff/syntax-highlighting (optional)
    # - No JavaScript, graphics or icons allowed.
    # - Must not rely on static content
    # - UTF-8 is only for user-content, 7-bit US-ASCII for us
    package PublicInbox::WWW;
    use strict;
    use v5.10.1;
    use PublicInbox::Config;
    use PublicInbox::Hval;
    use URI::Escape qw(uri_unescape);
    use PublicInbox::MID qw(mid_escape);
    use PublicInbox::GitHTTPBackend;
    use PublicInbox::UserContent;
    use PublicInbox::WwwStatic qw(r path_info_raw);
    use PublicInbox::Eml;
    
    # TODO: consider a routing tree now that we have more endpoints:
    our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
    our $MID_RE = qr!([^/]+)!;
    our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
    our $ATTACH_RE = qr!([0-9][0-9\.]*)-($PublicInbox::Hval::FN)!;
    our $OID_RE = qr![a-f0-9]{7,}!;
    
    sub new {
    	my ($class, $pi_cfg) = @_;
    	bless { pi_cfg => $pi_cfg // PublicInbox::Config->new }, $class;
    }
    
    # backwards compatibility, do not use
    sub run {
    	my ($req, $method) = @_;
    	PublicInbox::WWW->new->call($req->env);
    }
    
    sub call {
    	my ($self, $env) = @_;
    	my $ctx = { env => $env, www => $self };
    
    	# we don't care about multi-value
    	%{$ctx->{qp}} = map {
    		utf8::decode($_);
    		tr/+/ /;
    		my ($k, $v) = split(/=/, $_, 2);
    		# none of the keys we care about will need escaping
    		($k // '', uri_unescape($v // ''))
    	} split(/[&;]+/, $env->{QUERY_STRING});
    
    	my $path_info = path_info_raw($env);
    	my $method = $env->{REQUEST_METHOD};
    
    	if ($method eq 'POST') {
    		if ($path_info =~ m!$INBOX_RE/(?:(?:git/)?([0-9]+)(?:\.git)?/)?
    					(git-upload-pack)\z!x) {
    			my ($epoch, $path) = ($2, $3);
    			return invalid_inbox($ctx, $1) ||
    				serve_git($ctx, $epoch, $path);
    		} elsif ($path_info =~ m!$INBOX_RE/(\w+)\.sql\.gz\z!o) {
    			return get_altid_dump($ctx, $1, $2);
    		} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$ATTACH_RE\z!o) {
    			my ($idx, $fn) = ($3, $4);
    			return invalid_inbox_mid($ctx, $1, $2) ||
    				get_attach($ctx, $idx, $fn);
    		} elsif ($path_info =~ m!$INBOX_RE/!o) {
    			return invalid_inbox($ctx, $1) || mbox_results($ctx);
    		}
    	}
    	elsif ($method !~ /\A(?:GET|HEAD)\z/) {
    		return r(405);
    	}
    
    	# top-level indices and feeds
    	if ($path_info eq '/') {
    		require PublicInbox::WwwListing;
    		PublicInbox::WwwListing->response($ctx);
    	} elsif ($path_info eq '/manifest.js.gz') {
    		require PublicInbox::ManifestJsGz;
    		PublicInbox::ManifestJsGz->response($ctx);
    	} elsif ($path_info =~ m!$INBOX_RE\z!o) {
    		invalid_inbox($ctx, $1) || r301($ctx, $1);
    	} elsif ($path_info =~ m!$INBOX_RE(?:/|/index\.html)?\z!o) {
    		invalid_inbox($ctx, $1) || get_index($ctx);
    	} elsif ($path_info =~ m!$INBOX_RE/(?:atom\.xml|new\.atom)\z!o) {
    		invalid_inbox($ctx, $1) || get_atom($ctx);
    	} elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) {
    		invalid_inbox($ctx, $1) || get_new($ctx);
    	} elsif ($path_info =~ m!$INBOX_RE/description\z!o) {
    		get_description($ctx, $1);
    	} elsif ($path_info =~ m!$INBOX_RE/(?:(?:git/)?([0-9]+)(?:\.git)?/)?
    				($PublicInbox::GitHTTPBackend::ANY)\z!ox) {
    		my ($epoch, $path) = ($2, $3);
    		invalid_inbox($ctx, $1) || serve_git($ctx, $epoch, $path);
    	} elsif ($path_info =~ m!$INBOX_RE/([a-zA-Z0-9_\-]+).mbox\.gz\z!o) {
    		serve_mbox_range($ctx, $1, $2);
    	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) {
    		msg_page($ctx, $1, $2, $3);
    
    	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$ATTACH_RE\z!o) {
    		my ($idx, $fn) = ($3, $4);
    		invalid_inbox_mid($ctx, $1, $2) || get_attach($ctx, $idx, $fn);
    	# in case people leave off the trailing slash:
    	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/(T|t)\z!o) {
    		my ($inbox, $mid_ue, $suffix) = ($1, $2, $3);
    		$suffix .= $suffix =~ /\A[tT]\z/ ? '/#u' : '/';
    		r301($ctx, $inbox, $mid_ue, $suffix);
    
    	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/R/?\z!o) {
    		my ($inbox, $mid_ue) = ($1, $2);
    		r301($ctx, $inbox, $mid_ue, '#R');
    
    	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/f/?\z!o) {
    		r301($ctx, $1, $2);
    	} elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) {
    		get_text($ctx, $1, $2);
    	} elsif ($path_info =~ m!$INBOX_RE/([a-zA-Z0-9_\-\.]+)\.css\z!o) {
    		get_css($ctx, $1, $2);
    	} elsif ($path_info =~ m!$INBOX_RE/manifest\.js\.gz\z!o) {
    		get_inbox_manifest($ctx, $1, $2);
    	} elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) {
    		get_vcs_object($ctx, $1, $2);
    	} elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/
    				($PublicInbox::Hval::FN)\z!ox) {
    		get_vcs_object($ctx, $1, $2, $3);
    	} elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) {
    		r301($ctx, $1, $2, 's/');
    	} elsif ($path_info =~ m!$INBOX_RE/(\w+)\.sql\.gz\z!o) {
    		get_altid_dump($ctx, $1, $2);
    	# convenience redirects order matters
    	} elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) {
    		r301($ctx, $1, $2);
    	} elsif ($path_info =~ m!\A/\+/([a-zA-Z0-9_\-\.]+)\.css\z!) {
    		get_css($ctx, undef, $1); # for WwwListing
    	} else {
    		legacy_redirects($ctx, $path_info);
    	}
    }
    
    # for CoW-friendliness, MOOOOO!  Even for single-process setups,
    # we want to get all immortal allocations done early to avoid heap
    # fragmentation since common allocators favor a large contiguous heap.
    sub preload {
    	my ($self) = @_;
    
    	# populate caches used by Encode internally, since emails
    	# may show up with any encoding.
    	require Encode;
    	Encode::find_encoding($_) for Encode->encodings(':all');
    
    	require PublicInbox::ExtMsg;
    	require PublicInbox::Feed;
    	require PublicInbox::View;
    	require PublicInbox::SearchThread;
    	require PublicInbox::Eml;
    	require PublicInbox::Mbox;
    	require PublicInbox::ViewVCS;
    	require PublicInbox::WwwText;
    	require PublicInbox::WwwAttach;
    	eval {
    		require PublicInbox::Search;
    		PublicInbox::Search::load_xapian();
    	};
    	for (qw(SearchView MboxGz WwwAltId)) {
    		eval "require PublicInbox::$_;";
    	}
    	if (ref($self)) {
    		my $pi_cfg = $self->{pi_cfg};
    		if (defined($pi_cfg->{'publicinbox.cgitrc'})) {
    			$pi_cfg->limiter('-cgit');
    		}
    		$pi_cfg->ALL and require PublicInbox::Isearch;
    		$self->cgit;
    		$self->stylesheets_prepare($_) for ('', '../', '../../');
    		$self->news_www;
    	}
    }
    
    # private functions below
    
    sub r404 {
    	my ($ctx) = @_;
    	if ($ctx && $ctx->{mid}) {
    		require PublicInbox::ExtMsg;
    		return PublicInbox::ExtMsg::ext_msg($ctx);
    	}
    	r(404);
    }
    
    sub news_cgit_fallback ($) {
    	my ($ctx) = @_;
    	my $www = $ctx->{www};
    	my $env = $ctx->{env};
    	my $res = $www->news_www->call($env);
    	$res->[0] == 404 ? $www->cgit->call($env) : $res;
    }
    
    # returns undef if valid, array ref response if invalid
    sub invalid_inbox ($$) {
    	my ($ctx, $inbox) = @_;
    	my $ibx = $ctx->{www}->{pi_cfg}->lookup_name($inbox) //
    			$ctx->{www}->{pi_cfg}->lookup_ei($inbox);
    	if (defined $ibx) {
    		$ctx->{ibx} = $ibx;
    		return;
    	}
    
    	# sometimes linkifiers (not ours!) screw up automatic link
    	# generation and link things intended for nntp:// to https?://,
    	# so try to infer links and redirect them to the appropriate
    	# list URL.
    	news_cgit_fallback($ctx);
    }
    
    # returns undef if valid, array ref response if invalid
    sub invalid_inbox_mid {
    	my ($ctx, $inbox, $mid_ue) = @_;
    	my $ret = invalid_inbox($ctx, $inbox);
    	return $ret if $ret;
    
    	my $mid = $ctx->{mid} = uri_unescape($mid_ue);
    	my $ibx = $ctx->{ibx};
    	if ($mid =~ m!\A([a-f0-9]{2})([a-f0-9]{38})\z!) {
    		my ($x2, $x38) = ($1, $2);
    		# this is horrifically wasteful for legacy URLs:
    		my $str = $ctx->{ibx}->msg_by_path("$x2/$x38") or return;
    		my $s = PublicInbox::Eml->new($str);
    		$mid = PublicInbox::MID::mid_clean($s->header_raw('Message-ID'));
    		return r301($ctx, $inbox, mid_escape($mid));
    	}
    	undef;
    }
    
    # /$INBOX/new.atom                     -> Atom feed, includes replies
    sub get_atom {
    	my ($ctx) = @_;
    	require PublicInbox::Feed;
    	PublicInbox::Feed::generate($ctx);
    }
    
    # /$INBOX/new.html			-> HTML only
    sub get_new {
    	my ($ctx) = @_;
    	require PublicInbox::Feed;
    	PublicInbox::Feed::new_html($ctx);
    }
    
    # /$INBOX/?r=$GIT_COMMIT                 -> HTML only
    sub get_index {
    	my ($ctx) = @_;
    	require PublicInbox::Feed;
    	if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) {
    		require PublicInbox::SearchView;
    		PublicInbox::SearchView::sres_top_html($ctx);
    	} else {
    		PublicInbox::Feed::generate_html_index($ctx);
    	}
    }
    
    # /$INBOX/$MESSAGE_ID/raw                    -> raw mbox
    sub get_mid_txt {
    	my ($ctx) = @_;
    	require PublicInbox::Mbox;
    	PublicInbox::Mbox::emit_raw($ctx) || r(404);
    }
    
    # /$INBOX/$MESSAGE_ID/                   -> HTML content (short quotes)
    sub get_mid_html {
    	my ($ctx) = @_;
    	require PublicInbox::View;
    	PublicInbox::View::msg_page($ctx) || r404($ctx);
    }
    
    # /$INBOX/$MESSAGE_ID/t/
    sub get_thread {
    	my ($ctx, $flat) = @_;
    	$ctx->{ibx}->over or return need($ctx, 'Overview');
    	$ctx->{flat} = $flat;
    	require PublicInbox::View;
    	PublicInbox::View::thread_html($ctx);
    }
    
    # /$INBOX/_/text/$KEY/
    # /$INBOX/_/text/$KEY/raw
    # KEY may contain slashes
    sub get_text {
    	my ($ctx, $inbox, $key) = @_;
    	my $r404 = invalid_inbox($ctx, $inbox);
    	return $r404 if $r404;
    
    	require PublicInbox::WwwText;
    	PublicInbox::WwwText::get_text($ctx, $key);
    }
    
    # show git objects (blobs and commits)
    # /$INBOX/$GIT_OBJECT_ID/s/
    # /$INBOX/$GIT_OBJECT_ID/s/$FILENAME
    sub get_vcs_object ($$$;$) {
    	my ($ctx, $inbox, $oid, $filename) = @_;
    	my $r404 = invalid_inbox($ctx, $inbox);
    	return $r404 if $r404 || !$ctx->{www}->{pi_cfg}->repo_objs($ctx->{ibx});
    	require PublicInbox::ViewVCS;
    	PublicInbox::ViewVCS::show($ctx, $oid, $filename);
    }
    
    sub get_altid_dump {
    	my ($ctx, $inbox, $altid_pfx) =@_;
    	my $r404 = invalid_inbox($ctx, $inbox);
    	return $r404 if $r404;
    	eval { require PublicInbox::WwwAltId } or return need($ctx, 'sqlite3');
    	PublicInbox::WwwAltId::sqldump($ctx, $altid_pfx);
    }
    
    sub need {
    	my ($ctx, $extra) = @_;
    	require PublicInbox::WwwStream;
    	PublicInbox::WwwStream::html_oneshot($ctx, 501, \<$extra is not available for this public-inbox
    Return to index
    EOF } # /$INBOX/$MESSAGE_ID/t.mbox -> thread as mbox # /$INBOX/$MESSAGE_ID/t.mbox.gz -> thread as gzipped mbox # note: I'm not a big fan of other compression formats since they're # significantly more expensive on CPU than gzip and less-widely available, # especially on older systems. Stick to zlib since that's what git uses. sub get_thread_mbox { my ($ctx, $sfx) = @_; my $over = $ctx->{ibx}->over or return need($ctx, 'Overview'); require PublicInbox::Mbox; PublicInbox::Mbox::thread_mbox($ctx, $over, $sfx); } # /$INBOX/$MESSAGE_ID/t.atom -> thread as Atom feed sub get_thread_atom { my ($ctx) = @_; $ctx->{ibx}->over or return need($ctx, 'Overview'); require PublicInbox::Feed; PublicInbox::Feed::generate_thread_atom($ctx); } sub legacy_redirects { my ($ctx, $path_info) = @_; # single-message pages if ($path_info =~ m!$INBOX_RE/m/(\S+)/\z!o) { r301($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/m/(\S+)/raw\z!o) { r301($ctx, $1, $2, 'raw'); } elsif ($path_info =~ m!$INBOX_RE/f/(\S+)/\z!o) { r301($ctx, $1, $2); # thread display } elsif ($path_info =~ m!$INBOX_RE/t/(\S+)/\z!o) { r301($ctx, $1, $2, 't/#u'); } elsif ($path_info =~ m!$INBOX_RE/t/(\S+)/mbox(\.gz)?\z!o) { r301($ctx, $1, $2, "t.mbox$3"); # even older legacy redirects } elsif ($path_info =~ m!$INBOX_RE/m/(\S+)\.html\z!o) { r301($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/t/(\S+)\.html\z!o) { r301($ctx, $1, $2, 't/#u'); } elsif ($path_info =~ m!$INBOX_RE/f/(\S+)\.html\z!o) { r301($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/(?:m|f)/(\S+)\.txt\z!o) { r301($ctx, $1, $2, 'raw'); } elsif ($path_info =~ m!$INBOX_RE/t/(\S+)(\.mbox(?:\.gz)?)\z!o) { r301($ctx, $1, $2, "t$3"); # legacy convenience redirects, order still matters } elsif ($path_info =~ m!$INBOX_RE/m/(\S+)\z!o) { r301($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/t/(\S+)\z!o) { r301($ctx, $1, $2, 't/#u'); } elsif ($path_info =~ m!$INBOX_RE/f/(\S+)\z!o) { r301($ctx, $1, $2); # some Message-IDs have slashes in them and the HTTP server # may try to be clever and unescape them :< } elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/$END_RE\z!o) { msg_page($ctx, $1, $2, $3); # in case people leave off the trailing slash: } elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/(T|t)\z!o) { r301($ctx, $1, $2, $3 eq 't' ? 't/#u' : $3); } elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/f\z!o) { r301($ctx, $1, $2); } else { news_cgit_fallback($ctx); } } sub r301 { my ($ctx, $inbox, $mid_ue, $suffix) = @_; my $ibx = $ctx->{ibx}; unless ($ibx) { my $r404 = invalid_inbox($ctx, $inbox); return $r404 if $r404; $ibx = $ctx->{ibx}; } my $url = $ibx->base_url($ctx->{env}); my $qs = $ctx->{env}->{QUERY_STRING}; if (defined $mid_ue) { # common, and much nicer as '@' than '%40': $mid_ue =~ s/%40/@/g; $url .= $mid_ue . '/'; } $url .= $suffix if (defined $suffix); $url .= "?$qs" if $qs ne ''; [ 301, [ Location => $url, 'Content-Type' => 'text/plain' ], [ "Redirecting to $url\n" ] ] } sub msg_page { my ($ctx, $inbox, $mid_ue, $e) = @_; my $ret; $ret = invalid_inbox_mid($ctx, $inbox, $mid_ue) and return $ret; '' eq $e and return get_mid_html($ctx); 'T/' eq $e and return get_thread($ctx, 1); 't/' eq $e and return get_thread($ctx); 't.atom' eq $e and return get_thread_atom($ctx); 't.mbox' eq $e and return get_thread_mbox($ctx); 't.mbox.gz' eq $e and return get_thread_mbox($ctx, '.gz'); 'raw' eq $e and return get_mid_txt($ctx); # legacy, but no redirect for compatibility: 'f/' eq $e and return get_mid_html($ctx); r404($ctx); } sub serve_git { my ($ctx, $epoch, $path) = @_; my $env = $ctx->{env}; my $ibx = $ctx->{ibx}; my $git = defined $epoch ? $ibx->git_epoch($epoch) : $ibx->git; $git ? PublicInbox::GitHTTPBackend::serve($env, $git, $path) : r404(); } sub mbox_results { my ($ctx) = @_; if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) { $ctx->{ibx}->isrch or return need($ctx, 'search'); require PublicInbox::SearchView; return PublicInbox::SearchView::mbox_results($ctx); } r404(); } sub serve_mbox_range { my ($ctx, $inbox, $range) = @_; invalid_inbox($ctx, $inbox) || eval { require PublicInbox::Mbox; PublicInbox::Mbox::emit_range($ctx, $range); } } sub news_www { my ($self) = @_; $self->{news_www} //= do { require PublicInbox::NewsWWW; PublicInbox::NewsWWW->new($self->{pi_cfg}); } } sub cgit { my ($self) = @_; $self->{cgit} //= do { my $pi_cfg = $self->{pi_cfg}; if (defined($pi_cfg->{'publicinbox.cgitrc'})) { require PublicInbox::Cgit; PublicInbox::Cgit->new($pi_cfg); } else { require Plack::Util; Plack::Util::inline_object(call => sub { r404() }); } } } # GET $INBOX/manifest.js.gz sub get_inbox_manifest ($$$) { my ($ctx, $inbox, $key) = @_; my $r404 = invalid_inbox($ctx, $inbox); return $r404 if $r404; require PublicInbox::ManifestJsGz; PublicInbox::ManifestJsGz::per_inbox($ctx); } sub get_attach { my ($ctx, $idx, $fn) = @_; require PublicInbox::WwwAttach; PublicInbox::WwwAttach::get_attach($ctx, $idx, $fn); } # User-generated content (UGC) may have excessively long lines # and screw up rendering on some browsers, so we use pre-wrap. # # We also force everything to the same scaled font-size because GUI # browsers (tested both Firefox and surf (webkit)) uses a larger font # for the Search
    element than the rest of the page. Font size # uniformity is important to people who rely on gigantic fonts. # Finally, we use monospace to ensure the Search field and button # has the same size and spacing as everything else which is #
    -formatted anyways.
    our $STYLE = 'pre{white-space:pre-wrap}*{font-size:100%;font-family:monospace}';
    
    sub stylesheets_prepare ($$) {
    	my ($self, $upfx) = @_;
    	my $mini = eval {
    		require CSS::Minifier;
    		sub { CSS::Minifier::minify(input => $_[0]) };
    	} || eval {
    		require CSS::Minifier::XS;
    		sub { CSS::Minifier::XS::minify($_[0]) };
    	} || sub { $_[0] };
    
    	my $css_map = {};
    	my $stylesheets = $self->{pi_cfg}->{css} || [];
    	my $links = [];
    	my $inline_ok = 1;
    
    	foreach my $s (@$stylesheets) {
    		my $attr = {};
    		local $_ = $s;
    		foreach my $k (qw(media title href)) {
    			if (s/\s*$k='([^']+)'// || s/\s*$k=(\S+)//) {
    				$attr->{$k} = $1;
    			}
    		}
    
    		if (defined $attr->{href}) {
    			$inline_ok = 0;
    		} else {
    			my $fn = $_;
    			my ($key) = (m!([^/]+?)(?:\.css)?\z!i);
    			if ($key !~ /\A[a-zA-Z0-9_\-\.]+\z/) {
    				warn "ignoring $fn, non-ASCII word character\n";
    				next;
    			}
    			open(my $fh, '<', $fn) or do {
    				warn "failed to open $fn: $!\n";
    				next;
    			};
    			my $ctime = 0;
    			my $local = do { local $/; <$fh> };
    			if ($local =~ /\S/) {
    				$ctime = sprintf('%x',(stat($fh))[10]);
    				$local = $mini->($local);
    			}
    
    			# do not let BOFHs override userContent.css:
    			if ($local =~ /!\s*important\b/i) {
    				warn "ignoring $fn since it uses `!important'\n";
    				next;
    			}
    
    			$css_map->{$key} = $local;
    			$attr->{href} = "$upfx$key.css?$ctime";
    			if (defined($attr->{title})) {
    				$inline_ok = 0;
    			} elsif (($attr->{media}||'screen') eq 'screen') {
    				$attr->{-inline} = $local;
    			}
    		}
    		push @$links, $attr;
    	}
    
    	my $buf = "';
    
    	if (@$links) {
    		foreach my $attr (@$links) {
    			delete $attr->{-inline};
    			$buf .= "{"-style-$upfx"} = $buf;
    	} else {
    		$self->{-style_inline} = $buf;
    	}
    	$self->{-css_map} = $css_map;
    }
    
    # returns an HTML fragment with 
    EOF
    
    $STYLE =~ s/^\s*//gm;
    $STYLE =~ tr/\n//d;
    
    sub r ($;$) {
    	my ($code, $msg) = @_;
    	$msg ||= status_message($code);
    	[ $code, [ qw(Content-Type text/plain), 'Content-Length', length($msg),
    		@NO_CACHE ],
    	  [ $msg ] ]
    }
    
    sub getline_response ($$$$$) {
    	my ($env, $in, $off, $len, $path) = @_;
    	my $r = bless {}, __PACKAGE__;
    	if ($env->{'pi-httpd.async'}) { # public-inbox-httpd-only mode
    		$env->{'psgix.no-compress'} = 1; # do not chunk response
    		%$r = ( bypass => [$in, $off, $len, $env->{'psgix.io'}] );
    	} else {
    		%$r = ( in => $in, off => $off, len => $len, path => $path );
    	}
    	$r;
    }
    
    sub setup_range {
    	my ($env, $in, $h, $beg, $end, $size) = @_;
    	my $code = 200;
    	my $len = $size;
    	if ($beg eq '') {
    		if ($end ne '') { # "bytes=-$end" => last N bytes
    			$beg = $size - $end;
    			$beg = 0 if $beg < 0;
    			$end = $size - 1;
    			$code = 206;
    		} else {
    			$code = 416;
    		}
    	} else {
    		if ($beg > $size) {
    			$code = 416;
    		} elsif ($end eq '' || $end >= $size) {
    			$end = $size - 1;
    			$code = 206;
    		} elsif ($end < $size) {
    			$code = 206;
    		} else {
    			$code = 416;
    		}
    	}
    	if ($code == 206) {
    		$len = $end - $beg + 1;
    		if ($len <= 0) {
    			$code = 416;
    		} else {
    			push @$h, qw(Accept-Ranges bytes Content-Range);
    			push @$h, "bytes $beg-$end/$size";
    
    			# FIXME: Plack::Middleware::Deflater bug?
    			$env->{'psgix.no-compress'} = 1;
    		}
    	}
    	if ($code == 416) {
    		push @$h, 'Content-Range', "bytes */$size";
    		return [ 416, $h, [] ];
    	}
    	($code, $beg, $len);
    }
    
    # returns a PSGI arrayref response iff .gz and non-.gz mtimes match
    sub try_gzip_static ($$$$) {
    	my ($env, $h, $path, $type) = @_;
    	return unless ($env->{HTTP_ACCEPT_ENCODING} // '') =~ /\bgzip\b/i;
    	my $mtime;
    	return unless -f $path && defined(($mtime = (stat(_))[9]));
    	my $gz = "$path.gz";
    	return unless -f $gz && (stat(_))[9] == $mtime;
    	my $res = response($env, $h, $gz, $type);
    	return if ($res->[0] > 300 || $res->[0] < 200);
    	push @{$res->[1]}, qw(Cache-Control no-transform
    				Content-Encoding gzip
    				Vary Accept-Encoding);
    	$res;
    }
    
    sub response ($$$;$) {
    	my ($env, $h, $path, $type) = @_;
    	$type //= Plack::MIME->mime_type($path) // 'application/octet-stream';
    	if ($path !~ /\.gz\z/i) {
    		if (my $res = try_gzip_static($env, $h, $path, $type)) {
    			return $res;
    		}
    	}
    
    	my $in;
    	if ($env->{REQUEST_METHOD} eq 'HEAD') {
    		return r(404) unless -f $path && -r _; # in case it's a FIFO :P
    	} else { # GET, callers should've already filtered out other methods
    		if (!sysopen($in, $path, O_RDONLY|O_NONBLOCK)) {
    			return r(404) if $! == ENOENT || $! == ENOTDIR;
    			return r(403) if $! == EACCES;
    			return r(500);
    		}
    		return r(404) unless -f $in;
    	}
    	my $size = -s _; # bare "_" reuses "struct stat" from "-f" above
    	my $mtime = time2str((stat(_))[9]);
    
    	if (my $ims = $env->{HTTP_IF_MODIFIED_SINCE}) {
    		return [ 304, [], [] ] if $mtime eq $ims;
    	}
    
    	my $len = $size;
    	my $code = 200;
    	push @$h, 'Content-Type', $type;
    	my $off = 0;
    	if (($env->{HTTP_RANGE} || '') =~ /\bbytes=([0-9]*)-([0-9]*)\z/) {
    		($code, $off, $len) = setup_range($env, $in, $h, $1, $2, $size);
    		return $code if ref($code);
    	}
    	push @$h, 'Content-Length', $len, 'Last-Modified', $mtime;
    	[ $code, $h, $in ? getline_response($env, $in, $off, $len, $path) : [] ]
    }
    
    # called by PSGI servers on each response chunk:
    sub getline {
    	my ($self) = @_;
    
    	# avoid buffering, by becoming the buffer! (public-inbox-httpd)
    	if (my $tmpio = delete $self->{bypass}) {
    		my $http = pop @$tmpio; # PublicInbox::HTTP
    		push @{$http->{wbuf}}, $tmpio; # [ $in, $off, $len ]
    		$http->flush_write;
    		return; # undef, EOF
    	}
    
    	# generic PSGI runs this:
    	my $len = $self->{len} or return; # undef, tells server we're done
    	my $n = 8192;
    	$n = $len if $len < $n;
    	sysseek($self->{in}, $self->{off}, SEEK_SET) or
    			die "sysseek ($self->{path}): $!";
    	my $r = sysread($self->{in}, my $buf, $n);
    	if (defined $r && $r > 0) { # success!
    		$self->{len} = $len - $r;
    		$self->{off} += $r;
    		return $buf;
    	}
    	my $m = defined $r ? "EOF with $len bytes left" : "read error: $!";
    	die "$self->{path} $m, dropping client socket\n";
    }
    
    sub close {} # noop, called by PSGI server, just let everything go out-of-scope
    
    # OO interface for use as a Plack app
    sub new {
    	my ($class, %opt) = @_;
    	my $index = $opt{'index'} // [ 'index.html' ];
    	$index = [ $index ] if defined($index) && ref($index) ne 'ARRAY';
    	$index = undef if scalar(@$index) == 0;
    	my $style = $opt{style};
    	if (defined $style) {
    		$style = \$style unless ref($style);
    	}
    	my $docroot = $opt{docroot};
    	die "`docroot' not set" unless defined($docroot) && $docroot ne '';
    	bless {
    		docroot => $docroot,
    		index => $index,
    		autoindex => $opt{autoindex},
    		style => $style // \$STYLE,
    	}, $class;
    }
    
    # PATH_INFO is decoded, and we want the undecoded original
    my %path_re_cache;
    sub path_info_raw ($) {
    	my ($env) = @_;
    	my $sn = $env->{SCRIPT_NAME};
    	my $re = $path_re_cache{$sn} //= do {
    		$sn = '/'.$sn unless index($sn, '/') == 0;
    		$sn =~ s!/\z!!;
    		qr!\A(?:https?://[^/]+)?\Q$sn\E(/[^\?\#]+)!;
    	};
    	$env->{REQUEST_URI} =~ $re ? $1 : $env->{PATH_INFO};
    }
    
    sub redirect_slash ($) {
    	my ($env) = @_;
    	my $url = $env->{'psgi.url_scheme'} . '://';
    	my $host_port = $env->{HTTP_HOST} //
    		"$env->{SERVER_NAME}:$env->{SERVER_PORT}";
    	$url .= $host_port . path_info_raw($env) . '/';
    	my $body = "Redirecting to $url\n";
    	[ 302, [ qw(Content-Type text/plain), 'Location', $url,
    		'Content-Length', length($body) ], [ $body ] ]
    }
    
    sub human_size ($) {
    	my ($size) = @_;
    	my $suffix = '';
    	for my $s (qw(K M G T P)) {
    		last if $size < 1024;
    		$size /= 1024;
    		if ($size <= 1024) {
    			$suffix = $s;
    			last;
    		}
    	}
    	sprintf('%lu', $size).$suffix;
    }
    
    # by default, this returns "index.html" if it exists for a given directory
    # It'll generate a directory listing, (autoindex).
    # May be disabled by setting autoindex => 0
    sub dir_response ($$$) {
    	my ($self, $env, $fs_path) = @_;
    	if (my $index = $self->{'index'}) { # serve index.html or similar
    		for my $html (@$index) {
    			my $p = $fs_path . $html;
    			my $res = response($env, [], $p);
    			return $res if $res->[0] != 404;
    		}
    	}
    	return r(404) unless $self->{autoindex};
    	opendir(my $dh, $fs_path) or do {
    		return r(404) if ($! == ENOENT || $! == ENOTDIR);
    		return r(403) if $! == EACCES;
    		return r(500);
    	};
    	my @entries = grep(!/\A\./, readdir($dh));
    	$dh = undef;
    	my (%dirs, %other, %want_gz);
    	my $path_info = $env->{PATH_INFO};
    	push @entries, '..' if $path_info ne '/';
    	for my $base (@entries) {
    		my $href = ascii_html(uri_escape_utf8($base));
    		my $name = ascii_html($base);
    		my @st = stat($fs_path . $base) or next; # unlikely
    		my ($gzipped, $uncompressed, $hsize);
    		my $entry = '';
    		my $mtime = $st[9];
    		if (-d _) {
    			$href .= '/';
    			$name .= '/';
    			$hsize = '-';
    			$dirs{"$base\0$mtime"} = \$entry;
    		} elsif (-f _) {
    			$other{"$base\0$mtime"} = \$entry;
    			if ($base !~ /\.gz\z/i) {
    				$want_gz{"$base.gz\0$mtime"} = undef;
    			}
    			$hsize = human_size($st[7]);
    		} else {
    			next;
    		}
    		# 54 = 80 - (SP length(strftime(%Y-%m-%d %k:%M)) SP human_size)
    		$hsize = sprintf('% 8s', $hsize);
    		my $pad = 54 - length($name);
    		$pad = 1 if $pad <= 0;
    		$entry .= qq($name) . (' ' x $pad);
    		$mtime = strftime('%Y-%m-%d %k:%M', gmtime($mtime));
    		$entry .= $mtime . $hsize;
    	}
    
    	# filter out '.gz' files as long as the mtime matches the
    	# uncompressed version
    	delete(@other{keys %want_gz});
    	@entries = ((map { ${$dirs{$_}} } sort keys %dirs),
    			(map { ${$other{$_}} } sort keys %other));
    
    	my $path_info_html = ascii_html($path_info);
    	my $h = [qw(Content-Type text/html Content-Length), undef];
    	my $gzf = gzf_maybe($h, $env);
    	$gzf->zmore("Index of $path_info_html" .
    		${$self->{style}} .
    		"
    Index of $path_info_html

    \n");
    	$gzf->zmore(join("\n", @entries));
    	my $out = $gzf->zflush("

    \n"); $h->[3] = length($out); [ 200, $h, [ $out ] ] } sub call { # PSGI app endpoint my ($self, $env) = @_; return r(405) if $env->{REQUEST_METHOD} !~ /\A(?:GET|HEAD)\z/; my $path_info = $env->{PATH_INFO}; return r(403) if index($path_info, "\0") >= 0; my (@parts) = split(m!/+!, $path_info, -1); return r(403) if grep(/\A(?:\.\.)\z/, @parts) || $parts[0] ne ''; my $fs_path = join('/', $self->{docroot}, @parts); return dir_response($self, $env, $fs_path) if $parts[-1] eq ''; my $res = response($env, [], $fs_path); $res->[0] == 404 && -d $fs_path ? redirect_slash($env) : $res; } 1; public-inbox-1.9.0/lib/PublicInbox/WwwStream.pm000066400000000000000000000134571430031475700214140ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # # HTML body stream for which yields getline+close methods for # generic PSGI servers and callbacks for public-inbox-httpd. # # See PublicInbox::GzipFilter parent class for more info. package PublicInbox::WwwStream; use strict; use v5.10.1; use parent qw(Exporter PublicInbox::GzipFilter); our @EXPORT_OK = qw(html_oneshot); use PublicInbox::Hval qw(ascii_html prurl ts2str); our $CODE_URL = [ qw( http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git https://public-inbox.org/public-inbox.git) ]; sub base_url ($) { my $ctx = shift; my $base_url = $ctx->{ibx}->base_url($ctx->{env}); chop $base_url; # no trailing slash for clone $base_url; } sub init { my ($ctx, $cb) = @_; $ctx->{cb} = $cb; $ctx->{base_url} = base_url($ctx); bless $ctx, __PACKAGE__; } sub async_eml { # for async_blob_cb my ($ctx, $eml) = @_; $ctx->write($ctx->{cb}->($ctx, $eml)); } sub html_top ($) { my ($ctx) = @_; my $ibx = $ctx->{ibx}; my $desc = ascii_html($ibx->description); my $title = delete($ctx->{-title_html}) // $desc; my $upfx = $ctx->{-upfx} || ''; my $atom = $ctx->{-atom} || $upfx.'new.atom'; my $top = "$desc"; if (my $t_max = $ctx->{-t_max}) { $t_max = ts2str($t_max); $top = qq($top); # we had some kind of query, link to /$INBOX/?t=YYYYMMDDhhmmss } elsif ($ctx->{qp}->{t}) { $top = qq($top); } elsif (length($upfx)) { $top = qq($top); } my $code = $ibx->{coderepo} ? qq( / code) : ''; # id=mirror must exist for legacy bookmarks my $links = qq(help / ). qq(color / ). qq(mirror$code / ). qq(Atom feed); if ($ibx->isrch) { my $q_val = delete($ctx->{-q_value_html}) // ''; $q_val = qq(\nvalue="$q_val") if $q_val ne ''; # XXX gross, for SearchView.pm my $extra = delete($ctx->{-extra_form_html}) // ''; my $action = $upfx eq '' ? './' : $upfx; $top = qq{
    $top} .
    			  qq{\n} .
    			  $extra .
    			  qq{} .
    			  ' ' . $links .
    			  q{
    } } else { $top = '
    ' . $top . "\n" . $links . '
    '; } "$title" . qq() . $ctx->{www}->style($upfx) . ''. $top . (delete($ctx->{-html_tip}) // ''); } sub coderepos ($) { my ($ctx) = @_; my $cr = $ctx->{ibx}->{coderepo} // return (); my $cfg = $ctx->{www}->{pi_cfg}; my $upfx = ($ctx->{-upfx} // ''). '../'; my $pfx = $ctx->{base_url} //= $ctx->base_url; my $up = $upfx =~ tr!/!/!; $pfx =~ s!/[^/]+\z!/! for (1..$up); my @ret = ('' . 'Code repositories for project(s) associated with this '. $ctx->{ibx}->thing_type . "\n"); for my $cr_name (@$cr) { my $urls = $cfg->get_all("coderepo.$cr_name.cgiturl"); if ($urls) { for (@$urls) { my $u = m!\A(?:[a-z\+]+:)?//! ? $_ : $pfx.$_; $u = ascii_html(prurl($ctx->{env}, $u)); $ret[0] .= qq(\n\t$u); } } else { $ret[0] .= qq[\n\t$cr_name.git (no URL configured)]; } } @ret; # may be empty, this sub is called as an arg for join() } sub _html_end { my ($ctx) = @_; my $upfx = $ctx->{-upfx} || ''; my $m = "${upfx}_/text/mirror/"; my $x; if ($ctx->{ibx}->can('cloneurl')) { $x = <mirroring instructions for how to clone and mirror all data and code used for this inbox EOF my $has_nntp = @{$ctx->{ibx}->nntp_url($ctx)}; my $has_imap = @{$ctx->{ibx}->imap_url($ctx)}; if ($has_nntp || $has_imap) { substr($x, -1, 1) = ";\n"; # s/\n/;\n if ($has_nntp && $has_imap) { $x .= <mirroring instructions on how to clone and mirror all data and code used by this external index. EOF } chomp $x; '
    '.join("\n\n", coderepos($ctx), $x).'
    ' } # callback for HTTP.pm (and any other PSGI servers) sub getline { my ($ctx) = @_; my $cb = $ctx->{cb} or return; while (defined(my $x = $cb->($ctx))) { # x = smsg or scalar non-ref if (ref($x)) { # smsg my $eml = $ctx->{ibx}->smsg_eml($x) or next; $ctx->{smsg} = $x; return $ctx->translate($cb->($ctx, $eml)); } else { # scalar return $ctx->translate($x); } } delete $ctx->{cb}; $ctx->zflush(_html_end($ctx)); } sub html_oneshot ($$;$) { my ($ctx, $code, $sref) = @_; my $res_hdr = [ 'Content-Type' => 'text/html; charset=UTF-8', 'Content-Length' => undef ]; bless $ctx, __PACKAGE__; $ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($res_hdr, $ctx->{env}); $ctx->{base_url} // do { $ctx->zmore(html_top($ctx)); $ctx->{base_url} = base_url($ctx); }; $ctx->zmore($$sref) if $sref; my $bdy = $ctx->zflush(_html_end($ctx)); $res_hdr->[3] = length($bdy); [ $code, $res_hdr, [ $bdy ] ] } sub async_next ($) { my ($http) = @_; # PublicInbox::HTTP my $ctx = $http->{forward} or return; eval { if (my $smsg = $ctx->{smsg} = $ctx->{cb}->($ctx)) { $ctx->smsg_blob($smsg); } else { $ctx->write(_html_end($ctx)); $ctx->close; # GzipFilter->close } }; warn "E: $@" if $@; } sub aresponse { my ($ctx, $code, $cb) = @_; my $res_hdr = [ 'Content-Type' => 'text/html; charset=UTF-8' ]; init($ctx, $cb); $ctx->psgi_response($code, $res_hdr); } 1; public-inbox-1.9.0/lib/PublicInbox/WwwText.pm000066400000000000000000000354051430031475700211020ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # used for displaying help texts and other non-mail content package PublicInbox::WwwText; use strict; use v5.10.1; use PublicInbox::Linkify; use PublicInbox::WwwStream; use PublicInbox::Hval qw(ascii_html prurl); use HTTP::Date qw(time2str); use URI::Escape qw(uri_escape_utf8); use PublicInbox::GzipFilter qw(gzf_maybe); our $QP_URL = 'https://xapian.org/docs/queryparser.html'; our $WIKI_URL = 'https://en.wikipedia.org/wiki'; my $hl = eval { require PublicInbox::HlMod; PublicInbox::HlMod->new }; # /$INBOX/_/text/$KEY/ # KEY may contain slashes # For now, "help" is the only supported $KEY sub get_text { my ($ctx, $key) = @_; my $code = 200; $key //= 'help'; # this 302s to _/text/help/ # get the raw text the same way we get mboxrds my $raw = ($key =~ s!/raw\z!!); my $have_tslash = ($key =~ s!/\z!!) if !$raw; my $txt = ''; my $hdr = [ 'Content-Type', 'text/plain', 'Content-Length', undef ]; if (!_default_text($ctx, $key, $hdr, \$txt)) { $code = 404; $txt = "404 Not Found ($key)\n"; } my $env = $ctx->{env}; if ($raw) { $txt = gzf_maybe($hdr, $env)->zflush($txt) if $code == 200; $hdr->[3] = length($txt); return [ $code, $hdr, [ $txt ] ] } # enforce trailing slash for "wget -r" compatibility if (!$have_tslash && $code == 200) { my $url = $ctx->{ibx}->base_url($env); $url .= "_/text/$key/"; return [ 302, [ 'Content-Type', 'text/plain', 'Location', $url ], [ "Redirecting to $url\n" ] ]; } # Follow git commit message conventions, # first line is the Subject/title my ($title) = ($txt =~ /\A([^\n]*)/s); $ctx->{-title_html} = ascii_html($title); my $nslash = ($key =~ tr!/!/!); $ctx->{-upfx} = '../../../' . ('../' x $nslash); my $l = PublicInbox::Linkify->new; $l->linkify_1($txt); if ($hl) { $hl->do_hl_text(\$txt); } else { $txt = ascii_html($txt); } $txt = '
    ' . $l->linkify_2($txt) . '
    '; $txt =~ s!\bPOP3\b!POP3!; $txt =~ s!\b(Newsgroups?)\b!$1!; $txt =~ s!\bIMAP\b!IMAP!; PublicInbox::WwwStream::html_oneshot($ctx, $code, \$txt); } sub _srch_prefix ($$) { my ($ibx, $txt) = @_; my $pad = 0; my $htxt = ''; my $help = $ibx->isrch->help; my $i; for ($i = 0; $i < @$help; $i += 2) { my $pfx = $help->[$i]; my $n = length($pfx); $pad = $n if $n > $pad; $htxt .= $pfx . "\0"; $htxt .= $help->[$i + 1]; $htxt .= "\f\n"; } $pad += 2; my $padding = ' ' x ($pad + 4); $htxt =~ s/^/$padding/gms; $htxt =~ s/^$padding(\S+)\0/" $1".(' ' x ($pad - length($1)))/egms; $htxt =~ s/\f\n/\n/gs; $$txt .= $htxt; 1; } sub _colors_help ($$) { my ($ctx, $txt) = @_; my $ibx = $ctx->{ibx}; my $env = $ctx->{env}; my $base_url = $ibx->base_url($env); $$txt .= "color customization for $base_url\n"; $$txt .= <{ibx}->{coderepo} // return; # note: this doesn't preserve cgitrc layout, since we parse cgitrc # and drop the original structure $$txt .= "\tcoderepo = $_\n" for @$cr; $$txt .= <<'EOF'; ; `coderepo' entries allows blob reconstruction via patch emails if ; the inbox is indexed with Xapian. `@@ @@' ; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/, ; an HTTP endpoint which reconstructs git blobs via git-apply(1). EOF my $pi_cfg = $ctx->{www}->{pi_cfg}; for my $cr_name (@$cr) { my $urls = $pi_cfg->get_all("coderepo.$cr_name.cgiturl"); my $path = "/path/to/$cr_name"; $cr_name = dq_escape($cr_name); $$txt .= qq([coderepo "$cr_name"]\n); if ($urls && scalar(@$urls)) { $$txt .= "\t; "; $$txt .= join(" ||\n\t;\t", map {; my $dst = $path; if ($path !~ m![a-z0-9_/\.\-]!i) { $dst = '"'.dq_escape($dst).'"'; } qq(git clone $_ $dst); } @$urls); $$txt .= "\n"; } $$txt .= "\tdir = $path\n"; $$txt .= "\tcgiturl = https://example.com/"; $$txt .= uri_escape_utf8($cr_name, '^A-Za-z0-9\-\._~/')."\n"; } } # n.b. this is a perfect candidate for memoization sub inbox_config ($$$) { my ($ctx, $hdr, $txt) = @_; my $ibx = $ctx->{ibx}; push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config'; my $t = eval { $ibx->mm->created_at }; push(@$hdr, 'Last-Modified', time2str($t)) if $t; my $name = dq_escape($ibx->{name}); my $inboxdir = '/path/to/top-level-inbox'; my $base_url = $ibx->base_url($ctx->{env}); $$txt .= <{$k}) or next; $$txt .= "\t$k = $_\n" for @$v; } if (my $altid = $ibx->{altid}) { my $altid_map = $ibx->altid_map; $$txt .= <{$k}) or next; $$txt .= "\t$k = $v\n"; } $$txt .= "\timapmirror = $_\n" for (@{$ibx->imap_url($ctx)}); $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url($ctx)}); _coderepo_config($ctx, $txt); 1; } # n.b. this is a perfect candidate for memoization sub extindex_config ($$$) { my ($ctx, $hdr, $txt) = @_; my $ibx = $ctx->{ibx}; push @$hdr, 'Content-Disposition', 'inline; filename=extindex.config'; my $name = dq_escape($ibx->{name}); my $base_url = $ibx->base_url($ctx->{env}); $$txt .= <{$k}) or next; $$txt .= "\t$k = $v\n"; } _coderepo_config($ctx, $txt); 1; } sub coderepos_raw ($$) { my ($ctx, $top_url) = @_; my $cr = $ctx->{ibx}->{coderepo} // return (); my $cfg = $ctx->{www}->{pi_cfg}; my @ret = ('Code repositories for project(s) associated with this '. $ctx->{ibx}->thing_type . "\n"); for my $cr_name (@$cr) { my $urls = $cfg->get_all("coderepo.$cr_name.cgiturl"); if ($urls) { for (@$urls) { my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ : $top_url.$_; $ret[0] .= "\n\t" . prurl($ctx->{env}, $u); } } else { $ret[0] .= qq[\n\t$cr_name.git (no URL configured)]; } } @ret; # may be empty, this sub is called as an arg for join() } sub _add_non_http_urls ($$) { my ($ctx, $txt) = @_; $ctx->{ibx}->can('nntp_url') or return; # TODO extindex can have IMAP my $urls = $ctx->{ibx}->imap_url($ctx); if (@$urls) { $urls = join("\n ", @$urls); $urls =~ s!://([^/@]+)/!://;AUTH=ANONYMOUS\@$1/!sg; $$txt .= <{ibx}->nntp_url($ctx); if (@$urls) { $$txt .= @$urls == 1 ? "\nNewsgroup" : "\nNewsgroups are"; $$txt .= ' available over NNTP:'; $$txt .= "\n " . join("\n ", @$urls) . "\n"; } $urls = $ctx->{ibx}->pop3_url($ctx); if (@$urls) { $urls = join("\n ", @$urls); $$txt .= <{ibx}->{newsgroup} where \$(uuidgen) in the output of the `uuidgen' command on your system. The UUID in the username functions as a private cookie (don't share it). Idle accounts will expire periodically. EOM } } sub _add_onion_note ($) { my ($txt) = @_; $$txt =~ m!\b[^:]+://\w+\.onion/!i and $$txt .= <{ibx}; my $base_url = $ibx->base_url($ctx->{env}); chop $base_url; # no trailing slash for "git clone" my $dir = (split(m!/!, $base_url))[-1]; my %seen = ($base_url => 1); my $top_url = $base_url; $top_url =~ s!/[^/]+\z!/!; $$txt .= "public-inbox mirroring instructions\n\n"; if ($ibx->can('cloneurl')) { # PublicInbox::Inbox $$txt .= "This public inbox may be cloned and mirrored by anyone:\n"; my @urls; my $max = $ibx->max_git_epoch; # TODO: some of these URLs may be too long and we may need to # do something like code_footer() above, but these are local # admin-defined if (defined($max)) { # v2 for my $i (0..$max) { # old epochs my be deleted: -d "$ibx->{inboxdir}/git/$i.git" or next; my $url = "$base_url/$i"; $seen{$url} = 1; push @urls, "$url $dir/git/$i.git"; } my $nr = scalar(@urls); if ($nr > 1) { chomp($$txt .= <cloneurl}) { next if $seen{$u}++; push @urls, $u; } $$txt .= "\n"; $$txt .= join('', map { " git clone --mirror $_\n" } @urls); my $addrs = $ibx->{address} // 'inbox@example.com'; my $ng = $ibx->{newsgroup} // ''; substr($ng, 0, 0, ' --ng ') if $ng; $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY'; my $v = defined $max ? '-V2' : '-V1'; $$txt .= <{name} ./$dir $base_url \\ $addrs public-inbox-index ./$dir EOF } else { # PublicInbox::ExtSearch $$txt .= <{www}->{pi_cfg}->{lc('publicInbox.wwwListing')}; if (($v // '') =~ /\A(?:all|match=domain)\z/) { $$txt .= <{env}, $PublicInbox::WwwStream::CODE_URL); $$txt .= join("\n\n", coderepos_raw($ctx, $top_url), # may be empty "AGPL code for this site:\n git clone $code_url"); 1; } sub _default_text ($$$$) { my ($ctx, $key, $hdr, $txt) = @_; if ($key eq 'mirror') { return _mirror_help($ctx, $txt); } elsif ($key eq 'color') { return _colors_help($ctx, $txt); } elsif ($key eq 'config') { return $ctx->{ibx}->can('cloneurl') ? inbox_config($ctx, $hdr, $txt) : extindex_config($ctx, $hdr, $txt); } return if $key ne 'help'; # TODO more keys? my $ibx = $ctx->{ibx}; my $base_url = $ibx->base_url($ctx->{env}); $$txt .= <') into the URL. Forward slash ('/') characters in the Message-IDs need to be escaped as "%2F" (without quotes). Thus, it is possible to retrieve any message by its Message-ID by going to: $base_url/ (without the '<' or '>') Message-IDs are described at: $WIKI_URL/Message-ID EOF # n.b. we use the Xapian DB for any regeneratable, # order-of-arrival-independent data. if ($ibx->isrch) { $$txt .= <over) { $$txt .= </T/#u Loads the thread belonging to the given in flat chronological order. The "#u" anchor focuses the browser on the given . * $base_url/t/#u Loads the thread belonging to the given in threaded order with nesting. For deep threads, this requires a wide display or horizontal scrolling. Both of these HTML endpoints are suitable for offline reading using the thread overview at the bottom of each page. The gzipped mbox for a thread is available for downloading and importing into your favorite mail client: * $base_url/t.mbox.gz We use the mboxrd variant of the mbox format described at: $WIKI_URL/Mbox Users of feed readers may follow a particular thread using: * $base_url/t.atom Which loads the thread in Atom Syndication Standard described at Wikipedia and RFC4287: $WIKI_URL/Atom_(standard) https://tools.ietf.org/html/rfc4287 Atom Threading Extensions (RFC4685) are supported: https://tools.ietf.org/html/rfc4685 EOF } # $over _add_non_http_urls($ctx, \(my $note = '')); $note and $note =~ s/^/ /gms and $$txt .= < # License: AGPL-3.0+ package PublicInbox::Xapcmd; use strict; use PublicInbox::Spawn qw(which popen_rd); use PublicInbox::Syscall; use PublicInbox::Admin qw(setup_signals); use PublicInbox::Over; use PublicInbox::SearchIdx; use File::Temp 0.19 (); # ->newdir use File::Path qw(remove_tree); use POSIX qw(WNOHANG _exit); # support testing with dev versions of Xapian which installs # commands with a version number suffix (e.g. "xapian-compact-1.5") our $XAPIAN_COMPACT = $ENV{XAPIAN_COMPACT} || 'xapian-compact'; our @COMPACT_OPT = qw(jobs|j=i quiet|q blocksize|b=s no-full|n fuller|F); sub commit_changes ($$$$) { my ($ibx, $im, $tmp, $opt) = @_; my $reshard = $opt->{reshard}; $SIG{INT} or die 'BUG: $SIG{INT} not handled'; my (@old_shard, $over_chg); # Sort shards highest-to-lowest, since ->xdb_shards_flat # determines the number of shards to load based on the max; # and we'd rather xdb_shards_flat to momentarily fail rather # than load out-of-date shards my @order = sort { my ($x) = ($a =~ m!/([0-9]+)/*\z!); my ($y) = ($b =~ m!/([0-9]+)/*\z!); ($y // -1) <=> ($x // -1) # we may have non-shards } keys %$tmp; my ($dname) = ($order[0] =~ m!(.*/)[^/]+/*\z!); my $mode = (stat($dname))[2]; for my $old (@order) { next if $old eq ''; # no invalid paths my $newdir = $tmp->{$old}; my $have_old = -e $old; if (!$have_old && !defined($opt->{reshard})) { die "failed to stat($old): $!"; } my $new = $newdir->dirname if defined($newdir); my $over = "$old/over.sqlite3"; if (-f $over) { # only for v1, v2 over is untouched defined $new or die "BUG: $over exists when culling v2"; $over = PublicInbox::Over->new($over); my $tmp_over = "$new/over.sqlite3"; $over->dbh->sqlite_backup_to_file($tmp_over); $over = undef; $over_chg = 1; } if (!defined($new)) { # culled shard push @old_shard, $old; next; } chmod($mode & 07777, $new) or die "chmod($new): $!\n"; if ($have_old) { rename($old, "$new/old") or die "rename $old => $new/old: $!\n"; } rename($new, $old) or die "rename $new => $old: $!\n"; push @old_shard, "$old/old" if $have_old; } # trigger ->check_inodes in read-only daemons syswrite($im->{lockfh}, '.') if $over_chg && $im; remove_tree(@old_shard); $tmp = undef; if (!$opt->{-coarse_lock}) { $opt->{-skip_lock} = 1; $im //= $ibx if $ibx->can('eidx_sync'); if ($im->can('count_shards')) { # v2w or eidx my $pr = $opt->{-progress}; my $n = $im->count_shards; if (defined $reshard && $n != $reshard) { die "BUG: counted $n shards after resharding to $reshard"; } my $prev = $im->{shards}; if ($pr && $prev != $n) { $pr->("shard count changed: $prev => $n\n"); $im->{shards} = $n; } } my $env = $opt->{-idx_env}; local %ENV = (%ENV, %$env) if $env; if ($ibx->can('eidx_sync')) { $ibx->eidx_sync($opt); } else { PublicInbox::Admin::index_inbox($ibx, $im, $opt); } } } sub cb_spawn { my ($cb, $args, $opt) = @_; # $cb = cpdb() or compact() my $seed = rand(0xffffffff); my $pid = fork // die "fork: $!"; return $pid if $pid > 0; srand($seed); $SIG{__DIE__} = sub { warn @_; _exit(1) }; # don't jump up stack $cb->($args, $opt); _exit(0); } sub runnable_or_die ($) { my ($exe) = @_; which($exe) or die "$exe not found in PATH\n"; } sub prepare_reindex ($$) { my ($ibx, $opt) = @_; if ($ibx->can('eidx_sync')) { # no prep needed for ExtSearchIdx } elsif ($ibx->version == 1) { my $dir = $ibx->search->xdir(1); my $xdb = $PublicInbox::Search::X{Database}->new($dir); if (my $lc = $xdb->get_metadata('last_commit')) { $opt->{reindex}->{from} = $lc; } } else { # v2 my $max = $ibx->max_git_epoch // return; my $from = $opt->{reindex}->{from}; my $mm = $ibx->mm; my $v = PublicInbox::Search::SCHEMA_VERSION(); foreach my $i (0..$max) { $from->[$i] = $mm->last_commit_xap($v, $i); } } } sub same_fs_or_die ($$) { my ($x, $y) = @_; return if ((stat($x))[0] == (stat($y))[0]); # 0 - st_dev die "$x and $y reside on different filesystems\n"; } sub kill_pids { my ($sig, $pids) = @_; kill($sig, keys %$pids); # pids may be empty } sub process_queue { my ($queue, $cb, $opt) = @_; my $max = $opt->{jobs} // scalar(@$queue); if ($max <= 1) { while (defined(my $args = shift @$queue)) { $cb->($args, $opt); } return; } # run in parallel: my %pids; local @SIG{keys %SIG} = values %SIG; setup_signals(\&kill_pids, \%pids); while (@$queue) { while (scalar(keys(%pids)) < $max && scalar(@$queue)) { my $args = shift @$queue; $pids{cb_spawn($cb, $args, $opt)} = $args; } my $flags = 0; while (scalar keys %pids) { my $pid = waitpid(-1, $flags) or last; last if $pid < 0; my $args = delete $pids{$pid}; if ($args) { die join(' ', @$args)." failed: $?\n" if $?; } else { warn "unknown PID($pid) reaped: $?\n"; } $flags = WNOHANG if scalar(@$queue); } } } sub prepare_run { my ($ibx, $opt) = @_; my $tmp = {}; # old shard dir => File::Temp->newdir object or undef my @queue; # ([old//src,newdir]) - list of args for cpdb() or compact() my ($old, $misc_ok); if ($ibx->can('eidx_sync')) { $misc_ok = 1; $old = $ibx->xdir(1); } elsif (my $srch = $ibx->search) { $old = $srch->xdir(1); } if (defined $old) { -d $old or die "$old does not exist\n"; } my $reshard = $opt->{reshard}; if (defined $reshard && $reshard <= 0) { die "--reshard must be a positive number\n"; } # we want temporary directories to be as deep as possible, # so v2 shards can keep "xap$SCHEMA_VERSION" on a separate FS. if (defined($old) && $ibx->can('version') && $ibx->version == 1) { if (defined $reshard) { warn "--reshard=$reshard ignored for v1 $ibx->{inboxdir}\n"; } my ($dir) = ($old =~ m!(.*?/)[^/]+/*\z!); same_fs_or_die($dir, $old); my $v = PublicInbox::Search::SCHEMA_VERSION(); my $wip = File::Temp->newdir("xapian$v-XXXX", DIR => $dir); $tmp->{$old} = $wip; PublicInbox::Syscall::nodatacow_dir($wip->dirname); push @queue, [ $old, $wip ]; } elsif (defined $old) { opendir my $dh, $old or die "Failed to opendir $old: $!\n"; my @old_shards; while (defined(my $dn = readdir($dh))) { if ($dn =~ /\A[0-9]+\z/) { push @old_shards, $dn; } elsif ($dn eq '.' || $dn eq '..') { } elsif ($dn =~ /\Aover\.sqlite3/) { } elsif ($dn eq 'misc' && $misc_ok) { } else { warn "W: skipping unknown dir: $old/$dn\n" } } die "No Xapian shards found in $old\n" unless @old_shards; my ($src, $max_shard); if (!defined($reshard) || $reshard == scalar(@old_shards)) { # 1:1 copy $max_shard = scalar(@old_shards) - 1; } else { # M:N copy $max_shard = $reshard - 1; $src = [ map { "$old/$_" } @old_shards ]; } foreach my $dn (0..$max_shard) { my $wip = File::Temp->newdir("$dn-XXXX", DIR => $old); same_fs_or_die($old, $wip->dirname); my $cur = "$old/$dn"; push @queue, [ $src // $cur , $wip ]; PublicInbox::Syscall::nodatacow_dir($wip->dirname); $tmp->{$cur} = $wip; } # mark old shards to be unlinked if ($src) { $tmp->{$_} ||= undef for @$src; } } ($tmp, \@queue); } sub check_compact () { runnable_or_die($XAPIAN_COMPACT) } sub _run { # with_umask callback my ($ibx, $cb, $opt) = @_; my $im = $ibx->can('importer') ? $ibx->importer(0) : undef; ($im // $ibx)->lock_acquire; my ($tmp, $queue) = prepare_run($ibx, $opt); # fine-grained locking if we prepare for reindex if (!$opt->{-coarse_lock}) { prepare_reindex($ibx, $opt); ($im // $ibx)->lock_release; } $ibx->cleanup if $ibx->can('cleanup'); process_queue($queue, $cb, $opt); ($im // $ibx)->lock_acquire if !$opt->{-coarse_lock}; commit_changes($ibx, $im, $tmp, $opt); } sub run { my ($ibx, $task, $opt) = @_; # task = 'cpdb' or 'compact' my $cb = \&$task; PublicInbox::Admin::progress_prepare($opt ||= {}); my $dir; for my $fld (qw(inboxdir topdir)) { my $d = $ibx->{$fld} // next; -d $d or die "$fld=$d does not exist\n"; $dir = $d; last; } check_compact() if $opt->{compact} && $ibx->search; if (!$ibx->can('eidx_sync') && !$opt->{-coarse_lock}) { # per-epoch ranges for v2 # v1:{ from => $OID }, v2:{ from => [ $OID, $OID, $OID ] } } $opt->{reindex} = { from => $ibx->version == 1 ? '' : [] }; PublicInbox::SearchIdx::load_xapian_writable(); } local @SIG{keys %SIG} = values %SIG; setup_signals(); $ibx->with_umask(\&_run, $ibx, $cb, $opt); } sub cpdb_retryable ($$) { my ($src, $pfx) = @_; if (ref($@) =~ /\bDatabaseModifiedError\b/) { warn "$pfx Xapian DB modified, reopening and retrying\n"; $src->reopen; return 1; } if ($@) { warn "$pfx E: ", ref($@), "\n"; die; } 0; } sub progress_pfx ($) { my ($wip) = @_; # tempdir v2: ([0-9])+-XXXX my @p = split('/', $wip); # return "xap15/0" for v2, or "xapian15" for v1: ($p[-1] =~ /\A([0-9]+)/) ? "$p[-2]/$1" : $p[-1]; } sub kill_compact { # setup_signals callback my ($sig, $pidref) = @_; kill($sig, $$pidref) if defined($$pidref); } # xapian-compact wrapper sub compact ($$) { # cb_spawn callback my ($args, $opt) = @_; my ($src, $newdir) = @$args; my $dst = ref($newdir) ? $newdir->dirname : $newdir; my $pfx = $opt->{-progress_pfx} ||= progress_pfx($src); my $pr = $opt->{-progress}; my $rdr = {}; foreach my $fd (0..2) { defined(my $dfd = $opt->{$fd}) or next; $rdr->{$fd} = $dfd; } # we rely on --no-renumber to keep docids synched to NNTP my $cmd = [ $XAPIAN_COMPACT, '--no-renumber' ]; for my $sw (qw(no-full fuller)) { push @$cmd, "--$sw" if $opt->{$sw}; } for my $sw (qw(blocksize)) { defined(my $v = $opt->{$sw}) or next; push @$cmd, "--$sw", $v; } $pr->("$pfx `".join(' ', @$cmd)."'\n") if $pr; push @$cmd, $src, $dst; my ($rd, $pid); local @SIG{keys %SIG} = values %SIG; setup_signals(\&kill_compact, \$pid); ($rd, $pid) = popen_rd($cmd, undef, $rdr); while (<$rd>) { if ($pr) { s/\r/\r$pfx /g; $pr->("$pfx $_"); } } waitpid($pid, 0); die "@$cmd failed: \$?=$?\n" if $?; } sub cpdb_loop ($$$;$$) { my ($src, $dst, $pr_data, $cur_shard, $reshard) = @_; my ($pr, $fmt, $nr, $pfx); if ($pr_data) { $pr = $pr_data->{pr}; $fmt = $pr_data->{fmt}; $nr = \($pr_data->{nr}); $pfx = $pr_data->{pfx}; } my ($it, $end); do { eval { $it = $src->postlist_begin(''); $end = $src->postlist_end(''); }; } while (cpdb_retryable($src, $pfx)); do { eval { for (; $it != $end; $it++) { my $docid = $it->get_docid; if (defined $reshard) { my $dst_shard = $docid % $reshard; next if $dst_shard != $cur_shard; } my $doc = $src->get_document($docid); $dst->replace_document($docid, $doc); if ($pr_data && !(++$$nr & 1023)) { $pr->(sprintf($fmt, $$nr)); } } # unlike copydatabase(1), we don't copy spelling # and synonym data (or other user metadata) since # the Perl APIs don't expose iterators for them # (and public-inbox does not use those features) }; } while (cpdb_retryable($src, $pfx)); } # Like copydatabase(1), this is horribly slow; and it doesn't seem due # to the overhead of Perl. sub cpdb ($$) { # cb_spawn callback my ($args, $opt) = @_; my ($old, $newdir) = @$args; my $new = $newdir->dirname; my ($src, $cur_shard); my $reshard; PublicInbox::SearchIdx::load_xapian_writable(); my $XapianDatabase = $PublicInbox::Search::X{Database}; if (ref($old) eq 'ARRAY') { ($cur_shard) = ($new =~ m!(?:xap|ei)[0-9]+/([0-9]+)\b!); defined $cur_shard or die "BUG: could not extract shard # from $new"; $reshard = $opt->{reshard}; defined $reshard or die 'BUG: got array src w/o --reshard'; # resharding, M:N copy means have full read access foreach (@$old) { if ($src) { my $sub = $XapianDatabase->new($_); $src->add_database($sub); } else { $src = $XapianDatabase->new($_); } } } else { $src = $XapianDatabase->new($old); } my ($tmp, $ft); local @SIG{keys %SIG} = values %SIG; if ($opt->{compact}) { my ($dir) = ($new =~ m!(.*?/)[^/]+/*\z!); same_fs_or_die($dir, $new); $ft = File::Temp->newdir("$new.compact-XXXX", DIR => $dir); setup_signals(); $tmp = $ft->dirname; PublicInbox::Syscall::nodatacow_dir($tmp); } else { $tmp = $new; } # like copydatabase(1), be sure we don't overwrite anything in case # of other bugs: my $flag = eval($PublicInbox::Search::Xap.'::DB_CREATE()'); die if $@; my $XapianWritableDatabase = $PublicInbox::Search::X{WritableDatabase}; $flag |= $PublicInbox::SearchIdx::DB_NO_SYNC if !$opt->{fsync}; my $dst = $XapianWritableDatabase->new($tmp, $flag); my $pr = $opt->{-progress}; my $pfx = $opt->{-progress_pfx} = progress_pfx($new); my $pr_data = { pr => $pr, pfx => $pfx, nr => 0 } if $pr; do { eval { # update the only metadata key for v1: my $lc = $src->get_metadata('last_commit'); $dst->set_metadata('last_commit', $lc) if $lc; # only the first xapian shard (0) gets 'indexlevel' if ($new =~ m!(?:xapian[0-9]+|xap[0-9]+/0)\b!) { my $l = $src->get_metadata('indexlevel'); if ($l eq 'medium') { $dst->set_metadata('indexlevel', $l); } } if ($pr_data) { my $tot = $src->get_doccount; # we can only estimate when resharding, # because removed spam causes slight imbalance my $est = ''; if (defined $cur_shard && $reshard > 1) { $tot = int($tot/$reshard); $est = 'around '; } my $fmt = "$pfx % ".length($tot)."u/$tot\n"; $pr->("$pfx copying $est$tot documents\n"); $pr_data->{fmt} = $fmt; $pr_data->{total} = $tot; } }; } while (cpdb_retryable($src, $pfx)); if (defined $reshard) { # we rely on document IDs matching NNTP article number, # so we can't have the Xapian sharding DB support rewriting # document IDs. Thus we iterate through each shard # individually. $src = undef; foreach (@$old) { my $old = $XapianDatabase->new($_); cpdb_loop($old, $dst, $pr_data, $cur_shard, $reshard); } } else { cpdb_loop($src, $dst, $pr_data); } $pr->(sprintf($pr_data->{fmt}, $pr_data->{nr})) if $pr; return unless $opt->{compact}; $src = $dst = undef; # flushes and closes # this is probably the best place to do xapian-compact # since $dst isn't readable by HTTP or NNTP clients, yet: compact([ $tmp, $new ], $opt); remove_tree($tmp) or die "failed to remove $tmp: $!\n"; } 1; public-inbox-1.9.0/lib/PublicInbox/gcf2_libgit2.h000066400000000000000000000062151430031475700215160ustar00rootroot00000000000000/* * Copyright (C) 2020-2021 all contributors * License: AGPL-3.0+ * * libgit2 for Inline::C * Avoiding Git::Raw since it doesn't guarantee a stable API, * while libgit2 itself seems reasonably stable. */ #include #include #include #include static void croak_if_err(int rc, const char *msg) { if (rc != GIT_OK) { const git_error *e = giterr_last(); croak("%d %s (%s)", rc, msg, e ? e->message : "unknown"); } } SV *new() { git_odb *odb; SV *ref, *self; int rc = git_odb_new(&odb); croak_if_err(rc, "git_odb_new"); ref = newSViv((IV)odb); self = newRV_noinc(ref); sv_bless(self, gv_stashpv("PublicInbox::Gcf2", GV_ADD)); SvREADONLY_on(ref); return self; } static git_odb *odb_ptr(SV *self) { return (git_odb *)SvIV(SvRV(self)); } void DESTROY(SV *self) { git_odb_free(odb_ptr(self)); } /* needs "$GIT_DIR/objects", not $GIT_DIR */ void add_alternate(SV *self, const char *objects_path) { int rc = git_odb_add_disk_alternate(odb_ptr(self), objects_path); croak_if_err(rc, "git_odb_add_disk_alternate"); } #define CAPA(v) (sizeof(v) / sizeof((v)[0])) /* * returns true on success, false on failure * this requires an unabbreviated git OID */ int cat_oid(SV *self, int fd, SV *oidsv) { /* * adjust when libgit2 gets SHA-256 support, we return the * same header as git-cat-file --batch "$OID $TYPE $SIZE\n" */ char hdr[GIT_OID_HEXSZ + sizeof(" commit 18446744073709551615")]; struct iovec vec[3]; size_t nvec = CAPA(vec); git_oid oid; git_odb_object *object = NULL; int rc, err = 0; STRLEN oidlen; char *oidptr = SvPV(oidsv, oidlen); /* same trailer as git-cat-file --batch */ vec[2].iov_len = 1; vec[2].iov_base = "\n"; rc = git_oid_fromstrn(&oid, oidptr, oidlen); if (rc == GIT_OK) rc = git_odb_read(&object, odb_ptr(self), &oid); if (rc == GIT_OK) { vec[0].iov_base = hdr; vec[1].iov_base = (void *)git_odb_object_data(object); vec[1].iov_len = git_odb_object_size(object); git_oid_nfmt(hdr, GIT_OID_HEXSZ, git_odb_object_id(object)); vec[0].iov_len = GIT_OID_HEXSZ + snprintf(hdr + GIT_OID_HEXSZ, sizeof(hdr) - GIT_OID_HEXSZ, " %s %zu\n", git_object_type2string( git_odb_object_type(object)), vec[1].iov_len); } else { /* caller retries */ nvec = 0; } while (nvec && !err) { ssize_t w = writev(fd, vec + CAPA(vec) - nvec, nvec); if (w > 0) { size_t done = 0; size_t i; for (i = CAPA(vec) - nvec; i < CAPA(vec); i++) { if (w >= vec[i].iov_len) { /* fully written vec */ w -= vec[i].iov_len; done++; } else { /* partially written vec */ char *p = vec[i].iov_base; vec[i].iov_base = p + w; vec[i].iov_len -= w; break; } } nvec -= done; } else if (w < 0) { err = errno; switch (err) { case EAGAIN: { struct pollfd pfd; pfd.events = POLLOUT; pfd.fd = fd; poll(&pfd, 1, -1); } /* fall-through */ case EINTR: err = 0; } } else { /* w == 0 */ err = ENOSPC; } } if (object) git_odb_object_free(object); if (err) croak("writev error: %s", strerror(err)); return rc == GIT_OK; } public-inbox-1.9.0/sa_config/000077500000000000000000000000001430031475700160505ustar00rootroot00000000000000public-inbox-1.9.0/sa_config/Makefile000066400000000000000000000007371430031475700175170ustar00rootroot00000000000000INSTALL = install all:: @cat README ROOT_FILES = etc/spamassassin/public-inbox.pre install-root: @mkdir -p /etc/spamassassin for f in $(ROOT_FILES); do $(INSTALL) -m 0644 root/$$f /$$f; done diff-root: for f in $(ROOT_FILES); do diff -u root/$$f /$$f; done USER_FILES = .spamassassin/user_prefs install-user: @mkdir -p ~/.spamassassin/ for f in $(USER_FILES); do $(INSTALL) -m 0644 user/$$f ~/$$f; done diff-user: for f in $(USER_FILES); do diff -u user/$$f ~/$$f; done public-inbox-1.9.0/sa_config/README000066400000000000000000000012721430031475700167320ustar00rootroot00000000000000SpamAssassin configs for public-inbox.org ----------------------------------------- root/ - files for system-wide use (plugins, rule definitions, new rules should have a zero score which should be overridden) user/ - per-user config (keep as much in here as possible) These files go into the users home directory All files in these example directory are CC0: To the extent possible under law, Eric Wong has waived all copyright and related or neighboring rights to these examples. Make targets ------------ install-root - install system-wide files (run as root) install-user - install user-specific files (run as the user public-inbox runs as) public-inbox-1.9.0/sa_config/root/000077500000000000000000000000001430031475700170335ustar00rootroot00000000000000public-inbox-1.9.0/sa_config/root/etc/000077500000000000000000000000001430031475700176065ustar00rootroot00000000000000public-inbox-1.9.0/sa_config/root/etc/spamassassin/000077500000000000000000000000001430031475700223135ustar00rootroot00000000000000public-inbox-1.9.0/sa_config/root/etc/spamassassin/public-inbox.pre000066400000000000000000000006631430031475700254230ustar00rootroot00000000000000# public-inbox.org uses the Debian spamd installation + init and sets # CRON=1 in /etc/default/spamassassin for automatic rule updates # compile rules to C, sa-compile(1) must be run as the appropriate user # (debian-spamd on Debian). sa-compile(1) will also be run by the cronjob loadplugin Mail::SpamAssassin::Plugin::Rule2XSBody # for ok_languages in user_prefs loadplugin Mail::SpamAssassin::Plugin::TextCat allow_user_rules 1 public-inbox-1.9.0/sa_config/user/000077500000000000000000000000001430031475700170265ustar00rootroot00000000000000public-inbox-1.9.0/sa_config/user/.spamassassin/000077500000000000000000000000001430031475700216115ustar00rootroot00000000000000public-inbox-1.9.0/sa_config/user/.spamassassin/user_prefs000066400000000000000000000070601430031475700237140ustar00rootroot00000000000000# raise or lower as needed required_score 3.0 # do not mess with the original message body, only notify in headers clear_report_template clear_unsafe_report_template report_safe 0 # we do not use nor support this on NFS lock_method flock # do not throw off Bayes bayes_ignore_header X-Bogosity bayes_ignore_header X-Spam-Flag bayes_ignore_header X-Spam-Status bayes_ignore_header X-Spam-Report # English-only for all lists on public-inbox.org ok_locales en # we have "loadplugin Mail::SpamAssassin::Plugin::TextCat" in a *.pre file ok_languages en # uncomment the following for importing archives: # dns_available no # skip_rbl_checks 1 # skip_uribl_checks 1 # manual rules (some stolen from debian) # these require "allow_user_rules 1" in the system-wide config rawbody LOCAL_VIEWINBROWSER /View this message in a browser/ describe LOCAL_VIEWINBROWSER "HTML mail not welcome" score LOCAL_VIEWINBROWSER 2.8 body MEETUPSECURELY /meetupsecurely\.com/i describe MEETUPSECURELY "site used by spammers" score MEETUPSECURELY 2.8 body HELLOMYDEAR /hello my dear/i describe HELLOMYDEAR "spam phrase" score HELLOMYDEAR 2.8 body JUSTAMAILBOX /I'm just a mailbox used for sending notifications/ describe JUSTAMAILBOX "autoreply phrase" score JUSTAMAILBOX 5.0 # hello foo header PI_HELLO subject =~ /^hello \w+/i describe PI_HELLO "Hello foo" score PI_HELLO 3 # no delivery header PI_DNOT subject =~ /delivery\s+(?:status\s+)?notification/i describe PI_DNOT "delivery notification" score PI_DNOT 3 # no delivery header PI_PARCEL subject =~ /could not deliver your parcel/ describe PI_PARCEL "delivery notification" score PI_PARCEL 3 # notice to appear header PI_DNOTICE subject =~ /notice to appear/i describe PI_DNOTICE "notice to appear" score PI_DNOTICE 3 full ZIPFILE /\b(?:file)?name\=.*\.zip\b/i describe ZIPFILE zipfile attachment score ZIPFILE 1.5 header PI_NUM_WORD_SUBJ subject =~ /^\d+ [a-z0-9_-]+$/ describe PI_NUM_WORD_SUBJ "number and single word subject" score PI_NUM_WORD_SUBJ 0.5 header PI_NUM_ONLY_SUBJ subject =~ /^\d+$/ describe PI_NUM_ONLY_SUBJ "number only subject" score PI_NUM_ONLY_SUBJ 0.5 header PI_IMPORTANCE_HIGH importance =~ /^high$/ describe PI_IMPORTANCE_HIGH "importance: high header" score PI_IMPORTANCE_HIGH 0.5 meta PI_ZIPFILE_NUM_WORD_SUBJ PI_NUM_WORD_SUBJ && ZIPFILE describe PI_ZIPFILE_NUM_WORD_SUBJ "common spam/virus pattern" score PI_ZIPFILE_NUM_WORD_SUBJ 3 meta PI_ZIPFILE_NUM_ONLY_SUBJ PI_NUM_ONLY_SUBJ && ZIPFILE describe PI_ZIPFILE_NUM_ONLY_SUBJ "common spam/virus pattern" score PI_ZIPFILE_NUM_ONLY_SUBJ 3 full DOCFILE /\b(?:file)?name\=.*\.doc\b/i describe DOCFILE doc attachment score DOCFILE 3 score BAYES_999 3 score BAYES_05 -1.5 score BAYES_00 -3 # trust paid whitelist services? never score RCVD_IN_RP_SAFE 0.0 score RCVD_IN_RP_CERTIFIED 0.0 # this depends on PublicInbox::SaPlugin::ListMirror: # header LIST_MIRROR_RECEIVED eval:check_list_mirror_received() # describe LIST_MIRROR_RECEIVED Received does not match expected # score LIST_MIRROR_RECEIVED 10 # # this depends on PublicInbox::SaPlugin::ListMirror: # header LIST_MIRROR_BCC eval:check_list_mirror_bcc() # describe LIST_MIRROR_BCC Mailing list was Bcc-ed # score LIST_MIRROR_BCC 10 # list_mirror X-Mailing-List git@vger.kernel.org *.kernel.org git@vger.kernel.org # DNSWL scores is a bit too powerful by default (-5 adjustment!) # default score RCVD_IN_DNSWL_LOW 0 -0.7 0 -0.7 # default score RCVD_IN_DNSWL_MED 0 -2.3 0 -2.3 # default score RCVD_IN_DNSWL_HI 0 -5 0 -5 score RCVD_IN_DNSWL_MED 0 -0.1 0 -0.1 score RCVD_IN_DNSWL_HI 0 -0.2 0 -0.2 public-inbox-1.9.0/script/000077500000000000000000000000001430031475700154245ustar00rootroot00000000000000public-inbox-1.9.0/script/lei000077500000000000000000000105161430031475700161260ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use v5.12; use Socket qw(AF_UNIX SOCK_SEQPACKET MSG_EOR pack_sockaddr_un); use PublicInbox::CmdIPC4; my $narg = 5; my $sock; my $recv_cmd = PublicInbox::CmdIPC4->can('recv_cmd4'); my $send_cmd = PublicInbox::CmdIPC4->can('send_cmd4') // do { require PublicInbox::Syscall; $recv_cmd = PublicInbox::Syscall->can('recv_cmd4'); PublicInbox::Syscall->can('send_cmd4'); } // do { my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //= ( $ENV{XDG_CACHE_HOME} // ( ($ENV{HOME} // '/nonexistent').'/.cache' ) ).'/public-inbox/inline-c'; if (!-d $inline_dir) { require File::Path; File::Path::make_path($inline_dir); } require PublicInbox::Spawn; # takes ~50ms even if built *sigh* $recv_cmd = PublicInbox::Spawn->can('recv_cmd4'); PublicInbox::Spawn->can('send_cmd4'); } // die 'please install Inline::C or Socket::MsgHdr'; my %pids; my $sigchld = sub { my $flags = scalar(@_) ? POSIX::WNOHANG() : 0; for my $pid (keys %pids) { delete($pids{$pid}) if waitpid($pid, $flags) == $pid; } }; my @parent; my $exec_cmd = sub { my ($fds, $argc, @argv) = @_; my $parent = $$; require POSIX; my @old = (*STDIN{IO}, *STDOUT{IO}, *STDERR{IO}); my @rdr; for my $fd (@$fds) { open(my $newfh, '+<&=', $fd) or die "open +<&=$fd: $!"; push @rdr, shift(@old), $newfh; } my $do_exec = sub { my @non_std; # ex. $op_p from lei_edit_search while (my ($io, $newfh) = splice(@rdr, 0, 2)) { my $old_io = !!$io; open $io, '+<&', $newfh or die "open +<&=: $!"; push @non_std, $io unless $old_io; } if (@non_std) { require Fcntl; fcntl($_, Fcntl::F_SETFD(), 0) for @non_std; } my %env = map { split(/=/, $_, 2) } splice(@argv, $argc); @ENV{keys %env} = values %env; umask 077; exec(@argv); warn "exec: @argv: $!\n"; POSIX::_exit(1); }; $SIG{CHLD} = $sigchld; my $pid = fork // die "fork: $!"; if ($pid == 0) { $do_exec->() if $fds->[1]; # git-credential, pager # parent backgrounds on MUA POSIX::setsid() > 0 or die "setsid: $!"; @parent = ($parent); return; # continue $recv_cmd in background } if ($fds->[1]) { $pids{$pid} = undef; } else { $do_exec->(); # MUA reuses stdout } }; my $runtime_dir = ($ENV{XDG_RUNTIME_DIR} // '') . '/lei'; if ($runtime_dir eq '/lei') { require File::Spec; $runtime_dir = File::Spec->tmpdir."/lei-$<"; } unless (-d $runtime_dir) { require File::Path; File::Path::make_path($runtime_dir, { mode => 0700 }); } my $path = "$runtime_dir/$narg.seq.sock"; my $addr = pack_sockaddr_un($path); socket($sock, AF_UNIX, SOCK_SEQPACKET, 0) or die "socket: $!"; unless (connect($sock, $addr)) { # start the daemon if not started local $ENV{PERL5LIB} = join(':', @INC); open(my $daemon, '-|', $^X, qw[-MPublicInbox::LEI -E PublicInbox::LEI::lazy_start(@ARGV)], $path, $! + 0, $narg) or die "popen: $!"; while (<$daemon>) { warn $_ } # EOF when STDERR is redirected close($daemon) or warn <<""; lei-daemon could not start, exited with \$?=$? # try connecting again anyways, unlink+bind may be racy connect($sock, $addr) or die <<""; connect($path): $! (after attempted daemon start) } # (Socket::MsgHdr|Inline::C), $sock are all available: open my $dh, '<', '.' or die "open(.) $!"; my $buf = join("\0", scalar(@ARGV), @ARGV); while (my ($k, $v) = each %ENV) { $buf .= "\0$k=$v" } $buf .= "\0\0"; $send_cmd->($sock, [0, 1, 2, fileno($dh)], $buf, MSG_EOR) or die "sendmsg: $!"; $SIG{TSTP} = sub { send($sock, 'STOP', MSG_EOR); kill 'STOP', $$ }; $SIG{CONT} = sub { send($sock, 'CONT', MSG_EOR) }; my $x_it_code = 0; while (1) { my (@fds) = $recv_cmd->($sock, my $buf, 4096 * 33); if (scalar(@fds) == 1 && !defined($fds[0])) { next if $!{EINTR}; die "recvmsg: $!"; } last if $buf eq ''; if ($buf =~ /\Aexec (.+)\z/) { $exec_cmd->(\@fds, split(/\0/, $1)); } elsif ($buf eq '-WINCH') { kill($buf, @parent); # for MUA } elsif ($buf eq 'umask') { send($sock, 'u'.pack('V', umask), MSG_EOR) or die "send: $!" } elsif ($buf =~ /\Ax_it ([0-9]+)\z/) { $x_it_code ||= $1 + 0; last; } elsif ($buf =~ /\Achild_error ([0-9]+)\z/) { $x_it_code ||= $1 + 0; } elsif ($buf eq 'wait') { $sigchld->(); } else { $sigchld->(); die $buf; } } $sigchld->(); if (my $sig = ($x_it_code & 127)) { kill $sig, $$; sleep(1) while 1; # no self-pipe/signalfd, here, so we loop } exit($x_it_code >> 8); public-inbox-1.9.0/script/public-inbox-clone000077500000000000000000000040071430031475700210440ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ # Wrapper to git clone remote public-inboxes use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $opt = {}; my $help = <{help}) { print $help; exit }; require PublicInbox::Admin; # loads Config PublicInbox::Admin::do_chdir(delete $opt->{C}); PublicInbox::Admin::setup_signals(); $SIG{PIPE} = 'IGNORE'; my ($url, $dst, $extra) = @ARGV; die $help if !defined($url) || defined($extra); defined($dst) or ($dst) = ($url =~ m!/([^/]+)/?\z!); index($dst, "\n") >= 0 and die "`\\n' not allowed in `$dst'"; # n.b. this is still a truckload of code... require URI; require PublicInbox::LEI; require PublicInbox::LeiExternal; require PublicInbox::LeiMirror; require PublicInbox::LeiCurl; require PublicInbox::Lock; $url = PublicInbox::LeiExternal::ext_canonicalize($url); my $lei = bless { env => \%ENV, opt => $opt, cmd => 'public-inbox-clone', 0 => *STDIN{GLOB}, 2 => *STDERR{GLOB}, }, 'PublicInbox::LEI'; open $lei->{1}, '+<&=', 1 or die "dup: $!"; open $lei->{3}, '.' or die "open . $!"; my $mrr = bless { lei => $lei, src => $url, dst => $dst, }, 'PublicInbox::LeiMirror'; $mrr->do_mirror; $mrr->can('_wq_done_wait')->([$mrr, $lei], $$); exit(($lei->{child_error} // 0) >> 8); public-inbox-1.9.0/script/public-inbox-compact000077500000000000000000000025661430031475700214020ustar00rootroot00000000000000#!perl -w # Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $opt = { compact => 1, -coarse_lock => 1, -eidx_ok => 1 }; my $help = < Compact Xapian DBs in an inbox options: --all index all configured inboxes --jobs=NUM control parallelization See public-inbox-compact(1) man page for full documentation. EOF GetOptions($opt, qw(all C=s@ help|h), # compact options: qw(jobs|j=i quiet|q blocksize|b=s no-full|n fuller|F), ) or die $help; if ($opt->{help}) { print $help; exit 0 }; require PublicInbox::Admin; PublicInbox::Admin::require_or_die('-index'); PublicInbox::Admin::do_chdir(delete $opt->{C}); PublicInbox::Admin::progress_prepare($opt); require PublicInbox::InboxWritable; require PublicInbox::Xapcmd; my $cfg = PublicInbox::Config->new; my ($ibxs, $eidxs) = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); unless ($ibxs) { print STDERR $help; exit 1 } for my $ibx (@$ibxs) { $ibx = PublicInbox::InboxWritable->new($ibx); PublicInbox::Xapcmd::run($ibx, 'compact', $opt); } for my $eidx (@$eidxs) { PublicInbox::Xapcmd::run($eidx, 'compact', $opt); } public-inbox-1.9.0/script/public-inbox-convert000077500000000000000000000136431430031475700214320ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $help = < 1, # index defaults: quiet => -1, compact => 0, maxsize => undef, fsync => 1, reindex => 1, # we always reindex }; GetOptions($opt, qw(jobs|j=i index! help|h C=s@), # index options qw(verbose|v+ rethread compact|c+ fsync|sync! indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s sequential-shard|seq-shard )) or die $help; if ($opt->{help}) { print $help; exit 0 }; require PublicInbox::Admin; PublicInbox::Admin::do_chdir(delete $opt->{C}); my $old_dir = shift(@ARGV) // ''; my $new_dir = shift(@ARGV) // ''; die $help if (scalar(@ARGV) || $new_dir eq '' || $old_dir eq ''); die "$new_dir exists\n" if -d $new_dir; die "$old_dir not a directory\n" unless -d $old_dir; require PublicInbox::Config; require PublicInbox::InboxWritable; my $cfg = PublicInbox::Config->new; my @old = PublicInbox::Admin::resolve_inboxes([$old_dir], undef, $cfg); @old > 1 and die "BUG: resolved several inboxes from $old_dir:\n", map { "\t$_->{inboxdir}\n" } @old; my $old = PublicInbox::InboxWritable->new($old[0]); if (delete $old->{-unconfigured}) { warn "W: $old_dir not configured in " . PublicInbox::Config::default_file() . "\n"; } die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2; my $detected = PublicInbox::Admin::detect_indexlevel($old); $old->{indexlevel} //= $detected; my $env; if ($opt->{'index'}) { my $mods = {}; PublicInbox::Admin::scan_ibx_modules($mods, $old); PublicInbox::Admin::require_or_die(keys %$mods); PublicInbox::Admin::progress_prepare($opt); $env = PublicInbox::Admin::index_prepare($opt, $cfg); } local %ENV = (%$env, %ENV) if $env; my $new = { %$old }; $new->{inboxdir} = $cfg->rel2abs_collapsed($new_dir); $new->{version} = 2; $new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} }); $new->{-no_fsync} = 1 if !$opt->{fsync}; my $v2w; sub link_or_copy ($$) { my ($src, $dst) = @_; link($src, $dst) and return; $!{EXDEV} or warn "link $src, $dst failed: $!, trying cp\n"; require File::Copy; # preserves permissions: File::Copy::cp($src, $dst) or die "cp $src, $dst failed: $!\n"; } $old->with_umask(sub { my $old_cfg = "$old->{inboxdir}/config"; local $ENV{GIT_CONFIG} = $old_cfg; my $new_cfg = "$new->{inboxdir}/all.git/config"; $v2w = $new->importer(1); $v2w->init_inbox(delete $opt->{jobs}); unlink $new_cfg; link_or_copy($old_cfg, $new_cfg); if (my $alt = $new->{altid}) { require PublicInbox::AltId; foreach my $i (0..$#$alt) { my $src = PublicInbox::AltId->new($old, $alt->[$i], 0); $src = $src->mm_alt or next; $src = $src->{dbh}->sqlite_db_filename; my $dst = PublicInbox::AltId->new($new, $alt->[$i], 1); $dst->mm_alt->{dbh}->sqlite_backup_from_file($src); } } my $desc = "$old->{inboxdir}/description"; link_or_copy($desc, "$new->{inboxdir}/description") if -e $desc; my $clone = "$old->{inboxdir}/cloneurl"; if (-e $clone) { warn <<""; $clone may not be valid after migrating to v2, not copying } }); my $state = ''; my $head = $old->{ref_head} || 'HEAD'; my ($rd, $pid) = $old->git->popen(qw(fast-export --use-done-feature), $head); $v2w->idx_init($opt); my $im = $v2w->importer; my ($r, $w) = $im->gfi_start; my $h = '[0-9a-f]'; my %D; my $last; while (<$rd>) { if ($_ eq "blob\n") { $state = 'blob'; } elsif (/^commit /) { $state = 'commit'; } elsif (/^data ([0-9]+)/) { my $len = $1; print $w $_ or $im->wfail; while ($len) { my $n = read($rd, my $tmp, $len) or die "read: $!"; warn "$n != $len\n" if $n != $len; $len -= $n; print $w $tmp or $im->wfail; } next; } elsif ($state eq 'commit') { if (m{^M 100644 :([0-9]+) (${h}{2}/${h}{38})}o) { my ($mark, $path) = ($1, $2); $D{$path} = $mark; if ($last && $last ne 'm') { print $w "D $last\n" or $im->wfail; } print $w "M 100644 :$mark m\n" or $im->wfail; $last = 'm'; next; } if (m{^D (${h}{2}/${h}{38})}o) { my $mark = delete $D{$1}; defined $mark or die "undeleted path: $1\n"; if ($last && $last ne 'd') { print $w "D $last\n" or $im->wfail; } print $w "M 100644 :$mark d\n" or $im->wfail; $last = 'd'; next; } } last if $_ eq "done\n"; print $w $_ or $im->wfail; } close $rd or die "close fast-export: $!\n"; waitpid($pid, 0) or die "waitpid failed: $!\n"; $? == 0 or die "fast-export failed: $?\n"; $r = $w = undef; # v2w->done does the actual close and error checking $v2w->done; if (my $old_mm = $old->mm) { $old->cleanup; $old_mm = $old_mm->{dbh}->sqlite_db_filename; # we want to trigger a reindex, not a from scratch index if # we're reusing the msgmap from an existing v1 installation. $v2w->idx_init($opt); $v2w->{mm}->{dbh}->sqlite_backup_from_file($old_mm); my $epoch0 = PublicInbox::Git->new($v2w->{mg}->add_epoch(0)); chop(my $cmt = $epoch0->qx(qw(rev-parse --verify), $head)); $v2w->last_epoch_commit(0, $cmt); } $v2w->index_sync($opt) if delete $opt->{'index'}; $v2w->done; public-inbox-1.9.0/script/public-inbox-edit000077500000000000000000000166561430031475700207060ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # # Used for editing messages in a public-inbox. # Supports v2 inboxes only, for now. use strict; use warnings; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use PublicInbox::AdminEdit; use File::Temp 0.19 (); # 0.19 for TMPDIR use PublicInbox::ContentHash qw(content_hash); use PublicInbox::MID qw(mid_clean mids); PublicInbox::Admin::check_require('-index'); use PublicInbox::Eml; use PublicInbox::InboxWritable qw(eml_from_path); use PublicInbox::Import; my $help = <<'EOF'; usage: public-inbox-edit -m MESSAGE-ID [--all] [INBOX_DIRS] destructively edit messages in a public inbox options: --all edit all configured inboxes -m MESSAGE-ID edit the message with a given Message-ID -F FILE edit the message matching the contents of FILE --force forcibly edit even if Message-ID is ambiguous --raw do not perform "From " line escaping See public-inbox-edit(1) man page for full documentation. EOF my $opt = { verbose => 1, all => 0, -min_inbox_version => 2, raw => 0 }; my @opt = qw(mid|m=s file|F=s raw C=s@); GetOptions($opt, @PublicInbox::AdminEdit::OPT, @opt) or die $help; if ($opt->{help}) { print $help; exit 0 }; PublicInbox::Admin::do_chdir(delete $opt->{C}); my $cfg = PublicInbox::Config->new; my $editor = $ENV{MAIL_EDITOR}; # e.g. "mutt -f" unless (defined $editor) { my $k = 'publicinbox.mailEditor'; $editor = $cfg->{lc($k)} if $cfg; unless (defined $editor) { warn "\`$k' not configured, trying \`git var GIT_EDITOR'\n"; chomp($editor = `git var GIT_EDITOR`); warn "Will use $editor to edit mail\n"; } } my $mid = $opt->{mid}; my $file = $opt->{file}; if (defined $mid && defined $file) { die "the --mid and --file options are mutually exclusive\n"; } my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); PublicInbox::AdminEdit::check_editable(\@ibxs); my $found = {}; # chash => [ [ibx, smsg] [, [ibx, smsg] ] ] sub find_mid ($$$) { my ($found, $mid, $ibxs) = @_; foreach my $ibx (@$ibxs) { my $over = $ibx->over; my ($id, $prev); while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) { my $ref = $ibx->msg_by_smsg($smsg); my $mime = PublicInbox::Eml->new($ref); my $chash = content_hash($mime); my $tuple = [ $ibx, $smsg ]; push @{$found->{$chash} ||= []}, $tuple } PublicInbox::InboxWritable::cleanup($ibx); } $found; } sub show_cmd ($$) { my ($ibx, $smsg) = @_; " GIT_DIR=$ibx->{inboxdir}/all.git \\\n git show $smsg->{blob}\n"; } sub show_found ($) { my ($found) = @_; foreach my $to_edit (values %$found) { foreach my $tuple (@$to_edit) { my ($ibx, $smsg) = @$tuple; warn show_cmd($ibx, $smsg); } } } if (defined($mid)) { $mid = mid_clean($mid); find_mid($found, $mid, \@ibxs); my $nr = scalar(keys %$found); die "No message found for <$mid>\n" unless $nr; if ($nr > 1) { warn <<""; Multiple messages with different content found matching <$mid>: show_found($found); die "Use --force to edit all of them\n" if !$opt->{force}; warn "Will edit all of them\n"; } } else { my $eml = eml_from_path($file) or die "open($file) failed: $!"; my $mids = mids($eml); find_mid($found, $_, \@ibxs) for (@$mids); # populates $found my $chash = content_hash($eml); my $to_edit = $found->{$chash}; unless ($to_edit) { my $nr = scalar(keys %$found); if ($nr > 0) { warn <<""; $nr matches to Message-ID(s) in $file, but none matched content Partial matches below: show_found($found); } elsif ($nr == 0) { $mids = join('', map { " <$_>\n" } @$mids); warn <<""; No matching messages found matching Message-ID(s) in $file $mids } exit 1; } $found = { $chash => $to_edit }; } my %tmpopt = ( TEMPLATE => 'public-inbox-edit-XXXX', TMPDIR => 1, SUFFIX => $opt->{raw} ? '.eml' : '.mbox', ); foreach my $to_edit (values %$found) { my $edit_fh = File::Temp->new(%tmpopt); $edit_fh->autoflush(1); my $edit_fn = $edit_fh->filename; my ($ibx, $smsg) = @{$to_edit->[0]}; my $old_raw = $ibx->msg_by_smsg($smsg); PublicInbox::InboxWritable::cleanup($ibx); my $tmp = $$old_raw; if (!$opt->{raw}) { my $oid = $smsg->{blob}; print $edit_fh "From mboxrd\@$oid Thu Jan 1 00:00:00 1970\n" or die "failed to write From_ line: $!"; $tmp =~ s/^(>*From )/>$1/gm; } print $edit_fh $tmp or die "failed to write tempfile for editing: $!"; # run the editor, respecting spaces/quote retry_edit: if (system(qw(sh -c), $editor.' "$@"', $editor, $edit_fn)) { if (!(-t STDIN) && !$opt->{force}) { die "E: $editor failed: $?\n"; } print STDERR "$editor failed, "; print STDERR "continuing as forced\n" if $opt->{force}; while (!$opt->{force}) { print STDERR "(r)etry, (c)ontinue, (q)uit?\n"; chomp(my $op = || ''); $op = lc($op); goto retry_edit if $op eq 'r'; if ($op eq 'q') { # n.b. we'll lose the exit signal, here, # oh well; "q" is user-specified anyways. exit($? >> 8); } last if $op eq 'c'; # continuing print STDERR "\`$op' not recognized\n"; } } # reread the edited file, not using $edit_fh since $EDITOR may # rename/relink $edit_fn open my $new_fh, '<', $edit_fn or die "can't read edited file ($edit_fn): $!\n"; defined(my $new_raw = do { local $/; <$new_fh> }) or die "read $edit_fn: $!\n"; if (!$opt->{raw}) { # get rid of the From we added $new_raw =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; # check if user forgot to purge (in mutt) after editing if ($new_raw =~ /^From /sm) { if (-t STDIN) { print STDERR <<''; Extra "From " lines detected in new mbox. Did you forget to purge the original message from the mbox after editing? while (1) { print STDERR <<""; (y)es to re-edit, (n)o to continue chomp(my $op = || ''); $op = lc($op); goto retry_edit if $op eq 'y'; last if $op eq 'n'; # continuing print STDERR "\`$op' not recognized\n"; } } else { # non-interactive path # unlikely to happen, as extra From lines are # only a common mistake (for me) with # interactive use warn <<""; W: possible message boundary splitting error } } # unescape what we escaped: $new_raw =~ s/^>(>*From )/$1/gm; } my $new_mime = PublicInbox::Eml->new(\$new_raw); my $old_mime = PublicInbox::Eml->new($old_raw); # make sure we don't compare unwanted headers, since mutt adds # Content-Length, Status, and Lines headers: PublicInbox::Import::drop_unwanted_headers($new_mime); PublicInbox::Import::drop_unwanted_headers($old_mime); # allow changing Received: and maybe other headers which can # contain sensitive info. my $nhdr = $new_mime->header_obj->as_string; my $ohdr = $old_mime->header_obj->as_string; if (($nhdr eq $ohdr) && (content_hash($new_mime) eq content_hash($old_mime))) { warn "No change detected to:\n", show_cmd($ibx, $smsg); next unless $opt->{verbose}; # should we consider this machine-parseable? PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, []); next; } foreach my $tuple (@$to_edit) { $ibx = PublicInbox::InboxWritable->new($tuple->[0]); $smsg = $tuple->[1]; my $im = $ibx->importer(0); my $commits = $im->replace($old_mime, $new_mime); $im->done; unless ($commits) { warn "Failed to replace:\n", show_cmd($ibx, $smsg); next; } next unless $opt->{verbose}; # should we consider this machine-parseable? PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, $commits); } } public-inbox-1.9.0/script/public-inbox-extindex000077500000000000000000000064471430031475700216060ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $help = < -1, compact => 0, fsync => 1, scan => 1 }; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i fsync|sync! fast dangerous indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s dedupe:s@ gc commit-interval=i watch scan! dry-run|n all C=s@ help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; require IO::Handle; STDOUT->autoflush(1); STDERR->autoflush(1); local $SIG{USR1} = 'IGNORE'; # to be overridden in eidx_sync # require lazily to speed up --help require PublicInbox::Admin; PublicInbox::Admin::do_chdir(delete $opt->{C}); my $cfg = PublicInbox::Config->new; my $eidx_dir = shift(@ARGV); unless (defined $eidx_dir) { if ($opt->{all} && $cfg->ALL) { $eidx_dir = $cfg->ALL->{topdir}; } else { die "E: $help"; } } my @ibxs; if ($opt->{gc}) { die "E: inbox paths must not be specified with --gc\n" if @ARGV; for my $sw (qw(all watch dry-run dedupe)) { die "E: --$sw is not compatible with --gc\n" if $opt->{$sw}; } } else { @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); } $opt->{'dry-run'} && !$opt->{dedupe} and die "E: --dry-run only affects --dedupe\n"; $opt->{fast} && !$opt->{reindex} and die "E: --fast only affects --reindex\n"; PublicInbox::Admin::require_or_die(qw(-search)); PublicInbox::Config::json() or die "Cpanel::JSON::XS or similar missing\n"; PublicInbox::Admin::progress_prepare($opt); my $env = PublicInbox::Admin::index_prepare($opt, $cfg); local %ENV = (%ENV, %$env) if $env; require PublicInbox::ExtSearchIdx; my $eidx = PublicInbox::ExtSearchIdx->new($eidx_dir, $opt); if ($opt->{gc}) { $eidx->attach_config($cfg); $eidx->eidx_gc($opt); } else { if ($opt->{all}) { $eidx->attach_config($cfg); } else { $eidx->attach_config($cfg, \@ibxs); } if ($opt->{watch}) { $cfg = undef; # save memory only after SIGHUP $eidx->eidx_watch($opt); } else { $eidx->eidx_sync($opt); } } public-inbox-1.9.0/script/public-inbox-fetch000077500000000000000000000027601430031475700210410ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ # Wrapper to git fetch remote public-inboxes use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $opt = {}; my $help = <{help}) { print $help; exit }; require PublicInbox::Fetch; # loads Admin PublicInbox::Admin::do_chdir(delete $opt->{C}); PublicInbox::Admin::setup_signals(); $SIG{PIPE} = 'IGNORE'; my $lei = bless { env => \%ENV, opt => $opt, cmd => 'public-inbox-fetch', 0 => *STDIN{GLOB}, 1 => *STDOUT{GLOB}, 2 => *STDERR{GLOB}, }, 'PublicInbox::LEI'; PublicInbox::Fetch->do_fetch($lei, '.'); exit(($lei->{child_error} // 0) >> 8); public-inbox-1.9.0/script/public-inbox-httpd000077500000000000000000000004061430031475700210660ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ # # Standalone HTTP server for public-inbox. use v5.12; use PublicInbox::Daemon; PublicInbox::Daemon::run('http://0.0.0.0:8080'); public-inbox-1.9.0/script/public-inbox-imapd000077500000000000000000000004171430031475700210370ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ # # Standalone read-only IMAP server for public-inbox. use v5.12; use PublicInbox::Daemon; PublicInbox::Daemon::run('imap://0.0.0.0:143'); public-inbox-1.9.0/script/public-inbox-index000077500000000000000000000134431430031475700210570ustar00rootroot00000000000000#!perl -w # Copyright (C) 2015-2021 all contributors # License: AGPL-3.0+ # Basic tool to create a Xapian search index for a public-inbox. # Usage with libeatmydata # highly recommended: eatmydata public-inbox-index INBOX_DIR use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $help = < -1, compact => 0, max_size => undef, fsync => 1, 'update-extindex' => [], # ":s@" optional arg sets '' if no arg given }; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune fsync|sync! xapian_only|xapian-only dangerous indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s since|after=s until|before=s sequential-shard|seq-shard no-update-extindex update-extindex|E=s@ fast-noop|F skip-docdata all C=s@ help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; if ($opt->{xapian_only} && !$opt->{reindex}) { die "--xapian-only requires --reindex\n"; } if ($opt->{reindex} && delete($opt->{'fast-noop'})) { warn "--fast-noop ignored with --reindex\n"; } # require lazily to speed up --help require PublicInbox::Admin; PublicInbox::Admin::require_or_die('-index'); PublicInbox::Admin::do_chdir(delete $opt->{C}); my $cfg = PublicInbox::Config->new; # Config is loaded by Admin $opt->{-use_cwd} = 1; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); PublicInbox::Admin::require_or_die('-index'); unless (@ibxs) { print STDERR $help; exit 1 } my (@eidx, %eidx_seen); my $update_extindex = $opt->{'update-extindex'}; if (!scalar(@$update_extindex) && (my $ALL = $cfg->ALL)) { # extindex and normal inboxes may have different owners push(@$update_extindex, 'all') if -w $ALL->{topdir}; } @$update_extindex = () if $opt->{'no-update-extindex'}; if (scalar @$update_extindex) { PublicInbox::Admin::require_or_die('-search'); require PublicInbox::ExtSearchIdx; } for my $ei_name (@$update_extindex) { my $es = $cfg->lookup_ei($ei_name); my $topdir; if (!$es && -d $ei_name) { # allow dirname or config section name $topdir = $ei_name; } elsif ($es) { $topdir = $es->{topdir}; } else { die "extindex `$ei_name' not configured or found\n"; } my $o = { %$opt }; delete $o->{indexlevel} if ($o->{indexlevel}//'') eq 'basic'; $eidx_seen{$topdir} //= push(@eidx, PublicInbox::ExtSearchIdx->new($topdir, $o)); } my $mods = {}; my @eidx_unconfigured; foreach my $ibx (@ibxs) { # detect_indexlevel may also set $ibx->{-skip_docdata} my $detected = PublicInbox::Admin::detect_indexlevel($ibx); # XXX: users can shoot themselves in the foot, with opt->{indexlevel} $ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ? 'full' : $detected); PublicInbox::Admin::scan_ibx_modules($mods, $ibx); if (@eidx && $ibx->{-unconfigured}) { push @eidx_unconfigured, " $ibx->{inboxdir}\n"; } } warn <{compact} = 0 if !$mods->{'Search::Xapian'}; PublicInbox::Admin::require_or_die(keys %$mods); my $env = PublicInbox::Admin::index_prepare($opt, $cfg); local %ENV = (%ENV, %$env) if $env; require PublicInbox::InboxWritable; PublicInbox::Xapcmd::check_compact() if $opt->{compact}; PublicInbox::Admin::progress_prepare($opt); for my $ibx (@ibxs) { $ibx = PublicInbox::InboxWritable->new($ibx); if ($opt->{compact} >= 2) { PublicInbox::Xapcmd::run($ibx, 'compact', $opt->{compact_opt}); } $ibx->{-no_fsync} = 1 if !$opt->{fsync}; $ibx->{-dangerous} = 1 if $opt->{dangerous}; $ibx->{-skip_docdata} //= $opt->{'skip-docdata'}; my $ibx_opt = $opt; if (defined(my $s = $ibx->{lc('indexSequentialShard')})) { defined(my $v = $cfg->git_bool($s)) or die <{name}.indexSequentialShard not boolean EOL $ibx_opt = { %$opt, 'sequential-shard' => $v }; } my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt); last if $ibx_opt->{quit}; if (my $copt = $opt->{compact_opt}) { local $copt->{jobs} = 0 if $ibx_opt->{'sequential-shard'}; PublicInbox::Xapcmd::run($ibx, 'compact', $copt); } last if $ibx_opt->{quit}; next if $ibx->{-unconfigured} || !$nidx; for my $eidx (@eidx) { $eidx->attach_inbox($ibx); } } my $pr = $opt->{-progress}; for my $eidx (@eidx) { $pr->("indexing $eidx->{topdir} ...\n") if $pr; $eidx->eidx_sync($opt); last if $opt->{quit}; } public-inbox-1.9.0/script/public-inbox-init000077500000000000000000000172551430031475700207200ustar00rootroot00000000000000#!perl -w # Copyright (C) 2014-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; use Fcntl qw(:DEFAULT); my $help = < \$version, 'L|index-level|indexlevel=s' => \$indexlevel, 'S|skip|skip-epoch=i' => \$skip_epoch, 'skip-artnum=i' => \$skip_artnum, 'j|jobs=i' => \$jobs, 'ng|newsgroup=s' => \$ng, 'skip-docdata' => \$skip_docdata, 'help|h' => \$show_help, 'c=s@' => \@c_extra, 'C=s@' => \@chdir, ); my $usage_cb = sub { print STDERR $help; exit 1; }; GetOptions(%opts) or $usage_cb->(); if ($show_help) { print $help; exit 0 }; my $name = shift @ARGV or $usage_cb->(); my $inboxdir = shift @ARGV or $usage_cb->(); my $http_url = shift @ARGV or $usage_cb->(); my (@address) = @ARGV; @address or $usage_cb->(); +PublicInbox::Admin::do_chdir(\@chdir); @c_extra = map { my ($k, $v) = split(/=/, $_, 2); defined($v) or die "Usage: -c KEY=VALUE\n"; $k =~ /\A[a-z]+\z/i or die "$k contains invalid characters\n"; $k = lc($k); if ($k eq 'newsgroup') { die "newsgroup already set ($ng)\n" if $ng ne ''; $ng = $v; (); } elsif ($k eq 'address') { push @address, $v; # for conflict checking (); } elsif ($k =~ /\A(?:inboxdir|mainrepo)\z/) { die "$k not allowed via -c $_\n" } elsif ($k eq 'indexlevel') { defined($indexlevel) and die "indexlevel already set ($indexlevel)\n"; $indexlevel = $v; (); } else { $_ } } @c_extra; PublicInbox::Admin::indexlevel_ok_or_die($indexlevel) if defined $indexlevel; $ng =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]! and die "--newsgroup `$ng' is not valid\n"; ($ng =~ m!\A\.! || $ng =~ m!\.\z!) and die "--newsgroup `$ng' must not start or end with `.'\n"; require PublicInbox::Config; my $pi_config = PublicInbox::Config->default_file; my ($dir) = ($pi_config =~ m!(.*?/)[^/]+\z!); require File::Path; File::Path::mkpath($dir); # will croak on fatal errors # first, we grab a flock to prevent simultaneous public-inbox-init # processes from trampling over each other, or exiting with 255 on # O_EXCL failure below. This gets unlocked automatically on exit: require PublicInbox::Lock; my $lock_obj = { lock_path => "$pi_config.flock" }; PublicInbox::Lock::lock_acquire($lock_obj); # git-config will operate on this (and rename on success): require File::Temp; my $fh = File::Temp->new(TEMPLATE => 'pi-init-XXXX', DIR => $dir); # Now, we grab another lock to use git-config(1) locking, so it won't # wait on the lock, unlike some of our internal flock()-based locks. # This is to prevent direct git-config(1) usage from clobbering our # changes. my $lockfile = "$pi_config.lock"; my $lockfh; sysopen($lockfh, $lockfile, O_RDWR|O_CREAT|O_EXCL) or do { warn "could not open config file: $lockfile: $!\n"; exit(255); }; require PublicInbox::OnDestroy; my $auto_unlink = PublicInbox::OnDestroy->new($$, sub { unlink $lockfile }); my ($perm, %seen); if (-e $pi_config) { open(my $oh, '<', $pi_config) or die "unable to read $pi_config: $!\n"; my @st = stat($oh); $perm = $st[2]; defined $perm or die "(f)stat failed on $pi_config: $!\n"; chmod($perm & 07777, $fh) or die "(f)chmod failed on future $pi_config: $!\n"; defined(my $old = do { local $/; <$oh> }) or die "read $pi_config: $!\n"; print $fh $old or die "failed to write: $!\n"; close $oh or die "failed to close $pi_config: $!\n"; # yes, this conflict checking is racy if multiple instances of this # script are run by the same $PI_DIR my $cfg = PublicInbox::Config->new; my $conflict; foreach my $addr (@address) { my $found = $cfg->lookup($addr); if ($found) { if ($found->{name} ne $name) { print STDERR "`$addr' already defined for ", "`$found->{name}',\n", "does not match intend `$name'\n"; $conflict = 1; } else { $seen{lc($addr)} = 1; } } } exit(1) if $conflict; my $ibx = $cfg->lookup_name($name); $indexlevel //= $ibx->{indexlevel} if $ibx; } my $pi_config_tmp = $fh->filename; close($fh) or die "failed to close $pi_config_tmp: $!\n"; my $pfx = "publicinbox.$name"; my @x = (qw/git config/, "--file=$pi_config_tmp"); $inboxdir = PublicInbox::Config::rel2abs_collapsed($inboxdir); die "`\\n' not allowed in `$inboxdir'\n" if index($inboxdir, "\n") >= 0; if (-f "$inboxdir/inbox.lock") { if (!defined $version) { $version = 2; } elsif ($version != 2) { die "$inboxdir is a -V2 inbox, -V$version specified\n" } } elsif (-d "$inboxdir/objects") { if (!defined $version) { $version = 1; } elsif ($version != 1) { die "$inboxdir is a -V1 inbox, -V$version specified\n" } } $version = 1 unless defined $version; if ($version == 1 && defined $skip_epoch) { die "--skip-epoch is only supported for -V2 inboxes\n"; } my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => $name, version => $version, -primary_address => $address[0], indexlevel => $indexlevel, }); my $creat_opt = {}; if (defined $jobs) { die "--jobs is only supported for -V2 inboxes\n" if $version == 1; die "--jobs=$jobs must be >= 1\n" if $jobs <= 0; $creat_opt->{nproc} = $jobs; } require PublicInbox::InboxWritable; $ibx = PublicInbox::InboxWritable->new($ibx, $creat_opt); if ($skip_docdata) { $ibx->{indexlevel} //= 'full'; # ensure init_inbox writes xdb $ibx->{indexlevel} eq 'basic' and die "--skip-docdata ignored with --indexlevel=basic\n"; $ibx->{-skip_docdata} = $skip_docdata; } $ibx->init_inbox(0, $skip_epoch, $skip_artnum); my $f = "$inboxdir/description"; if (sysopen $fh, $f, O_CREAT|O_EXCL|O_WRONLY) { print $fh "public inbox for $address[0]\n" or die "print($f): $!"; close $fh or die "close($f): $!"; } # needed for git prior to v2.1.0 umask(0077) if defined $perm; require PublicInbox::Spawn; PublicInbox::Spawn->import(qw(run_die)); foreach my $addr (@address) { next if $seen{lc($addr)}; run_die([@x, "--add", "$pfx.address", $addr]); } run_die([@x, "$pfx.url", $http_url]); run_die([@x, "$pfx.inboxdir", $inboxdir]); if (defined($indexlevel)) { run_die([@x, "$pfx.indexlevel", $indexlevel]); } run_die([@x, "$pfx.newsgroup", $ng]) if $ng ne ''; for my $kv (@c_extra) { my ($k, $v) = split(/=/, $kv, 2); # git 2.30+ has --fixed-value for idempotent invocations, # but that's too new to depend on in 2021. Perl quotemeta # seems compatible enough for POSIX ERE which git uses my $re = '^'.quotemeta($v).'$'; run_die([@x, qw(--replace-all), "$pfx.$k", $v, $re]); } # needed for git prior to v2.1.0 if (defined $perm) { chmod($perm & 07777, $pi_config_tmp) or die "(f)chmod failed on future $pi_config: $!\n"; } rename $pi_config_tmp, $pi_config or die "failed to rename `$pi_config_tmp' to `$pi_config': $!\n"; undef $auto_unlink; # trigger ->DESTROY public-inbox-1.9.0/script/public-inbox-learn000077500000000000000000000067351430031475700210570ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2014-2021 all contributors # License: AGPL-3.0+ # # Used for training spam (via SpamAssassin) and removing messages from a # public-inbox my $help = < 0); GetOptions(\%opt, qw(all help|h)) or die $help; my $train = shift or die $help; if ($train !~ /\A(?:ham|spam|rm)\z/) { die "`$train' not recognized.\n$help"; } die "--all only works with `rm'\n" if $opt{all} && $train ne 'rm'; my $spamc = PublicInbox::Spamcheck::Spamc->new; my $pi_cfg = PublicInbox::Config->new; my $err; my $mime = PublicInbox::Eml->new(do{ defined(my $data = do { local $/; }) or die "read STDIN: $!\n"; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; if ($train ne 'rm') { eval { if ($train eq 'ham') { $spamc->hamlearn(\$data); } elsif ($train eq 'spam') { $spamc->spamlearn(\$data); } die "spamc failed with: $?\n" if $?; }; $err = $@; } \$data }); sub remove_or_add ($$$$) { my ($ibx, $train, $mime, $addr) = @_; # We do not touch GIT_COMMITTER_* env here so we can track # who trained the message. $ibx->{name} = $ENV{GIT_COMMITTER_NAME} // $ibx->{name}; $ibx->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} // $addr; $ibx = PublicInbox::InboxWritable->new($ibx); my $im = $ibx->importer(0); if ($train eq "rm") { # This needs to be idempotent, as my inotify trainer # may train for each cross-posted message, and this # script already learns for every list in # ~/.public-inbox/config $im->remove($mime, $train); } elsif ($train eq "ham") { # no checking for spam here, we assume the message has # been reviewed by a human at this point: PublicInbox::MDA->set_list_headers($mime, $ibx); # Ham messages are trained when they're marked into # a SEEN state, so this is idempotent: $im->add($mime); } $im->done; } # spam is removed from all known inboxes since it is often Bcc:-ed if ($train eq 'spam' || ($train eq 'rm' && $opt{all})) { $pi_cfg->each_inbox(sub { my ($ibx) = @_; $ibx = PublicInbox::InboxWritable->new($ibx); my $im = $ibx->importer(0); $im->remove($mime, $train); $im->done; }); } else { require PublicInbox::MDA; # get all recipients my %dests; # address => for ($mime->header('Cc'), $mime->header('To')) { foreach my $addr (PublicInbox::Address::emails($_)) { $addr = lc($addr); $dests{$addr} //= $pi_cfg->lookup($addr) // 0; } } # n.b. message may be cross-posted to multiple public-inboxes my %seen; while (my ($addr, $ibx) = each %dests) { next unless ref($ibx); # $ibx may be 0 next if $seen{"$ibx"}++; remove_or_add($ibx, $train, $mime, $addr); } my $dests = PublicInbox::MDA->inboxes_for_list_id($pi_cfg, $mime); for my $ibx (@$dests) { next if $seen{"$ibx"}++; remove_or_add($ibx, $train, $mime, $ibx->{-primary_address}); } } if ($err) { warn $err; exit 1; } public-inbox-1.9.0/script/public-inbox-mda000077500000000000000000000100711430031475700205030ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2013-2021 all contributors # License: AGPL-3.0+ # # Mail delivery agent for public-inbox, run from your MTA upon mail delivery my $help = < \$precheck, 'help|h' => \$show_help) or do { print STDERR $help; exit 1 }; my $do_exit = sub { my ($code) = shift; $emm = $ems = undef; # trigger DESTROY exit $code; }; use PublicInbox::Eml; use PublicInbox::MDA; use PublicInbox::Config; use PublicInbox::Emergency; use PublicInbox::Filter::Base; use PublicInbox::InboxWritable; use PublicInbox::Spamcheck; # n.b: hopefully we can setup the emergency path without bailing due to # user error, we really want to setup the emergency destination ASAP # in case there's bugs in our code or user error. my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/"; $ems = PublicInbox::Emergency->new($emergency); my $str = do { local $/; }; $str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; $ems->prepare(\$str); my $eml = PublicInbox::Eml->new(\$str); my $cfg = PublicInbox::Config->new; my $key = 'publicinboxmda.spamcheck'; my $default = 'PublicInbox::Spamcheck::Spamc'; my $spamc = PublicInbox::Spamcheck::get($cfg, $key, $default); my $dests = []; my $recipient = $ENV{ORIGINAL_RECIPIENT}; if (defined $recipient) { my $ibx = $cfg->lookup($recipient); # first check push @$dests, $ibx if $ibx; } if (!scalar(@$dests)) { $dests = PublicInbox::MDA->inboxes_for_list_id($cfg, $eml); if (!scalar(@$dests) && !defined($recipient)) { die "ORIGINAL_RECIPIENT not defined in ENV\n"; } scalar(@$dests) or $do_exit->(67); # EX_NOUSER 5.1.1 user unknown } my $err; @$dests = grep { my $ibx = PublicInbox::InboxWritable->new($_); eval { $ibx->assert_usable_dir }; if ($@) { warn $@; $err = 1; 0; # pre-check, MDA has stricter rules than an importer might; } elsif ($precheck) { !!PublicInbox::MDA->precheck($eml, $ibx->{address}); } else { 1; } } @$dests; $do_exit->(67) if $err && scalar(@$dests) == 0; $eml = undef; my $spam_ok; if ($spamc) { $str = ''; $spam_ok = $spamc->spamcheck($ems->fh, \$str); # update the emergency dump with the new message: $emm = PublicInbox::Emergency->new($emergency); $emm->prepare(\$str); $ems = $ems->abort; } else { # no spam checking configured: $spam_ok = 1; $emm = $ems; my $fh = $emm->fh; read($fh, $str, -s $fh); } $do_exit->(0) unless $spam_ok; # -mda defaults to the strict base filter which we won't use anywhere else sub mda_filter_adjust ($) { my ($ibx) = @_; my $fcfg = $ibx->{filter} || ''; if ($fcfg eq '') { $ibx->{filter} = 'PublicInbox::Filter::Base'; } elsif ($fcfg eq 'scrub') { # legacy alias, undocumented, remove? $ibx->{filter} = 'PublicInbox::Filter::Mirror'; } } my @rejects; for my $ibx (@$dests) { mda_filter_adjust($ibx); my $filter = $ibx->filter; my $mime = PublicInbox::Eml->new($str); my $ret = $filter->delivery($mime); if (ref($ret) && ($ret->isa('PublicInbox::Eml') || $ret->isa('Email::MIME'))) { # filter altered message $mime = $ret; } elsif ($ret == PublicInbox::Filter::Base::IGNORE) { next; # nothing, keep looping } elsif ($ret == PublicInbox::Filter::Base::REJECT) { push @rejects, $filter->err; next; } PublicInbox::MDA->set_list_headers($mime, $ibx); my $im = $ibx->importer(0); if (defined $im->add($mime)) { # ->abort is idempotent, no emergency if a single # destination succeeds $emm->abort; } else { # v1-only my $mid = $mime->header_raw('Message-ID'); # this message is similar to what ssoma-mda shows: print STDERR "CONFLICT: Message-ID: $mid exists\n"; } $im->done; } if (scalar(@rejects) && scalar(@rejects) == scalar(@$dests)) { $! = 65; # EX_DATAERR 5.6.0 data format error die join("\n", @rejects, ''); } $do_exit->(0); public-inbox-1.9.0/script/public-inbox-netd000077500000000000000000000003151430031475700206740ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use v5.12; use PublicInbox::Daemon; PublicInbox::Daemon::run(); public-inbox-1.9.0/script/public-inbox-nntpd000077500000000000000000000004051430031475700210650ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ # # Standalone NNTP server for public-inbox. use v5.12; use PublicInbox::Daemon; PublicInbox::Daemon::run('nntp://0.0.0.0:119'); public-inbox-1.9.0/script/public-inbox-pop3d000077500000000000000000000004051430031475700207670ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ # # Standalone POP3 server for public-inbox. use v5.12; use PublicInbox::Daemon; PublicInbox::Daemon::run('pop3://0.0.0.0:110'); public-inbox-1.9.0/script/public-inbox-purge000077500000000000000000000036541430031475700210750ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # # Used for purging messages entirely from a public-inbox. Currently # supports v2 inboxes only, for now. use strict; use warnings; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use PublicInbox::AdminEdit; PublicInbox::Admin::check_require('-index'); use PublicInbox::Filter::Base qw(REJECT); use PublicInbox::Eml; require PublicInbox::V2Writable; my $help = < 1, all => 0, -min_inbox_version => 2 }; GetOptions($opt, @PublicInbox::AdminEdit::OPT, 'C=s@') or die $help; if ($opt->{help}) { print $help; exit 0 }; PublicInbox::Admin::do_chdir(delete $opt->{C}); my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); PublicInbox::AdminEdit::check_editable(\@ibxs); defined(my $data = do { local $/; }) or die "read STDIN: $!\n"; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; my $n_purged = 0; foreach my $ibx (@ibxs) { my $mime = PublicInbox::Eml->new($data); my $v2w = PublicInbox::V2Writable->new($ibx, 0); my $commits = $v2w->purge($mime) || []; if (my $scrub = $ibx->filter($v2w)) { my $scrubbed = $scrub->scrub($mime, 1); if ($scrubbed && $scrubbed != REJECT()) { my $scrub_commits = $v2w->purge($scrubbed); push @$commits, @$scrub_commits if $scrub_commits; } } $v2w->done; if ($opt->{verbose}) { # should we consider this machine-parseable? PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, $commits); } $n_purged += scalar @$commits; } # behave like "rm -f" exit(0) if ($opt->{force} || $n_purged); warn "Not found\n" if $opt->{verbose}; exit(1); public-inbox-1.9.0/script/public-inbox-watch000077500000000000000000000030701430031475700210510ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ my $help = <autoflush use PublicInbox::Watch; use PublicInbox::Config; use PublicInbox::DS; my $do_scan = 1; GetOptions('scan!' => \$do_scan, # undocumented, testing only 'help|h' => \(my $show_help)) or do { print STDERR $help; exit 1 }; if ($show_help) { print $help; exit 0 }; my $oldset = PublicInbox::DS::block_signals(); STDOUT->autoflush(1); STDERR->autoflush(1); local $0 = $0; # local since this script may be eval-ed my $watch = PublicInbox::Watch->new(PublicInbox::Config->new); my $reload = sub { my $prev = $watch or return; # SIGQUIT issued $watch->quit; $watch = PublicInbox::Watch->new(PublicInbox::Config->new); if ($watch) { warn("I: reloaded\n"); } else { warn("E: reloading failed\n"); $watch = $prev; } }; if ($watch) { my $scan = sub { return if !$watch; warn "I: scanning\n"; $watch->trigger_scan('full'); }; my $quit = sub { $watch->quit if $watch; $watch = undef; $0 .= ' quitting'; }; my $sig = { HUP => $reload, USR1 => $scan, CHLD => \&PublicInbox::DS::enqueue_reap, }; $sig->{QUIT} = $sig->{TERM} = $sig->{INT} = $quit; # --no-scan is only intended for testing atm, undocumented. PublicInbox::DS::requeue($scan) if $do_scan; $watch->watch($sig, $oldset) while ($watch); } public-inbox-1.9.0/script/public-inbox-xcpdb000077500000000000000000000046511430031475700210510ustar00rootroot00000000000000#!perl -w # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $help = < upgrade or reshard Xapian DB(s) used by public-inbox options: --compact | -c run public-inbox-compact(1) after indexing --all copy all configured inboxes --reshard=NUM change number the number of shards --jobs=NUM limit parallelism to JOBS count --verbose | -v increase verbosity (may be repeated) --sequential-shard copy+index Xapian shards sequentially (for slow HDD) index options (see public-inbox-index(1) man page for full description): --no-fsync speed up indexing, risk corruption on power outage --batch-size=BYTES flush changes to OS after a given number of bytes --max-size=BYTES do not index messages larger than the given size See public-inbox-xcpdb(1) man page for full documentation. EOF my $opt = { quiet => -1, compact => 0, fsync => 1, -eidx_ok => 1 }; GetOptions($opt, qw( fsync|sync! compact|c reshard|R=i max_size|max-size=s batch_size|batch-size=s sequential-shard|seq-shard jobs|j=i quiet|q verbose|v blocksize|b=s no-full|n fuller|F all C=s@ help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; use PublicInbox::Admin; PublicInbox::Admin::require_or_die('-search'); PublicInbox::Admin::do_chdir(delete $opt->{C}); require PublicInbox::Config; my $cfg = PublicInbox::Config->new; my ($ibxs, $eidxs) = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); unless ($ibxs) { print STDERR $help; exit 1 } my $idx_env = PublicInbox::Admin::index_prepare($opt, $cfg); # we only set XAPIAN_FLUSH_THRESHOLD for index, since cpdb doesn't # know sizes, only doccounts $opt->{-idx_env} = $idx_env; if ($opt->{'sequential-shard'} && ($opt->{jobs} // 1) > 1) { warn "W: --jobs=$opt->{jobs} ignored with --sequential-shard\n"; $opt->{jobs} = 0; } require PublicInbox::InboxWritable; require PublicInbox::Xapcmd; # we rely on --no-renumber to keep docids synched for NNTP(artnum) + IMAP(UID) for my $ibx (@$ibxs) { $ibx = PublicInbox::InboxWritable->new($ibx); PublicInbox::Xapcmd::run($ibx, 'cpdb', $opt); } for my $eidx (@$eidxs) { PublicInbox::Xapcmd::run($eidx, 'cpdb', $opt); } public-inbox-1.9.0/script/public-inbox.cgi000077500000000000000000000015071430031475700205110ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2014-2021 all contributors # License: AGPL-3.0+ or later # # Enables using PublicInbox::WWW as a CGI script use strict; BEGIN { for (qw(Plack::Builder Plack::Handler::CGI)) { eval("require $_") or die "E: Plack is required for $0\n"; } Plack::Builder->import; require PublicInbox::WWW; PublicInbox::WWW->preload if $ENV{MOD_PERL}; } my $www = PublicInbox::WWW->new; my $app = builder { # Enable to ensure redirects and Atom feed URLs are generated # properly when running behind a reverse proxy server which # sets the X-Forwarded-Proto request header. # See Plack::Middleware::ReverseProxy documentation for details # enable 'ReverseProxy'; enable 'Head'; sub { $www->call(@_) }; }; Plack::Handler::CGI->new->run($app); public-inbox-1.9.0/scripts/000077500000000000000000000000001430031475700156075ustar00rootroot00000000000000public-inbox-1.9.0/scripts/README000066400000000000000000000004061430031475700164670ustar00rootroot00000000000000This directory contains informal scripts and random tools used in the development of public-inbox. Some only exist only for historical purposes, and some may not work anymore. See the "script/" directory (not "scripts/") for supported and documented commands. public-inbox-1.9.0/scripts/dc-dlvr000077500000000000000000000033251430031475700170730ustar00rootroot00000000000000#!/bin/sh # Copyright (C) 2008-2021 all contributors # License: GPL-3.0+ # This is installed as /etc/dc-dcvr on my system # to use with postfix main.cf: mailbox_command = /etc/dc-dlvr "$EXTENSION" DELIVER=/usr/lib/dovecot/deliver CLAMDSCAN=clamdscan # change if your spamc/spamd listens elsewhere spamc='spamc' # allow plus addressing to train spam filters, $1 is the $EXTENSION # which may be "trainspam" or "trainham". Only allow spam training # when $CLIENT_ADDRESS is empty (local client) case $1,$CLIENT_ADDRESS in trainspam,) exec $spamc -L spam > /dev/null 2>&1 ;; trainham,) exec $spamc -L ham > /dev/null 2>&1 ;; esac TMPMSG=$(mktemp -t dc-dlvr.orig.$USER.XXXXXX || exit 1) CDMSG=$(mktemp -t dc-dlvr.orig.$USER.XXXXXX || exit 1) rm_list="$TMPMSG $CDMSG" cat >$CDMSG $CLAMDSCAN --quiet - <$CDMSG if test $? -eq 1 then $DELIVER -m INBOX.spam <$CDMSG exec rm -f $rm_list fi # pre-filter, for infrequently read lists which do their own spam filtering: if test -r ~/.dc-dlvr.pre then set -e mv -f $CDMSG $TMPMSG DEFAULT_INBOX=$(. ~/.dc-dlvr.pre) case $DEFAULT_INBOX in '') exec rm -f $rm_list ;; INBOX) ;; # do nothing *) $DELIVER -m $DEFAULT_INBOX <$TMPMSG exec rm -f $rm_list ;; esac PREMSG=$(mktemp -t dc-dlvr.orig.$USER.XXXXXX || exit 1) rm_list="$rm_list $PREMSG" set +e mv -f $TMPMSG $PREMSG $spamc -E --headers <$PREMSG >$TMPMSG else $spamc -E --headers <$CDMSG >$TMPMSG fi err=$? # normal delivery set -e case $err in 1) $DELIVER -m INBOX.spam <$TMPMSG ;; *) # users may override normal delivery and have it go elsewhere if test -r ~/.dc-dlvr.rc then . ~/.dc-dlvr.rc else $DELIVER -m INBOX <$TMPMSG fi ;; esac exec rm -f $rm_list public-inbox-1.9.0/scripts/dc-dlvr.pre000066400000000000000000000007351430031475700176570ustar00rootroot00000000000000# Copyright (C) 2014, Eric Wong # License: AGPL-3.0+ # sourced by /etc/dc-dlvr in ~$PI_USER/.dc-dlvr.rc, this just exits, # aborting /etc/dc-dlvr export PATH=/usr/local/bin:/usr/bin:/bin trap 'err=$?; set +e; test $err -eq 0 || rm -f $TMPMSG; exit $err' EXIT case $1,$CLIENT_ADDRESS in pispam,) exec public-inbox-learn spam <$TMPMSG ;; piham,) exec public-inbox-learn ham <$TMPMSG ;; esac exec public-inbox-mda <$TMPMSG public-inbox-1.9.0/scripts/dupe-finder000066400000000000000000000025521430031475700177400ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ # # ad-hoc tool for finding duplicates, unstable! use strict; use warnings; use PublicInbox::Inbox; use PublicInbox::Over; use PublicInbox::Search; use PublicInbox::Config; my $repo = shift; my $ibx; if (index($repo, '@') > 0) { $ibx = PublicInbox::Config->new->lookup($repo); } elsif (-d $repo) { $ibx = { inboxdir => $repo, address => 'unnamed@example.com' }; $ibx = PublicInbox::Inbox->new($ibx); } else { $ibx = PublicInbox::Config->new->lookup_name($repo); } $ibx or die "No inbox"; $ibx->search or die "search not available for inbox"; my $over = $ibx->over; my $dbh = $over->dbh; sub emit ($) { my ($nums) = @_; foreach my $n (@$nums) { my $smsg = $over->get_art($n) or next; print STDERR "$n $smsg->{blob} $smsg->{mid}\n"; my $msg = $ibx->msg_by_smsg($smsg) or next; print "From $smsg->{blob}\@$n Thu Jan 1 00:00:00 1970\n"; $$msg =~ s/^(>*From )/>$1/gm; print $$msg, "\n"; } } my $sth = $dbh->prepare(<<''); SELECT id,num FROM id2num WHERE num > 0 ORDER BY id $sth->execute; my $prev_id = -1; my ($id, $num, @nums); while (1) { ($id, $num) = $sth->fetchrow_array; defined $id or last; if ($prev_id != $id) { emit(\@nums) if scalar(@nums) > 1; @nums = (); } $prev_id = $id; push @nums, $num; } public-inbox-1.9.0/scripts/edit-sa-prefs000077500000000000000000000015741430031475700202070ustar00rootroot00000000000000#!/bin/sh # Copyright (C) 2014, Eric Wong # License: GPLv3 or later # edit and atomically update ~/.spamasassin/user_prefs safely set -e cd ~/.spamassassin cp user_prefs user_prefs.edit.$$ # don't care if we clobber old files # non-blocking lock if ! ln user_prefs.edit.$$ user_prefs.edit then rm user_prefs.edit.$$ echo >&2 "we are already editing user_prefs.edit" exit 1 fi rm user_prefs.edit.$$ ${VISUAL-vi} user_prefs.edit if diff -u user_prefs user_prefs.edit then rm -f user_prefs.edit echo 'no changes' exit 0 fi # check until we're good or $EDITOR fails while ! spamassassin -p user_prefs.edit --lint do echo >&2 "respawning editor, press Enter to continue" read ignored_var ${VISUAL-vi} user_prefs.edit done # atomically replace user_prefs mv user_prefs.edit user_prefs echo '~/.spamassassin/user_prefs updated' public-inbox-1.9.0/scripts/import_maildir000077500000000000000000000027611430031475700205560ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2014, Eric Wong and all contributors # License: AGPL-3.0+ # # Script to import a Maildir into a public-inbox =begin usage export GIT_DIR=/path/to/your/repo.git export GIT_AUTHOR_EMAIL='list@example.com' export GIT_AUTHOR_NAME='list name' ./import_maildir /path/to/maildir/ =cut use strict; use warnings; use Date::Parse qw/str2time/; use PublicInbox::Eml; use PublicInbox::Git; use PublicInbox::Import; sub usage { "Usage:\n".join('', grep(/\t/, `head -n 24 $0`)) } my $dir = shift @ARGV or die usage(); my $git_dir = `git rev-parse --git-dir`; chomp $git_dir; foreach my $sub (qw(cur new tmp)) { -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n"; } my @msgs; foreach my $sub (qw(cur new)) { foreach my $fn (glob("$dir/$sub/*")) { open my $fh, '<', $fn or next; my $s = PublicInbox::Eml->new(do { local $/; <$fh> }); my $date = $s->header('Date'); my $t = eval { str2time($date) }; defined $t or next; my @fn = split(m!/!, $fn); push @msgs, [ $t, "$sub/" . pop @fn, $date ]; } } my $git = PublicInbox::Git->new($git_dir); chomp(my $name = `git config user.name`); chomp(my $email = `git config user.email`); my $im = PublicInbox::Import->new($git, $name, $email); @msgs = sort { $b->[0] <=> $a->[0] } @msgs; while (my $ary = pop @msgs) { my $fn = "$dir/$ary->[1]"; open my $fh, '<', $fn or next; my $mime = PublicInbox::Eml->new(do { local $/; <$fh> }); $im->add($mime); } $im->done; 1; public-inbox-1.9.0/scripts/import_slrnspool000077500000000000000000000040261430031475700211640ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2015-2021 all contributors # License: AGPL-3.0+ # # Incremental (or one-shot) importer of a slrnpull news spool =begin usage export ORIGINAL_RECIPIENT=address@example.com public-inbox-init $INBOX $GIT_DIR $HTTP_URL $ORIGINAL_RECIPIENT ./import_slrnspool SLRNPULL_ROOT/news/foo/bar =cut use strict; use warnings; use PublicInbox::Config; use PublicInbox::Eml; use PublicInbox::Import; use PublicInbox::Git; sub usage { "Usage:\n".join('',grep(/\t/, `head -n 10 $0`)) } my $exit = 0; my $sighandler = sub { $exit = 1 }; $SIG{INT} = $sighandler; $SIG{TERM} = $sighandler; my $spool = shift @ARGV or die usage(); my $recipient = $ENV{ORIGINAL_RECIPIENT}; defined $recipient or die usage(); my $cfg = PublicInbox::Config->new; my $ibx = $cfg->lookup($recipient); my $git = $ibx->git; my $im; if ($ibx->version == 2) { require PublicInbox::V2Writable; $im = PublicInbox::V2Writable->new($ibx); $im->{parallel} = 0; # pointless to be parallel for a single message } else { $im = PublicInbox::Import->new($git, $ibx->{name}, $ibx->{-primary_address}); } $ibx->{filter} ||= 'PublicInbox::Filter::Gmane'; my $filter = $ibx->filter; sub key { "publicinbox.$ibx->{name}.importslrnspoolstate"; } sub get_min { my $f = PublicInbox::Config->default_file; my $out = $git->qx('config', "--file=$f", key($ibx)); $out ||= 0; chomp $out; $out =~ /\A[0-9]+\z/ and return $out; 0; } sub set_min { my ($num) = @_; my $f = PublicInbox::Config->default_file; my @cmd = (qw/git config/, "--file=$f", key($ibx), $num); system(@cmd) == 0 or die join(' ', @cmd). " failed: $?\n"; } my $n = get_min(); my $ok; my $max_gap = 200000; my $max = $n + $max_gap; $spool =~ s!/+\z!!; for (; $exit == 0 && $n < $max; $n++) { my $fn = "$spool/$n"; open(my $fh, '<', $fn) or next; $max = $n + $max_gap; print STDERR $fn, "\n"; my $mime = PublicInbox::Eml->new(do { local $/; <$fh> }); $filter->scrub($mime); $im->add($mime); $ok = $n + 1; set_min($ok); } $im->done; public-inbox-1.9.0/scripts/import_vger_from_mbox000066400000000000000000000024101430031475700221340ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; use PublicInbox::InboxWritable; my $usage = "usage: $0 NAME EMAIL DIR \$dry_run, 'V|version=i' => \$version, 'F|format=s' => \$variant, 'filter=s' => \$filter, ); GetOptions(%opts) or die $usage; if ($variant ne 'mboxrd' && $variant ne 'mboxo') { die "Unsupported mbox variant: $variant\n"; } my $name = shift or die $usage; # git my $email = shift or die $usage; # git@vger.kernel.org my $inboxdir = shift or die $usage; # /path/to/v2/repo my $ibx = { inboxdir => $inboxdir, name => $name, version => $version, address => [ $email ], filter => $filter, }; $ibx = PublicInbox::Inbox->new($ibx); unless ($dry_run) { if ($version >= 2) { require PublicInbox::V2Writable; PublicInbox::V2Writable->new($ibx, 1)->init_inbox(0); } else { system(qw(git init --bare -q), $inboxdir) == 0 or die; } } $ibx = PublicInbox::InboxWritable->new($ibx); binmode STDIN; $ibx->import_mbox(\*STDIN, $variant); public-inbox-1.9.0/scripts/report-spam000077500000000000000000000032621430031475700200110ustar00rootroot00000000000000#!/bin/sh # Copyright (C) 2008-2014, Eric Wong # License: GPLv3 or later # Usage: report-spam /path/to/message/in/maildir # This is intended for use with incron or similar systems. # my incrontab(5) looks like this: # /path/to/maildir/.INBOX.good/cur IN_MOVED_TO /path/to/report-spam $@/$# # /path/to/maildir/.INBOX.spam/cur IN_MOVED_TO /path/to/report-spam $@/$# # gigantic emails tend not to be spam (but they suck anyways...) bytes=$(stat -c %s $1) if test $bytes -gt 512000 then exit fi # Only tested with the /usr/sbin/sendmail which ships with postfix # *** Why not call spamc directly in this script? *** # I route this through my MTA so it gets queued properly. # incrond has no concurrency limits and will fork a new process on # every single event, which sucks with rename storms when a client # commits folder changes. The sendmail executable exits quickly and # queues up the message for training. This should also ensure fairness # to newly arriving mail. Instead of installing/configuring # another queueing system, I reuse the queue in the MTA. # See scripts/dc-dlvr for corresponding trainspam/trainham handlers, # which are for my personal bayes training, and scripts/dc-dlvr.pre # for the pispam/piham handlers for training emails going to public-inbox DO_SENDMAIL='/usr/sbin/sendmail -oi' PI_USER=pi case $1 in *[/.]spam/cur/*) # non-new messages in spam get trained $DO_SENDMAIL $PI_USER+pispam <$1 exec $DO_SENDMAIL $USER+trainspam <$1 ;; *:2,*S*) # otherwise, seen messages only case $1 in *:2,*T*) exit 0 ;; # ignore trashed messages esac $DO_SENDMAIL $PI_USER+piham <$1 exec $DO_SENDMAIL $USER+trainham <$1 ;; esac public-inbox-1.9.0/scripts/slrnspool2maildir000077500000000000000000000026551430031475700212240ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2013-2021 all contributors # License: AGPL-3.0+ # # One-off script to convert an slrnpull news spool to Maildir =begin usage ./slrnspool2maildir SLRNPULL_ROOT/news/foo/bar /path/to/maildir/ =cut use strict; use warnings; use Email::Filter; use Email::LocalDelivery; use File::Glob qw(bsd_glob GLOB_NOSORT); sub usage { "Usage:\n".join('',grep(/\t/, `head -n 12 $0`)) } my $spool = shift @ARGV or die usage(); my $dir = shift @ARGV or die usage(); -d $dir or die "$dir is not a directory\n"; $dir .= '/' unless $dir =~ m!/\z!; foreach my $sub (qw(cur new tmp)) { my $nd = "$dir/$sub"; -d $nd and next; mkdir $nd or die "mkdir $nd failed: $!\n"; } foreach my $n (grep(/\d+\z/, bsd_glob("$spool/*", GLOB_NOSORT))) { if (open my $fh, '<', $n) { my $f = Email::Filter->new(data => do { local $/; <$fh> }); my $s = $f->simple; # gmane rewrites Received headers, which increases spamminess # Some older archives set Original-To foreach my $x (qw(Received To)) { my @h = $s->header("Original-$x"); if (@h) { $s->header_set($x, @h); $s->header_set("Original-$x"); } } # triggers for the SA HEADER_SPAM rule foreach my $drop (qw(Approved)) { $s->header_set($drop) } # appears to be an old gmane bug: $s->header_set('connect()'); $f->exit(0); $f->accept($dir); } else { warn "Failed to open $n: $!\n"; } } public-inbox-1.9.0/scripts/ssoma-replay000077500000000000000000000064101430031475700201520ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2015-2021 all contributors # License: AGPL-3.0+ # # A work-in-progress, but one day I hope this script is no longer # necessary and users will all pull from public-inboxes instead # of having mail pushed to them via mlmmj. # # This is for use with ssoma, using "command:" delivery mechanism # (as opposed to normal Maildir or mbox). # It assumes mlmmj-process is in /usr/bin (mlmmj requires absolute paths) # and assumes FOO@domain.example.com has web archives available at: # https://domain.example.com/FOO/ # # The goal here is _anybody_ can setup a mirror of any public-inbox # repository and run their own mlmmj instance to replay traffic. =begin usage with ssoma: NAME=meta URL=https://public-inbox.org/meta/ ssoma add $NAME $URL "command:/path/to/ssoma-replay -L /path/to/spool/$NAME" ; $GIT_DIR/ssoma.state should have something like the following target: ; (where GIT_DIR is ~/.ssoma/meta.git/ in the above example) [target "local"] command = /path/to/ssoma-replay -L /path/to/spool/meta =cut use strict; use Email::Simple; use URI::Escape qw/uri_escape_utf8/; use File::Temp qw/tempfile/; my ($fh, $filename) = tempfile('ssoma-replay-XXXX', TMPDIR => 1); my $msg = Email::Simple->new(do { local $/; }); select $fh; # Note: the archive URL makes assumptions about where the # archive is hosted. It is currently true of all the domains # hosted by me. my $header_obj = $msg->header_obj; my $body = $msg->body; my $list_id = $header_obj->header('List-Id'); my ($archive_url, $user, $domain); if (defined $list_id) { # due to a bug in old versions of public-inbox, was used # as the list-Id instead of as recommended in RFC2919 ($user, $domain) = ($list_id =~ /<([^\.@]+)[\.@](.+)>/g); if (defined $domain) { $archive_url = "https://$domain/$user/"; my $mid = $header_obj->header('Message-Id'); if ($mid =~ /<[ \t]*([^>]+)?[ \t]*>/s) { $mid = $1; } $mid = uri_escape_utf8($mid, '^A-Za-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@'); $header_obj->header_set('List-Archive', "<$archive_url>"); foreach my $h (qw(Help Unsubscribe Subscribe Owner)) { my $lch = lc $h; my $v = ""; $header_obj->header_set("List-$h", $v); } $header_obj->header_set('List-Post', ""); # RFC 5064 $header_obj->header_set('Archived-At', "<$archive_url$mid/>"); $header_obj->header_set('X-Archived-At'); } } print $header_obj->as_string, $msg->crlf, $body; # don't break inline signatures goto out if ($body =~ /^-----BEGIN PGP SIG.+-----/sm); # try not to break dkim/dmarc/spf crap, either foreach (qw(domainkey-signature dkim-signature authentication-results)) { goto out if defined $header_obj->header($_); } my $ct = $header_obj->header('Content-Type'); if (!defined($ct) || $ct =~ m{\A\s*text/plain\b}i) { print "\n" unless $body =~ /\n\z/s; defined $archive_url or goto out; # Do not add a space after '--' as is standard for user-generated # signatures, we want to preserve the "-- \n" in original user sigs # for mail software which splits on that. print "--\n", "unsubscribe: $user+unsubscribe\@$domain\n", "archive: $archive_url\n"; } out: $| = 1; exec '/usr/bin/mlmmj-process', @ARGV, '-m', $filename; public-inbox-1.9.0/scripts/xhdr-num2mid000077500000000000000000000030641430031475700200560ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # Useful for mapping article IDs from existing NNTP servers to MIDs use strict; use warnings; use Net::NNTP; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $usage = "usage: NNTPSERVER=news.example.org $0 GROUP [FIRST_NUM]\n"; my ($msgmap, $mm); my %opts = ( '--msgmap=s' => \$msgmap ); GetOptions(%opts) or die "bad command-line args\n$usage"; if ($msgmap) { require PublicInbox::Msgmap; require PublicInbox::MID; # mid_clean $mm = PublicInbox::Msgmap->new_file($msgmap, 1); } my $group = shift or die $usage; my $nntp = Net::NNTP->new($ENV{NNTPSERVER} || '127.0.0.1'); my ($num, $first, $last) = $nntp->group($group); die "Invalid group\n" if !(defined $num && defined $first && defined $last); my $arg_first = shift; if (defined $arg_first) { $arg_first =~ /\A[0-9]+\z/ or die $usage; $first = $arg_first; } elsif ($mm) { my $last_article = $mm->meta_accessor('last_article'); $first = $last_article + 1 if defined $last_article; } my $batch = 1000; my $i; for ($i = $first; $i < $last; $i += $batch) { my $j = $i + $batch - 1; $j = $last if $j > $last; my $num2mid = $nntp->xhdr('Message-ID', "$i-$j"); $mm->{dbh}->begin_work if $mm; for my $n ($i..$j) { defined(my $mid = $num2mid->{$n}) or next; print "$n $mid\n"; if ($mm) { $mid = PublicInbox::MID::mid_clean($mid); $mm->mid_set($n, $mid); } } if ($mm) { $mm->meta_accessor('last_article', $j); $mm->{dbh}->commit; } } public-inbox-1.9.0/t/000077500000000000000000000000001430031475700143635ustar00rootroot00000000000000public-inbox-1.9.0/t/.gitconfig000066400000000000000000000002021430031475700163270ustar00rootroot00000000000000; this becomes ~/.gitconfig for tests where we use ; "$ENV{HOME} = '/path/to/worktree/t'" in tests [gc] writeCommitGraph = false public-inbox-1.9.0/t/address.t000066400000000000000000000050661430031475700162040ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use_ok 'PublicInbox::Address'; sub test_pkg { my ($pkg) = @_; my $emails = $pkg->can('emails'); my $names = $pkg->can('names'); my $pairs = $pkg->can('pairs'); is_deeply([qw(e@example.com e@example.org)], [$emails->('User , e@example.org')], 'address extraction works as expected'); is_deeply($pairs->('User , e@example.org'), [[qw(User e@example.com)], [undef, 'e@example.org']], "pair extraction works ($pkg)"); is_deeply(['user@example.com'], [$emails->('')], 'comment after domain accepted before >'); is_deeply($pairs->(''), [[qw(Comment user@example.com)]], "comment as name ($pkg)"); my $s = 'User , e@e, "John A. Doe" , , (xyz), '. 'U Ser (do not use)'; my @names = $names->($s); is_deeply(\@names, ['User', 'e', 'John A. Doe', 'x', 'xyz', 'U Ser'], 'name extraction works as expected'); is_deeply($pairs->($s), [ [ 'User', 'e@e' ], [ undef, 'e@e' ], [ 'John A. Doe', 'j@d' ], [ undef, 'x@x' ], [ 'xyz', 'y@x' ], [ 'U Ser', 'u@x' ] ], "pairs extraction works for $pkg"); @names = $names->('"user@example.com" '); is_deeply(['user'], \@names, 'address-as-name extraction works as expected'); is_deeply($pairs->('"user@example.com" '), [ [ 'user@example.com', 'user@example.com' ] ], "pairs for $pkg"); { my $backwards = 'u@example.com (John Q. Public)'; @names = $names->($backwards); is_deeply(\@names, ['John Q. Public'], 'backwards name OK'); my @emails = $emails->($backwards); is_deeply(\@emails, ['u@example.com'], 'backwards emails OK'); is_deeply($pairs->($backwards), [ [ 'John Q. Public', 'u@example.com' ] ], "backwards pairs $pkg"); } $s = '"Quote Unneeded" '; @names = $names->($s); is_deeply(['Quote Unneeded'], \@names, 'extra quotes dropped'); is_deeply($pairs->($s), [ [ 'Quote Unneeded', 'user@example.com' ] ], "extra quotes dropped in pairs $pkg"); my @emails = $emails->('Local User '); is_deeply([], \@emails , 'no address for local address'); @names = $emails->('Local User '); is_deeply([], \@names, 'no address, no name'); } test_pkg('PublicInbox::Address'); SKIP: { if ($INC{'PublicInbox/AddressPP.pm'}) { skip 'Email::Address::XS missing', 8; } use_ok 'PublicInbox::AddressPP'; test_pkg('PublicInbox::AddressPP'); } done_testing; public-inbox-1.9.0/t/admin.t000066400000000000000000000065211430031475700156440ustar00rootroot00000000000000#!perl -w # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Import; use_ok 'PublicInbox::Admin'; my $v1 = create_inbox 'v1', -no_gc => 1, sub {}; my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = $v1->{inboxdir}; my ($res, $err, $v); my $v2ibx; SKIP: { require_mods(qw(DBD::SQLite), 5); require_git(2.6, 1) or skip 5, 'git too old'; $v2ibx = create_inbox 'v2', indexlevel => 'basic', version => 2, -no_gc => 1, sub { my ($v2w, $ibx) = @_; $v2w->idx_init; $v2w->importer; }; }; *resolve_inboxdir = \&PublicInbox::Admin::resolve_inboxdir; # v1 is(resolve_inboxdir($git_dir), $git_dir, 'top-level GIT_DIR resolved'); is(resolve_inboxdir("$git_dir/objects"), $git_dir, 'GIT_DIR/objects resolved'); ok(chdir($git_dir), 'chdir GIT_DIR works'); is(resolve_inboxdir(), $git_dir, 'resolve_inboxdir works in GIT_DIR'); ok(chdir("$git_dir/objects"), 'chdir GIT_DIR/objects works'); is(resolve_inboxdir(), $git_dir, 'resolve_inboxdir works in GIT_DIR'); $res = resolve_inboxdir(undef, \$v); is($v, 1, 'version 1 detected'); is($res, $git_dir, 'detects directory along with version'); # $tmpdir could be inside a git working, directory, so we test '/' SKIP: { my $no_vcs_dir = '/'; # do people version-control "/"? skip "$no_vcs_dir is version controlled by git", 4 if -d '/.git'; open my $null, '>', '/dev/null' or die "open /dev/null: $!"; open my $olderr, '>&', \*STDERR or die "dup stderr: $!"; ok(chdir($no_vcs_dir), 'chdir to a non-inbox'); open STDERR, '>&', $null or die "redirect stderr to /dev/null: $!"; $res = eval { resolve_inboxdir() }; open STDERR, '>&', $olderr or die "restore stderr: $!"; is($res, undef, 'fails inside non-version-controlled dir'); ok(chdir($tmpdir), 'back to test-specific $tmpdir'); open STDERR, '>&', $null or die "redirect stderr to /dev/null: $!"; $res = eval { resolve_inboxdir($no_vcs_dir) }; $err = $@; open STDERR, '>&', $olderr or die "restore stderr: $!"; is($res, undef, 'fails on non-version-controlled dir'); ok($err, '$@ set on failure'); } # v2 if ($v2ibx) { my $v2_dir = $v2ibx->{inboxdir}; is(resolve_inboxdir($v2_dir), $v2_dir, 'resolve_inboxdir works on v2_dir'); chdir($v2_dir) or BAIL_OUT "chdir v2_dir: $!"; is(resolve_inboxdir(), $v2_dir, 'resolve_inboxdir works inside v2_dir'); $res = resolve_inboxdir(undef, \$v); is($v, 2, 'version 2 detected'); is($res, $v2_dir, 'detects directory along with version'); # TODO: should work from inside Xapian dirs, and git dirs, here... my $objdir = "$v2_dir/git/0.git/objects"; is($v2_dir, resolve_inboxdir($objdir, \$v), 'at $objdir'); is($v, 2, 'version 2 detected at $objdir'); chdir($objdir) or BAIL_OUT "chdir objdir: $!"; is(resolve_inboxdir(undef, \$v), $v2_dir, 'inside $objdir'); is($v, 2, 'version 2 detected inside $objdir'); } chdir '/' or BAIL_OUT "chdir: $!"; my @pairs = ( '1g' => 1024 ** 3, 666 => 666, '1500K' => 1500 * 1024, '15m' => 15 * (1024 ** 2), ); while (@pairs) { my ($in, $out) = splice(@pairs, 0, 2); my $orig = $in; ok(PublicInbox::Admin::parse_unsigned(\$in), "parse_unsigned $orig"); is($in, $out, "got $orig => ($in == $out)"); } for my $v ('', 'bogus', '1p', '1gig') { ok(!PublicInbox::Admin::parse_unsigned(\$v), "parse_unsigned rejects $v"); } done_testing(); public-inbox-1.9.0/t/alt.psgi000066400000000000000000000006441430031475700160330ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ use v5.12; use warnings; use Plack::Builder; my $pi_config = $ENV{PI_CONFIG} // 'unset'; # capture ASAP my $app = sub { my ($env) = @_; $env->{'psgi.errors'}->print("ALT\n"); [ 200, ['Content-Type', 'text/plain'], [ $pi_config ] ] }; builder { enable 'ContentLength'; enable 'Head'; $app; } public-inbox-1.9.0/t/altid.t000066400000000000000000000030461430031475700156500ustar00rootroot00000000000000#!perl -w # Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Eml; require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::Msgmap'; use_ok 'PublicInbox::SearchIdx'; my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/a.git"; my $alt_file = "$tmpdir/another-nntp.sqlite3"; my $altid = [ "serial:gmane:file=$alt_file" ]; my $ibx; { my $mm = PublicInbox::Msgmap->new_file($alt_file, 2); is($mm->mid_set(1234, 'a@example.com'), 1, 'mid_set once OK'); ok(0 == $mm->mid_set(1234, 'a@example.com'), 'mid_set not idempotent'); ok(0 == $mm->mid_set(1, 'a@example.com'), 'mid_set fails with dup MID'); } { $ibx = create_inbox 'testbox', tmpdir => $git_dir, sub { my ($im) = @_; $im->add(PublicInbox::Eml->new(<<'EOF')); From: a@example.com To: b@example.com Subject: boo! Message-ID: hello world gmane:666 EOF }; $ibx->{altid} = $altid; PublicInbox::SearchIdx->new($ibx, 1)->index_sync; } { my $mset = $ibx->search->mset("gmane:1234"); my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); $msgs = [ map { $_->{mid} } @$msgs ]; is_deeply($msgs, ['a@example.com'], 'got one match'); $mset = $ibx->search->mset('gmane:666'); is($mset->size, 0, 'body did NOT match'); }; { my $mm = PublicInbox::Msgmap->new_file($alt_file, 2); my ($min, $max) = $mm->minmax; my $num = $mm->mid_insert('b@example.com'); ok($num > $max, 'auto-increment goes beyond mid_set'); } done_testing; public-inbox-1.9.0/t/altid_v2.t000066400000000000000000000027001430031475700162530ustar00rootroot00000000000000#!perl -w # Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Eml; require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); require PublicInbox::Msgmap; my $another = 'another-nntp.sqlite3'; my $altid = [ "serial:gmane:file=$another" ]; my $ibx = create_inbox 'v2', version => 2, indexlevel => 'medium', altid => $altid, sub { my ($im, $ibx) = @_; my $mm = PublicInbox::Msgmap->new_file("$ibx->{inboxdir}/$another", 2); $mm->mid_set(1234, 'a@example.com') == 1 or BAIL_OUT 'mid_set once'; ok(0 == $mm->mid_set(1234, 'a@example.com'), 'mid_set not idempotent'); ok(0 == $mm->mid_set(1, 'a@example.com'), 'mid_set fails with dup MID'); $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT; From: a@example.com To: b@example.com Subject: boo! Message-ID: hello world gmane:666 EOF }; my $mm = PublicInbox::Msgmap->new_file("$ibx->{inboxdir}/$another", 2); ok(0 == $mm->mid_set(1234, 'a@example.com'), 'mid_set not idempotent'); ok(0 == $mm->mid_set(1, 'a@example.com'), 'mid_set fails with dup MID'); my $mset = $ibx->search->mset('gmane:1234'); my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); $msgs = [ map { $_->{mid} } @$msgs ]; is_deeply($msgs, ['a@example.com'], 'got one match'); $mset = $ibx->search->mset('gmane:666'); is($mset->size, 0, 'body did NOT match'); done_testing(); public-inbox-1.9.0/t/cgi.t000066400000000000000000000076311430031475700153210ustar00rootroot00000000000000#!perl -w # Copyright (C) 2014-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use IO::Uncompress::Gunzip qw(gunzip); use PublicInbox::Eml; use IO::Handle; my ($tmpdir, $for_destroy) = tmpdir(); require_mods(qw(Plack::Handler::CGI Plack::Util)); my $slashy_mid = 'slashy/asdf@example.com'; my $ibx = create_inbox 'test', tmpdir => "$tmpdir/test", sub { my ($im, $ibx) = @_; mkdir "$ibx->{inboxdir}/home", 0755 or BAIL_OUT; mkdir "$ibx->{inboxdir}/home/.public-inbox", 0755 or BAIL_OUT; my $eml = PublicInbox::Eml->new(< To: You Cc: $ibx->{-primary_address} Message-Id: Subject: hihi Date: Thu, 01 Jan 1970 00:00:00 +0000 zzzzzz EOF $im->add($eml) or BAIL_OUT; $eml->header_set('Message-ID', ''); $eml->body_set("z\n" x 1024); $im->add($eml) or BAIL_OUT; $eml = PublicInbox::Eml->new(< To: Me Cc: $ibx->{-primary_address} In-Reply-To: Message-Id: Subject: Re: hihi Date: Thu, 01 Jan 1970 00:00:01 +0000 Me wrote: > zzzzzz what? EOF $im->add($eml) or BAIL_OUT; $eml = PublicInbox::Eml->new(< To: Me Cc: $ibx->{-primary_address} Message-Id: <$slashy_mid> Subject: Re: hihi Date: Thu, 01 Jan 1970 00:00:01 +0000 slashy EOF $im->add($eml) or BAIL_OUT; }; # create_inbox my $home = "$ibx->{inboxdir}/home"; open my $cfgfh, '>>', "$home/.public-inbox/config" or BAIL_OUT $!; print $cfgfh <{-primary_address} inboxdir = $ibx->{inboxdir} EOF $cfgfh->flush or BAIL_OUT $!; { local $ENV{HOME} = $home; my $res = cgi_run("/test/slashy/asdf\@example.com/raw"); like($res->{body}, qr/Message-Id: <\Q$slashy_mid\E>/, "slashy mid raw hit"); } # retrieve thread as an mbox SKIP: { local $ENV{HOME} = $home; my $path = "/test/blahblah\@example.com/t.mbox.gz"; my $res = cgi_run($path); like($res->{head}, qr/^Status: 501 /, "search not-yet-enabled"); my $cmd = ['-index', $ibx->{inboxdir}, '--max-size=2k']; print $cfgfh "\tindexlevel = basic\n" or BAIL_OUT $!; $cfgfh->flush or BAIL_OUT $!; my $opt = { 2 => \(my $err) }; my $indexed = run_script($cmd, undef, $opt); if ($indexed) { $res = cgi_run($path); like($res->{head}, qr/^Status: 200 /, "search returned mbox"); my $in = $res->{body}; my $out; gunzip(\$in => \$out); like($out, qr/^From /m, "From lines in mbox"); $res = cgi_run('/test/toobig@example.com/'); like($res->{head}, qr/^Status: 300 /, 'did not index or return >max-size message'); like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping large OID'); } else { like($res->{head}, qr/^Status: 501 /, "search not available"); skip('DBD::SQLite not available', 7); # (4 - 1) above, 4 below } require_mods('XML::TreePP', 4); $path = "/test/blahblah\@example.com/t.atom"; $res = cgi_run($path); like($res->{head}, qr/^Status: 200 /, "atom returned 200"); like($res->{head}, qr!^Content-Type: application/atom\+xml!m, "search returned atom"); my $t = XML::TreePP->new->parse($res->{body}); is(scalar @{$t->{feed}->{entry}}, 3, "parsed three entries"); like($t->{feed}->{-xmlns}, qr/\bAtom\b/, 'looks like an an Atom feed'); } done_testing(); sub cgi_run { my $env = { PATH_INFO => $_[0], QUERY_STRING => $_[1] || "", SCRIPT_NAME => '', REQUEST_URI => $_[0] . ($_[1] ? "?$_[1]" : ''), REQUEST_METHOD => $_[2] || "GET", GATEWAY_INTERFACE => 'CGI/1.1', HTTP_ACCEPT => '*/*', HTTP_HOST => 'test.example.com', }; my ($in, $out, $err) = ("", "", ""); my $rdr = { 0 => \$in, 1 => \$out, 2 => \$err }; run_script(['.cgi'], $env, $rdr); fail "unexpected error: \$?=$? ($err)" if $?; my ($head, $body) = split(/\r\n\r\n/, $out, 2); { head => $head, body => $body, err => $err } } public-inbox-1.9.0/t/check-www-inbox.perl000066400000000000000000000121621430031475700202650ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # Parallel WWW checker my $usage = "$0 [-j JOBS] [-s SLOW_THRESHOLD] URL_OF_INBOX\n"; use strict; use warnings; use File::Temp qw(tempfile); use GDBM_File; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use IO::Socket; use LWP::ConnCache; use POSIX qw(:sys_wait_h); use Time::HiRes qw(gettimeofday tv_interval); use WWW::Mechanize; use Data::Dumper; # we want to use vfork+exec with spawn, WWW::Mechanize can use too much # memory and fork(2) fails use PublicInbox::Spawn qw(spawn which); $ENV{PERL_INLINE_DIRECTORY} or warn "PERL_INLINE_DIRECTORY unset, may OOM\n"; our $tmp_owner = $$; my $nproc = 4; my $slow = 0.5; my %opts = ( '-j|jobs=i' => \$nproc, '-s|slow-threshold=f' => \$slow, ); GetOptions(%opts) or die "bad command-line args\n$usage"; my $root_url = shift or die $usage; chomp(my $xmlstarlet = which('xmlstarlet')); my $atom_check = eval { my $cmd = [ qw(xmlstarlet val -e -) ]; sub { my ($in, $out, $err) = @_; use autodie; open my $in_fh, '+>', undef; open my $out_fh, '+>', undef; open my $err_fh, '+>', undef; print $in_fh $$in; $in_fh->flush; sysseek($in_fh, 0, 0); my $rdr = { 0 => fileno($in_fh), 1 => fileno($out_fh), 2 => fileno($err_fh), }; my $pid = spawn($cmd, undef, $rdr); while (waitpid($pid, 0) != $pid) { next if $!{EINTR}; warn "waitpid(xmlstarlet, $pid) $!"; return $!; } sysseek($out_fh, 0, 0); sysread($out_fh, $$out, -s $out_fh); sysseek($err_fh, 0, 0); sysread($err_fh, $$err, -s $err_fh); $? } } if $xmlstarlet; my %workers; $SIG{INT} = sub { exit 130 }; $SIG{TERM} = sub { exit 0 }; $SIG{CHLD} = sub { while (1) { my $pid = waitpid(-1, WNOHANG); return if !defined $pid || $pid <= 0; my $p = delete $workers{$pid} || '(unknown)'; warn("$pid [$p] exited with $?\n") if $?; } }; my @todo = IO::Socket->socketpair(AF_UNIX, SOCK_SEQPACKET, 0); die "socketpair failed: $!" unless $todo[1]; my @done = IO::Socket->socketpair(AF_UNIX, SOCK_SEQPACKET, 0); die "socketpair failed: $!" unless $done[1]; $| = 1; foreach my $p (1..$nproc) { my $pid = fork; die "fork failed: $!\n" unless defined $pid; if ($pid) { $workers{$pid} = $p; } else { $todo[1]->close; $done[0]->close; worker_loop($todo[0], $done[1]); } } my ($fh, $tmp) = tempfile('www-check-XXXX', SUFFIX => '.gdbm', UNLINK => 1, TMPDIR => 1); my $gdbm = tie my %seen, 'GDBM_File', $tmp, &GDBM_WRCREAT, 0600; defined $gdbm or die "gdbm open failed: $!\n"; $todo[0]->close; $done[1]->close; my ($rvec, $wvec); $todo[1]->blocking(0); $done[0]->blocking(0); $seen{$root_url} = 1; my $ndone = 0; my $nsent = 1; my @queue = ($root_url); my $timeout = $slow * 4; while (keys %workers) { # reacts to SIGCHLD $wvec = $rvec = ''; my $u; vec($rvec, fileno($done[0]), 1) = 1; if (@queue) { vec($wvec, fileno($todo[1]), 1) = 1; } elsif ($ndone == $nsent) { kill 'TERM', keys %workers; exit; } if (!select($rvec, $wvec, undef, $timeout)) { while (my ($k, $v) = each %seen) { next if $v == 2; print "WAIT ($ndone/$nsent) <$k>\n"; } } while ($u = shift @queue) { my $s = $todo[1]->send($u, MSG_EOR); if ($!{EAGAIN}) { unshift @queue, $u; last; } } my $r; do { $r = $done[0]->recv($u, 65535, 0); } while (!defined $r && $!{EINTR}); next unless $u; if ($u =~ s/\ADONE\t//) { $ndone++; $seen{$u} = 2; } else { next if $seen{$u}; $seen{$u} = 1; $nsent++; push @queue, $u; } } sub worker_loop { my ($todo_rd, $done_wr) = @_; $SIG{CHLD} = 'DEFAULT'; my $m = WWW::Mechanize->new(autocheck => 0); my $cc = LWP::ConnCache->new; $m->stack_depth(0); # no history $m->conn_cache($cc); while (1) { $todo_rd->recv(my $u, 65535, 0); next unless $u; my $t = [ gettimeofday ]; my $r = $m->get($u); $t = tv_interval($t); printf "SLOW %0.06f % 5d %s\n", $t, $$, $u if $t > $slow; my @links; if ($r->is_success) { my %links = map { (split('#', $_->URI->abs->as_string))[0] => 1; } grep { $_->tag && $_->url !~ /:/ } $m->links; @links = keys %links; } elsif ($r->code != 300) { warn "W: ".$r->code . " $u\n" } my $s; # blocking foreach my $l (@links, "DONE\t$u") { next if $l eq '' || $l =~ /\.mbox(?:\.gz)\z/; do { $s = $done_wr->send($l, MSG_EOR); } while (!defined $s && $!{EINTR}); die "$$ send $!\n" unless defined $s; my $n = length($l); die "$$ send truncated $s < $n\n" if $s != $n; } # make sure the HTML source doesn't screw up terminals # when people curl the source (not remotely an expert # on languages or encodings, here). my $ct = $r->header('Content-Type') || ''; warn "no Content-Type: $u\n" if $ct eq ''; if ($atom_check && $ct =~ m!\bapplication/atom\+xml\b!) { my $raw = $r->decoded_content; my ($out, $err) = ('', ''); my $fail = $atom_check->(\$raw, \$out, \$err); warn "Atom ($fail) - $u - <1:$out> <2:$err>\n" if $fail; } next if $ct !~ m!\btext/html\b!; my $dc = $r->decoded_content; if ($dc =~ /([\x00-\x08\x0d-\x1f\x7f-\x{99999999}]+)/s) { my $o = $1; my $c = Dumper($o); warn "bad: $u $c\n"; } } } public-inbox-1.9.0/t/cmd_ipc.t000066400000000000000000000113641430031475700161530ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::TestCommon; use Socket qw(AF_UNIX SOCK_STREAM MSG_EOR); pipe(my ($r, $w)) or BAIL_OUT; my ($send, $recv); require_ok 'PublicInbox::Spawn'; my $SOCK_SEQPACKET = eval { Socket::SOCK_SEQPACKET() } // undef; use Time::HiRes qw(usleep); my $do_test = sub { SKIP: { my ($type, $flag, $desc) = @_; defined $type or skip 'SOCK_SEQPACKET missing', 7; my ($s1, $s2); my $src = 'some payload' x 40; socketpair($s1, $s2, AF_UNIX, $type, 0) or BAIL_OUT $!; my $sfds = [ fileno($r), fileno($w), fileno($s1) ]; $send->($s1, $sfds, $src, $flag); my (@fds) = $recv->($s2, my $buf, length($src) + 1); is($buf, $src, 'got buffer payload '.$desc); my ($r1, $w1, $s1a); my $opens = sub { ok(open($r1, '<&=', $fds[0]), 'opened received $r'); ok(open($w1, '>&=', $fds[1]), 'opened received $w'); ok(open($s1a, '+>&=', $fds[2]), 'opened received $s1'); }; $opens->(); my @exp = stat $r; my @cur = stat $r1; is("$exp[0]\0$exp[1]", "$cur[0]\0$cur[1]", '$r dev/ino matches'); @exp = stat $w; @cur = stat $w1; is("$exp[0]\0$exp[1]", "$cur[0]\0$cur[1]", '$w dev/ino matches'); @exp = stat $s1; @cur = stat $s1a; is("$exp[0]\0$exp[1]", "$cur[0]\0$cur[1]", '$s1 dev/ino matches'); if (defined($SOCK_SEQPACKET) && $type == $SOCK_SEQPACKET) { $r1 = $w1 = $s1a = undef; $src = (',' x 1023) . '-' .('.' x 1024); $send->($s1, $sfds, $src, $flag); (@fds) = $recv->($s2, $buf, 1024); is($buf, (',' x 1023) . '-', 'silently truncated buf'); $opens->(); $r1 = $w1 = $s1a = undef; $s2->blocking(0); @fds = $recv->($s2, $buf, length($src) + 1); ok($!{EAGAIN}, "EAGAIN set by ($desc)"); is_deeply(\@fds, [ undef ], "EAGAIN $desc"); $s2->blocking(1); if ($ENV{TEST_ALRM}) { my $alrm = 0; local $SIG{ALRM} = sub { $alrm++ }; my $tgt = $$; my $pid = fork // xbail "fork: $!"; if ($pid == 0) { # need to loop since Perl signals are racy # (the interpreter doesn't self-pipe) while (usleep(1000)) { kill 'ALRM', $tgt; } } @fds = $recv->($s2, $buf, length($src) + 1); ok($!{EINTR}, "EINTR set by ($desc)"); kill('KILL', $pid); waitpid($pid, 0); is_deeply(\@fds, [ undef ], "EINTR $desc"); ok($alrm, 'SIGALRM hit'); } close $s1; @fds = $recv->($s2, $buf, length($src) + 1); is_deeply(\@fds, [], "no FDs on EOF $desc"); is($buf, '', "buffer cleared on EOF ($desc)"); socketpair($s1, $s2, AF_UNIX, $type, 0) or BAIL_OUT $!; $s1->blocking(0); my $nsent = 0; while (defined(my $n = $send->($s1, $sfds, $src, $flag))) { $nsent += $n; fail "sent 0 bytes" if $n == 0; } ok($!{EAGAIN} || $!{ETOOMANYREFS}, "hit EAGAIN || ETOOMANYREFS on send $desc") or diag "send failed with: $!"; ok($nsent > 0, 'sent some bytes'); socketpair($s1, $s2, AF_UNIX, $type, 0) or BAIL_OUT $!; is($send->($s1, [], $src, $flag), length($src), 'sent w/o FDs'); $buf = 'nope'; @fds = $recv->($s2, $buf, length($src)); is(scalar(@fds), 0, 'no FDs received'); is($buf, $src, 'recv w/o FDs'); my $nr = 2 * 1024 * 1024; while (1) { vec(my $vec = '', $nr * 8 - 1, 1) = 1; my $n = $send->($s1, [], $vec, $flag); if (defined($n)) { $n == length($vec) or fail "short send: $n != ".length($vec); diag "sent $nr, retrying with more"; $nr += 2 * 1024 * 1024; } else { ok($!{EMSGSIZE} || $!{ENOBUFS}, 'got EMSGSIZE or ENOBUFS') or diag "$nr bytes fails with: $!"; last; } } } } }; my $send_ic = PublicInbox::Spawn->can('send_cmd4'); my $recv_ic = PublicInbox::Spawn->can('recv_cmd4'); SKIP: { ($send_ic && $recv_ic) or skip 'Inline::C not installed/enabled', 12; $send = $send_ic; $recv = $recv_ic; $do_test->(SOCK_STREAM, 0, 'Inline::C stream'); $do_test->($SOCK_SEQPACKET, MSG_EOR, 'Inline::C seqpacket'); } SKIP: { require_mods('Socket::MsgHdr', 13); require_ok 'PublicInbox::CmdIPC4'; $send = PublicInbox::CmdIPC4->can('send_cmd4'); $recv = PublicInbox::CmdIPC4->can('recv_cmd4'); $do_test->(SOCK_STREAM, 0, 'MsgHdr stream'); $do_test->($SOCK_SEQPACKET, MSG_EOR, 'MsgHdr seqpacket'); SKIP: { ($send_ic && $recv_ic) or skip 'Inline::C not installed/enabled', 12; $recv = $recv_ic; $do_test->(SOCK_STREAM, 0, 'Inline::C -> MsgHdr stream'); $do_test->($SOCK_SEQPACKET, 0, 'Inline::C -> MsgHdr seqpacket'); } } SKIP: { skip 'not Linux', 1 if $^O ne 'linux'; require_ok 'PublicInbox::Syscall'; $send = PublicInbox::Syscall->can('send_cmd4') or skip 'send_cmd4 not defined for arch'; $recv = PublicInbox::Syscall->can('recv_cmd4') or skip 'recv_cmd4 not defined for arch'; $do_test->(SOCK_STREAM, 0, 'PP Linux stream'); $do_test->($SOCK_SEQPACKET, MSG_EOR, 'PP Linux seqpacket'); } done_testing; public-inbox-1.9.0/t/config.t000066400000000000000000000175211430031475700160230ustar00rootroot00000000000000# Copyright (C) 2014-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Import; use_ok 'PublicInbox'; ok(defined(eval('$PublicInbox::VERSION')), 'VERSION defined'); use_ok 'PublicInbox::Config'; my ($tmpdir, $for_destroy) = tmpdir(); { PublicInbox::Import::init_bare($tmpdir); my $inboxdir = "$tmpdir/new\nline"; my @cmd = ('git', "--git-dir=$tmpdir", qw(config publicinbox.foo.inboxdir), $inboxdir); is(xsys(@cmd), 0, "set config"); my $tmp = PublicInbox::Config->new("$tmpdir/config"); is($tmp->{'publicinbox.foo.inboxdir'}, $inboxdir, 'config read correctly'); is($tmp->{'core.bare'}, 'true', 'init used --bare repo'); my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; $tmp = PublicInbox::Config->new("$tmpdir/config"); is($tmp->lookup_name('foo'), undef, 'reject invalid inboxdir'); like("@warn", qr/^E:.*must not contain `\\n'/sm, 'warned about newline'); } { my $f = "examples/public-inbox-config"; ok(-r $f, "$f is readable"); my $cfg = PublicInbox::Config->new($f); is_deeply($cfg->lookup('meta@public-inbox.org'), { 'inboxdir' => '/home/pi/meta-main.git', 'address' => [ 'meta@public-inbox.org' ], 'domain' => 'public-inbox.org', 'url' => [ 'http://example.com/meta' ], -primary_address => 'meta@public-inbox.org', 'name' => 'meta', -httpbackend_limiter => undef, }, "lookup matches expected output"); is($cfg->lookup('blah@example.com'), undef, "non-existent lookup returns undef"); my $test = $cfg->lookup('test@public-inbox.org'); is_deeply($test, { 'address' => ['try@public-inbox.org', 'sandbox@public-inbox.org', 'test@public-inbox.org'], -primary_address => 'try@public-inbox.org', 'inboxdir' => '/home/pi/test-main.git', 'domain' => 'public-inbox.org', 'name' => 'test', 'url' => [ 'http://example.com/test' ], -httpbackend_limiter => undef, }, "lookup matches expected output for test"); } { my $cfgpfx = "publicinbox.test"; my @altid = qw(serial:gmane:file=a serial:enamg:file=b); my $config = PublicInbox::Config->new(\<lookup_name('test'); is_deeply($ibx->{altid}, [ @altid ]); $config = PublicInbox::Config->new(\<lookup_name('test'); is($ibx->{inboxdir}, '/path/to/non/existent', 'mainrepo still works'); $config = PublicInbox::Config->new(\<lookup_name('test'); is($ibx->{inboxdir}, '/path/to/non/existent', 'inboxdir takes precedence'); } { my $pfx = "publicinbox.test"; my $str = <new(\$str); my $ibx = $cfg->lookup_name('test'); is_deeply($ibx->nntp_url({ www => { pi_cfg => $cfg }}), [ 'nntp://news.example.com/inbox.test' ], 'nntp_url uses global NNTP server'); $str = <new(\$str); $ibx = $cfg->lookup_name('test'); is_deeply($ibx->nntp_url({ www => { pi_cfg => $cfg }}), [ 'nntp://news.alt.example.com/inbox.test' ], 'nntp_url uses per-inbox NNTP server'); is_deeply($ibx->imap_url({ www => { pi_cfg => $cfg }}), [ 'imaps://mail.example.com/inbox.test' ], 'nntp_url uses per-inbox NNTP server'); } # no obfuscate domains { my $pfx = "publicinbox.test"; my $pfx2 = "publicinbox.foo"; my $str = <new(\$str); my $ibx = $cfg->lookup_name('test'); my $re = $ibx->{-no_obfuscate_re}; like('meta@public-inbox.org', $re, 'public-inbox.org address not to be obfuscated'); like('t@example.com', $re, 'example.com address not to be obfuscated'); unlike('t@example.comM', $re, 'example.comM address does not match'); is_deeply($ibx->{-no_obfuscate}, { 'test@example.com' => 1, 'foo@example.com' => 1, 'z@example.com' => 1, }, 'known addresses populated'); } my @invalid = ( # git rejects this because it locks refnames, but we don't have # this problem with inbox names: # 'inbox.lock', # git rejects these: '', '..', '.', 'stash@{9}', 'inbox.', '^caret', '~tilde', '*asterisk', 's p a c e s', ' leading-space', 'trailing-space ', 'question?', 'colon:', '[square-brace]', "\fformfeed", "\0zero", "\bbackspace", ); my %X = ("\0" => '\\0', "\b" => '\\b', "\f" => '\\f', "'" => "\\'"); my $xre = join('|', keys %X); for my $s (@invalid) { my $d = $s; $d =~ s/($xre)/$X{$1}/g; ok(!PublicInbox::Config::valid_foo_name($s), "`$d' name rejected"); } # obviously-valid examples my @valid = qw(a a@example a@example.com); # Rejecting more was considered, but then it dawned on me that # people may intentionally use inbox names which are not URL-friendly # to prevent the PSGI interface from displaying them... # URL-unfriendly # '<', '>', '%', '#', '?', '&', '(', ')', # maybe these aren't so bad, they're common in Message-IDs, even: # '!', '$', '=', '+' push @valid, qw[bang! ca$h less< more> 1% (parens) &more eql= +plus], '#hash'; for my $s (@valid) { ok(PublicInbox::Config::valid_foo_name($s), "`$s' name accepted"); } { my $f = "$tmpdir/ordered"; open my $fh, '>', $f or die "open: $!"; my @expect; foreach my $i (0..3) { push @expect, "$i"; print $fh <<"" or die "print: $!"; [publicinbox "$i"] inboxdir = /path/to/$i.git address = $i\@example.com } close $fh or die "close: $!"; my $cfg = PublicInbox::Config->new($f); my @result; $cfg->each_inbox(sub { push @result, $_[0]->{name} }); is_deeply(\@result, \@expect); } { my $pfx1 = "publicinbox.test1"; my $pfx2 = "publicinbox.test2"; my $str = <new(\$str); my $t1 = $cfg->lookup_name('test1'); my $t2 = $cfg->lookup_name('test2'); is($cfg->repo_objs($t1)->[0], $cfg->repo_objs($t2)->[0], 'inboxes share ::Git object'); } { for my $t (qw(TRUE true yes on 1 +1 -1 13 0x1 0x12 0X5)) { is(PublicInbox::Config::git_bool($t), 1, "$t is true"); is(xqx([qw(git -c), "test.val=$t", qw(config --bool test.val)]), "true\n", "$t matches git-config behavior"); } for my $f (qw(FALSE false no off 0 +0 +000 00 0x00 0X0)) { is(PublicInbox::Config::git_bool($f), 0, "$f is false"); is(xqx([qw(git -c), "test.val=$f", qw(config --bool test.val)]), "false\n", "$f matches git-config behavior"); } is(PublicInbox::Config::git_bool('bogus'), undef, 'bogus is undef'); } SKIP: { # XXX wildcard match requires git 2.26+ require_git('1.8.5', 2) or skip 'git 1.8.5+ required for --url-match', 2; my $f = "$tmpdir/urlmatch"; open my $fh, '>', $f or BAIL_OUT $!; print $fh <new; my $url = 'imap://mail.example.com/INBOX'; is($cfg->urlmatch('imap.pollInterval', $url), 9, 'urlmatch hit'); is($cfg->urlmatch('imap.idleInterval', $url), undef, 'urlmatch miss'); }; done_testing(); public-inbox-1.9.0/t/config_limiter.t000066400000000000000000000026051430031475700175450ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Config; my $cfgpfx = "publicinbox.test"; { my $config = PublicInbox::Config->new(\<lookup_name('test'); my $git = $ibx->git; my $old = "$git"; my $lim = $git->{-httpbackend_limiter}; ok($lim, 'Limiter exists'); is($lim->{max}, 12, 'limiter has expected slots'); $ibx->{git} = undef; $git = $ibx->git; isnt($old, "$git", 'got new Git object'); is("$git->{-httpbackend_limiter}", "$lim", 'same limiter'); } { my $config = PublicInbox::Config->new(\<lookup_name('test'); my $git = $ibx->git; ok($git, 'got git object'); my $old = "$git"; # stringify object ref "Git(0xDEADBEEF)" my $lim = $git->{-httpbackend_limiter}; ok($lim, 'Limiter exists'); is($lim->{max}, 3, 'limiter has expected slots'); $ibx->{git} = undef; my $new = $ibx->git; isnt($old, "$new", 'got new Git object'); is("$new->{-httpbackend_limiter}", "$lim", 'same limiter'); is($lim->{max}, 3, 'limiter has expected slots'); } done_testing; public-inbox-1.9.0/t/content_hash.t000066400000000000000000000026111430031475700172250ustar00rootroot00000000000000#!perl -w # Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::ContentHash qw(content_hash); use PublicInbox::Eml; my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: b@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF my $orig = content_hash($mime); my $reload = content_hash(PublicInbox::Eml->new($mime->as_string)); is($orig, $reload, 'content_hash matches after serialization'); { my $s1 = PublicInbox::Eml->new($mime->as_string); $s1->header_set('Sender', 's@example.com'); is(content_hash($s1), $orig, "Sender ignored when 'From' present"); my $s2 = PublicInbox::Eml->new($s1->as_string); $s1->header_set('Sender', 'sender@example.com'); is(content_hash($s2), $orig, "Sender really ignored 'From'"); $_->header_set('From') for ($s1, $s2); isnt(content_hash($s1), content_hash($s2), 'sender accounted when From missing'); } foreach my $h (qw(From To Cc)) { my $n = q("Quoted N'Ame" ); $mime->header_set($h, "$n"); my $q = content_hash($mime); is($mime->header($h), $n, "content_hash does not mutate $h:"); $mime->header_set($h, 'Quoted N\'Ame '); my $nq = content_hash($mime); is($nq, $q, "quotes ignored in $h:"); } done_testing(); public-inbox-1.9.0/t/convert-compact.t000066400000000000000000000075051430031475700176630ustar00rootroot00000000000000#!perl -w # Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::Eml; use PublicInbox::TestCommon; use PublicInbox::Import; require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); have_xapian_compact or plan skip_all => 'xapian-compact missing for '.__FILE__; my ($tmpdir, $for_destroy) = tmpdir(); my $ibx = create_inbox 'v1', indexlevel => 'medium', tmpdir => "$tmpdir/v1", pre_cb => sub { my ($inboxdir) = @_; PublicInbox::Import::init_bare($inboxdir); xsys_e(qw(git) , "--git-dir=$inboxdir", qw(config core.sharedRepository 0644)); }, sub { my ($im, $ibx) = @_; $im->done; umask(077) or BAIL_OUT "umask: $!"; $_[0] = $im = $ibx->importer(0); my $eml = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: b@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF $im->add($eml) or BAIL_OUT '->add'; $im->remove($eml) or BAIL_OUT '->remove'; $im->add($eml) or BAIL_OUT '->add'; }; umask(077) or BAIL_OUT "umask: $!"; is(((stat("$ibx->{inboxdir}/public-inbox"))[2]) & 07777, 0755, 'sharedRepository respected for v1'); is(((stat("$ibx->{inboxdir}/public-inbox/msgmap.sqlite3"))[2]) & 07777, 0644, 'sharedRepository respected for v1 msgmap'); my @xdir = glob("$ibx->{inboxdir}/public-inbox/xap*/*"); foreach (@xdir) { my @st = stat($_); is($st[2] & 07777, -f _ ? 0644 : 0755, 'sharedRepository respected on file after convert'); } local $ENV{PI_CONFIG} = '/dev/null'; my ($out, $err) = ('', ''); my $rdr = { 1 => \$out, 2 => \$err }; my $cmd = [ '-compact', $ibx->{inboxdir} ]; ok(run_script($cmd, undef, $rdr), 'v1 compact works'); @xdir = glob("$ibx->{inboxdir}/public-inbox/xap*"); is(scalar(@xdir), 1, 'got one xapian directory after compact'); is(((stat($xdir[0]))[2]) & 07777, 0755, 'sharedRepository respected on v1 compact'); my $hwm = do { my $mm = $ibx->mm; $ibx->cleanup; $mm->num_highwater; }; ok(defined($hwm) && $hwm > 0, "highwater mark set #$hwm"); $cmd = [ '-convert', '--no-index', $ibx->{inboxdir}, "$tmpdir/no-index" ]; ok(run_script($cmd, undef, $rdr), 'convert --no-index works'); $cmd = [ '-convert', $ibx->{inboxdir}, "$tmpdir/x/v2" ]; ok(run_script($cmd, undef, $rdr), 'convert works'); @xdir = glob("$tmpdir/x/v2/xap*/*"); foreach (@xdir) { my @st = stat($_); is($st[2] & 07777, -f _ ? 0644 : 0755, 'sharedRepository respected after convert'); } $cmd = [ '-compact', "$tmpdir/x/v2" ]; my $env = { NPROC => 2 }; ok(run_script($cmd, $env, $rdr), 'v2 compact works'); $ibx->{inboxdir} = "$tmpdir/x/v2"; $ibx->{version} = 2; is($ibx->mm->num_highwater, $hwm, 'highwater mark unchanged in v2 inbox'); @xdir = glob("$tmpdir/x/v2/xap*/*"); foreach (@xdir) { my @st = stat($_); is($st[2] & 07777, -f _ ? 0644 : 0755, 'sharedRepository respected after v2 compact'); } is(((stat("$tmpdir/x/v2/msgmap.sqlite3"))[2]) & 07777, 0644, 'sharedRepository respected for v2 msgmap'); @xdir = (glob("$tmpdir/x/v2/git/*.git/objects/*/*"), glob("$tmpdir/x/v2/git/*.git/objects/pack/*")); foreach (@xdir) { my @st = stat($_); is($st[2] & 07777, -f _ ? 0444 : 0755, 'sharedRepository respected after v2 compact'); } my $msgs = $ibx->recent({limit => 1000}); is($msgs->[0]->{mid}, 'a-mid@b', 'message exists in history'); is(scalar @$msgs, 1, 'only one message in history'); $ibx = undef; $err = ''; $cmd = [ qw(-index -j0 --reindex -c), "$tmpdir/x/v2" ]; ok(run_script($cmd, undef, $rdr), '--reindex -c'); like($err, qr/xapian-compact/, 'xapian-compact ran (-c)'); $rdr->{2} = \(my $err2 = ''); $cmd = [ qw(-index -j0 --reindex -cc), "$tmpdir/x/v2" ]; ok(run_script($cmd, undef, $rdr), '--reindex -c -c'); like($err2, qr/xapian-compact/, 'xapian-compact ran (-c -c)'); ok(($err2 =~ tr/\n/\n/) > ($err =~ tr/\n/\n/), '-compacted twice'); done_testing(); public-inbox-1.9.0/t/data-gen/000077500000000000000000000000001430031475700160435ustar00rootroot00000000000000public-inbox-1.9.0/t/data-gen/.gitignore000066400000000000000000000000621430031475700200310ustar00rootroot00000000000000# read-only test data generated by create_inbox * public-inbox-1.9.0/t/data/000077500000000000000000000000001430031475700152745ustar00rootroot00000000000000public-inbox-1.9.0/t/data/0001.patch000066400000000000000000000024601430031475700166770ustar00rootroot00000000000000From: Eric Wong Date: Fri, 20 Jul 2018 07:21:41 +0000 To: test@example.com Subject: [PATCH] search: use boolean prefix for filenames in diffs, too Message-ID: <20180720072141.GA15957@example> Filenames within a project tend to be reasonably stable within a project and I plan on having automated searches hit these. Also, using no term prefix at all (the default for searching) still allows probabilistic searches on everything that's in a "git diff", including the blob names which were just made boolean. Note, attachment filenames ("n:" prefix) will still use probabilistic search, as they're hardly standardized. --- lib/PublicInbox/Search.pm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 090d998b6c2c..6e006fd73b1d 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -53,6 +53,9 @@ my %bool_pfx_external = ( dfpre => 'XDFPRE', dfpost => 'XDFPOST', dfblob => 'XDFPRE XDFPOST', + dfn => 'XDFN', + dfa => 'XDFA', + dfb => 'XDFB', ); my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST'; @@ -72,9 +75,6 @@ my %prob_prefix = ( q => 'XQUOT', nq => $non_quoted_body, - dfn => 'XDFN', - dfa => 'XDFA', - dfb => 'XDFB', dfhh => 'XDFHH', dfctx => 'XDFCTX', -- ^_^ public-inbox-1.9.0/t/data/binary.patch000066400000000000000000000010111430031475700175720ustar00rootroot00000000000000From 7a1921ba7bd99c63ad6dc6ec0791691ee80e279a Mon Sep 17 00:00:00 2001 From: BOFH Date: Fri, 13 May 2022 23:04:14 +0000 Subject: [PATCH] binary patch test Message-ID: --- zero | Bin 0 -> 1 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 zero diff --git a/zero b/zero new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 public-inbox-1.9.0/t/data/message_embed.eml000066400000000000000000000111241430031475700205520ustar00rootroot00000000000000Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 977481F45A; Sat, 18 Apr 2020 22:25:08 +0000 (UTC) Date: Sat, 18 Apr 2020 22:25:08 +0000 From: Eric Wong To: test@public-inbox.org Subject: Re: embedded message test Message-ID: <20200418222508.GA13918@dcvr> References: <20200418222020.GA2745@dcvr> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="TB36FDmn/VVEgNH/" Content-Disposition: inline In-Reply-To: <20200418222020.GA2745@dcvr> --TB36FDmn/VVEgNH/ Content-Type: text/plain; charset=utf-8 Content-Disposition: inline testing embedded message harder --TB36FDmn/VVEgNH/ Content-Type: message/rfc822 Content-Disposition: attachment; filename="embed2x.eml" Date: Sat, 18 Apr 2020 22:20:20 +0000 From: Eric Wong To: test@public-inbox.org Subject: embedded message test Message-ID: <20200418222020.GA2745@dcvr> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="/04w6evG8XlLl3ft" Content-Disposition: inline --/04w6evG8XlLl3ft Content-Type: text/plain; charset=utf-8 Content-Disposition: inline testing embedded message --/04w6evG8XlLl3ft Content-Type: message/rfc822 Content-Disposition: attachment; filename="test.eml" From: Eric Wong To: spew@80x24.org Subject: [PATCH] mail header experiments Date: Sat, 18 Apr 2020 21:41:14 +0000 Message-Id: <20200418214114.7575-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit --- lib/PublicInbox/MailHeader.pm | 55 +++++++++++++++++++++++++++++++++++ t/mail_header.t | 31 ++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 lib/PublicInbox/MailHeader.pm create mode 100644 t/mail_header.t diff --git a/lib/PublicInbox/MailHeader.pm b/lib/PublicInbox/MailHeader.pm new file mode 100644 index 00000000..166baf91 --- /dev/null +++ b/lib/PublicInbox/MailHeader.pm @@ -0,0 +1,55 @@ +# Copyright (C) 2020-2021 all contributors +# License: AGPL-3.0+ +package PublicInbox::MailHeader; +use strict; +use HTTP::Parser::XS qw(parse_http_response HEADERS_AS_ARRAYREF); +use bytes (); #bytes::length +my %casemap; + +sub _headerx_to_list { + my (undef, $head, $crlf) = @_; + + # picohttpparser uses `int' as the return value, so the + # actual limit is 2GB on most platforms. However, headers + # exceeding (or even close to) 1MB seems unreasonable + die 'headers too big' if bytes::length($$head) > 0x100000; + my ($ret, undef, undef, undef, $headers) = + parse_http_response('HTTP/1.0 1 X'. $crlf . $$head, + HEADERS_AS_ARRAYREF); + die 'failed to parse headers' if $ret <= 0; + # %casemap = map {; lc($_) => $_ } ($$head =~ m/^([^:]+):/gsm); + # my $nr = @$headers; + for (my $i = 0; $i < @$headers; $i += 2) { + my $key = $headers->[$i]; # = $casemap{$headers->[$i]}; + my $val = $headers->[$i + 1]; + (my $trimmed = $val) =~ s/\r?\n\s+/ /; + $headers->[$i + 1] = [ + $trimmed, + "$key: $val" + ] + } + $headers; +} + +sub _header_to_list { + my (undef, $head, $crlf) = @_; + my @tmp = ($$head =~ m/^(([^ \t:][^:\n]*):[ \t]* + ([^\n]*\n(?:[ \t]+[^\n]*\n)*))/gsmx); + my @headers; + $#headers = scalar @tmp; + @headers = (); + while (@tmp) { + my ($orig, $key, $val) = splice(@tmp, 0, 3); + # my $v = $tmp[$i + 2]; + # $v =~ s/\r?\n[ \t]+/ /sg; + # $v =~ s/\r?\n\z//s; + $val =~ s/\n[ \t]+/ /sg; + chomp($val, $orig); + # $val =~ s/\r?\n\z//s; + # $orig =~ s/\r?\n\z//s; + push @headers, $key, [ $val, $orig ]; + } + \@headers; +} + +1; diff --git a/t/mail_header.t b/t/mail_header.t new file mode 100644 index 00000000..4dc62c50 --- /dev/null +++ b/t/mail_header.t @@ -0,0 +1,31 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ +use strict; +use Test::More; +use PublicInbox::TestCommon; +require_mods('PublicInbox::MailHeader'); + +my $head = <<'EOF'; +From d0147582e289fdd4cdd84e91d8b0f8ae9c230124 Mon Sep 17 00:00:00 2001 +From: Eric Wong +Date: Fri, 17 Apr 2020 09:28:49 +0000 +Subject: [PATCH] searchthread: reduce indirection by removing container + +EOF +my $orig = $head; +use Email::Simple; +my $xshdr = PublicInbox::MailHeader->_header_to_list(\$head, "\n"); +my $simpl = Email::Simple::Header->_header_to_list(\$head, "\n"); +is_deeply($xshdr, $simpl); +use Benchmark qw(:all); +my $res = timethese(100000, { + pmh => sub { + PublicInbox::MailHeader->_header_to_list(\$head, "\n"); + }, + esh => sub { + PublicInbox::MailHeader->_header_to_list(\$head, "\n"); + } +}); +is($head, $orig); +use Data::Dumper; diag Dumper($res); +done_testing; --/04w6evG8XlLl3ft-- --TB36FDmn/VVEgNH/-- public-inbox-1.9.0/t/dir_idle.t000066400000000000000000000027331430031475700163300ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use v5.10.1; use strict; use PublicInbox::TestCommon; use PublicInbox::DS qw(now); use File::Path qw(make_path); use_ok 'PublicInbox::DirIdle'; my ($tmpdir, $for_destroy) = tmpdir(); make_path("$tmpdir/a/b", "$tmpdir/c"); my @x; my $cb = sub { push @x, \@_ }; my $di = PublicInbox::DirIdle->new($cb); $di->add_watches(["$tmpdir/a", "$tmpdir/c"], 1); PublicInbox::DS->SetLoopTimeout(1000); my $end = 3 + now; PublicInbox::DS->SetPostLoopCallback(sub { scalar(@x) == 0 && now < $end }); tick(0.011); rmdir("$tmpdir/a/b") or xbail "rmdir $!"; PublicInbox::DS::event_loop(); is(scalar(@x), 1, 'got an event') and is($x[0]->[0]->fullname, "$tmpdir/a/b", 'got expected fullname') and ok($x[0]->[0]->IN_DELETE, 'IN_DELETE set'); tick(0.011); rmdir("$tmpdir/a") or xbail "rmdir $!"; @x = (); $end = 3 + now; PublicInbox::DS::event_loop(); is(scalar(@x), 1, 'got an event') and is($x[0]->[0]->fullname, "$tmpdir/a", 'got expected fullname') and ok($x[0]->[0]->IN_DELETE_SELF, 'IN_DELETE_SELF set'); tick(0.011); rename("$tmpdir/c", "$tmpdir/j") or xbail "rmdir $!"; @x = (); $end = 3 + now; PublicInbox::DS::event_loop(); is(scalar(@x), 1, 'got an event') and is($x[0]->[0]->fullname, "$tmpdir/c", 'got expected fullname') and ok($x[0]->[0]->IN_DELETE_SELF || $x[0]->[0]->IN_MOVE_SELF, 'IN_DELETE_SELF set on move'); PublicInbox::DS->Reset; done_testing; public-inbox-1.9.0/t/ds-kqxs.t000066400000000000000000000027451430031475700161520ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # Licensed the same as Danga::Socket (and Perl5) # License: GPL-1.0+ or Artistic-1.0-Perl # # use strict; use Test::More; unless (eval { require IO::KQueue }) { my $m = $^O !~ /bsd/ ? 'DSKQXS is only for *BSD systems' : "no IO::KQueue, skipping $0: $@"; plan skip_all => $m; } if ('ensure nested kqueue works for signalfd emulation') { require POSIX; my $new = POSIX::SigSet->new(POSIX::SIGHUP()); my $old = POSIX::SigSet->new; my $hup = 0; local $SIG{HUP} = sub { $hup++ }; POSIX::sigprocmask(POSIX::SIG_SETMASK(), $new, $old) or die; my $kqs = IO::KQueue->new or die; $kqs->EV_SET(POSIX::SIGHUP(), IO::KQueue::EVFILT_SIGNAL(), IO::KQueue::EV_ADD()); kill('HUP', $$) or die; my @events = $kqs->kevent(3000); is(scalar(@events), 1, 'got one event'); is($events[0]->[0], POSIX::SIGHUP(), 'got SIGHUP'); my $parent = IO::KQueue->new or die; my $kqfd = $$kqs; $parent->EV_SET($kqfd, IO::KQueue::EVFILT_READ(), IO::KQueue::EV_ADD()); kill('HUP', $$) or die; @events = $parent->kevent(3000); is(scalar(@events), 1, 'got one event'); is($events[0]->[0], $kqfd, 'got kqfd'); is($hup, 0, '$SIG{HUP} did not fire'); POSIX::sigprocmask(POSIX::SIG_SETMASK(), $old) or die; defined(POSIX::close($kqfd)) or die; defined(POSIX::close($$parent)) or die; } local $ENV{TEST_IOPOLLER} = 'PublicInbox::DSKQXS'; require './t/ds-poll.t'; public-inbox-1.9.0/t/ds-leak.t000066400000000000000000000036071430031475700160760ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # Licensed the same as Danga::Socket (and Perl5) # License: GPL-1.0+ or Artistic-1.0-Perl # # use strict; use v5.10.1; use PublicInbox::TestCommon; use_ok 'PublicInbox::DS'; if ('close-on-exec for epoll and kqueue') { use PublicInbox::Spawn qw(spawn); my $pid; my $evfd_re = qr/(?:kqueue|eventpoll)/i; PublicInbox::DS->SetLoopTimeout(0); PublicInbox::DS->SetPostLoopCallback(sub { 0 }); # make sure execve closes if we're using fork() my ($r, $w); pipe($r, $w) or die "pipe: $!"; PublicInbox::DS::add_timer(0, sub { $pid = spawn([qw(sleep 10)]) }); PublicInbox::DS::event_loop(); ok($pid, 'subprocess spawned'); # wait for execve, we need to ensure lsof sees sleep(1) # and not the fork of this process: close $w or die "close: $!"; my $l = <$r>; is($l, undef, 'cloexec works and sleep(1) is running'); SKIP: { my $lsof = require_cmd('lsof', 1) or skip 'lsof missing', 1; my $rdr = { 2 => \(my $null) }; my @of = grep(/$evfd_re/, xqx([$lsof, '-p', $pid], {}, $rdr)); my $err = $?; skip "lsof broken ? (\$?=$err)", 1 if $err; is_deeply(\@of, [], 'no FDs leaked to subprocess'); }; if (defined $pid) { kill(9, $pid); waitpid($pid, 0); } PublicInbox::DS->Reset; } SKIP: { require_mods('BSD::Resource', 1); my $rlim = BSD::Resource::RLIMIT_NOFILE(); my ($n,undef) = BSD::Resource::getrlimit($rlim); # FreeBSD 11.2 with 2GB RAM gives RLIMIT_NOFILE=57987! if ($n > 1024 && !$ENV{TEST_EXPENSIVE}) { skip "RLIMIT_NOFILE=$n too big w/o TEST_EXPENSIVE for $0", 1; } my $cb = sub {}; for my $i (0..$n) { PublicInbox::DS->SetLoopTimeout(0); PublicInbox::DS->SetPostLoopCallback($cb); PublicInbox::DS::event_loop(); PublicInbox::DS->Reset; } ok(1, "Reset works and doesn't hit RLIMIT_NOFILE ($n)"); }; done_testing; public-inbox-1.9.0/t/ds-poll.t000066400000000000000000000032661430031475700161310ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # Licensed the same as Danga::Socket (and Perl5) # License: GPL-1.0+ or Artistic-1.0-Perl # # use strict; use warnings; use Test::More; use PublicInbox::Syscall qw(:epoll); my $cls = $ENV{TEST_IOPOLLER} // 'PublicInbox::DSPoll'; use_ok $cls; my $p = $cls->new; my ($r, $w, $x, $y); pipe($r, $w) or die; pipe($x, $y) or die; is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($r), EPOLLIN), 0, 'add EPOLLIN'); my $events = []; $p->epoll_wait(9, 0, $events); is_deeply($events, [], 'no events set'); is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($w), EPOLLOUT|EPOLLONESHOT), 0, 'add EPOLLOUT|EPOLLONESHOT'); $p->epoll_wait(9, -1, $events); is(scalar(@$events), 1, 'got POLLOUT event'); is($events->[0], fileno($w), '$w ready'); $p->epoll_wait(9, 0, $events); is(scalar(@$events), 0, 'nothing ready after oneshot'); is_deeply($events, [], 'no events set after oneshot'); syswrite($w, '1') == 1 or die; for my $t (0..1) { $p->epoll_wait(9, $t, $events); is($events->[0], fileno($r), "level-trigger POLLIN ready #$t"); is(scalar(@$events), 1, "only event ready #$t"); } syswrite($y, '1') == 1 or die; is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($x), EPOLLIN|EPOLLONESHOT), 0, 'EPOLLIN|EPOLLONESHOT add'); $p->epoll_wait(9, -1, $events); is(scalar @$events, 2, 'epoll_wait has 2 ready'); my @fds = sort @$events; my @exp = sort((fileno($r), fileno($x))); is_deeply(\@fds, \@exp, 'got both ready FDs'); is($p->epoll_ctl(EPOLL_CTL_DEL, fileno($r), 0), 0, 'EPOLL_CTL_DEL OK'); $p->epoll_wait(9, 0, $events); is(scalar @$events, 0, 'nothing ready after EPOLL_CTL_DEL'); done_testing; public-inbox-1.9.0/t/edit.t000066400000000000000000000151111430031475700154740ustar00rootroot00000000000000#!perl -w # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # edit frontend behavior test (t/replace.t for backend) use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::MID qw(mid_clean); require_git(2.6); require_mods('DBD::SQLite'); my ($tmpdir, $for_destroy) = tmpdir(); my $inboxdir = "$tmpdir/v2"; my $file = 't/data/0001.patch'; my $eml = eml_load($file); my $mid = mid_clean($eml->header('Message-ID')); my $ibx = create_inbox 'v2edit', indexlevel => 'basic', version => 2, tmpdir => $inboxdir, sub { my ($im, $ibx) = @_; $im->add($eml) or BAIL_OUT; }; my $cfgfile = "$tmpdir/config"; local $ENV{PI_CONFIG} = $cfgfile; my ($in, $out, $err, $cmd, $cur, $t); my $git = PublicInbox::Git->new("$ibx->{inboxdir}/git/0.git"); my $opt = { 0 => \$in, 1 => \$out, 2 => \$err }; $t = '-F FILE'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/boolean prefix/bool pfx/'"; $cmd = [ '-edit', "-F$file", $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t edit OK"); $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); like($cur->header('Subject'), qr/bool pfx/, "$t message edited"); like($out, qr/[a-f0-9]{40,}/, "$t shows commit on success"); } $t = '-m MESSAGE_ID'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/bool pfx/boolean prefix/'"; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t edit OK"); $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); like($cur->header('Subject'), qr/boolean prefix/, "$t message edited"); like($out, qr/[a-f0-9]{40,}/, "$t shows commit on success"); } $t = 'no-op -m MESSAGE_ID'; { $in = $out = $err = ''; my $before = $git->qx(qw(rev-parse HEAD)); local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/bool pfx/boolean prefix/'"; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t succeeds"); my $prev = $cur; $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); is_deeply($cur, $prev, "$t makes no change"); like($cur->header('Subject'), qr/boolean prefix/, "$t does not change message"); like($out, qr/NONE/, 'noop shows NONE'); my $after = $git->qx(qw(rev-parse HEAD)); is($after, $before, 'git head unchanged'); } $t = 'no-op -m MESSAGE_ID w/Status: header'; { # because mutt does it $in = $out = $err = ''; my $before = $git->qx(qw(rev-parse HEAD)); local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^Subject:.*/Status: RO\\n\$&/'"; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t succeeds"); my $prev = $cur; $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); is_deeply($cur, $prev, "$t makes no change"); like($cur->header('Subject'), qr/boolean prefix/, "$t does not change message"); is($cur->header('Status'), undef, 'Status header not added'); like($out, qr/NONE/, 'noop shows NONE'); my $after = $git->qx(qw(rev-parse HEAD)); is($after, $before, 'git head unchanged'); } $t = '-m MESSAGE_ID can change Received: headers'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^Subject:.*/Received: x\\n\$&/'"; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t succeeds"); $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); like($cur->header('Subject'), qr/boolean prefix/, "$t does not change Subject"); is($cur->header('Received'), 'x', 'added Received header'); } $t = '-m miss'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/boolean/FAIL/'"; $cmd = [ '-edit', "-m$mid-miss", $inboxdir ]; ok(!run_script($cmd, undef, $opt), "$t fails on invalid MID"); like($err, qr/No message found/, "$t shows error"); } $t = 'non-interactive editor failure'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 'END { exit 1 }'"; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(!run_script($cmd, undef, $opt), "$t detected"); like($err, qr/END \{ exit 1 \}' failed:/, "$t shows error"); } $t = 'mailEditor set in config'; { $in = $out = $err = ''; my $rc = xsys(qw(git config), "--file=$cfgfile", 'publicinbox.maileditor', "$^X -i -p -e 's/boolean prefix/bool pfx/'"); is($rc, 0, 'set publicinbox.mailEditor'); local $ENV{MAIL_EDITOR}; delete $ENV{MAIL_EDITOR}; local $ENV{GIT_EDITOR} = 'echo should not run'; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t edited message"); $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); like($cur->header('Subject'), qr/bool pfx/, "$t message edited"); unlike($out, qr/should not run/, 'did not run GIT_EDITOR'); } $t = '--raw and mbox escaping'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^\$/\\nFrom not mbox\\n/'"; $cmd = [ '-edit', "-m$mid", '--raw', $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t succeeds"); $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); like($cur->body, qr/^From not mbox/sm, 'put "From " line into body'); local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^>From not/\$& an/'"; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t succeeds with mbox escaping"); $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); like($cur->body, qr/^From not an mbox/sm, 'changed "From " line unescaped'); local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^From not an mbox\\n//s'"; $cmd = [ '-edit', "-m$mid", '--raw', $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t succeeds again"); $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); unlike($cur->body, qr/^From not an mbox/sm, "$t restored body"); } $t = 'reuse Message-ID'; { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my $im = $ibx->importer(0); ok($im->add($eml), "$t and re-add"); $im->done; like($warn[0], qr/reused for mismatched content/, "$t got warning"); } $t = 'edit ambiguous Message-ID with -m'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/bool pfx/boolean prefix/'"; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(!run_script($cmd, undef, $opt), "$t fails w/o --force"); like($err, qr/Multiple messages with different content found matching/, "$t shows matches"); like($err, qr/GIT_DIR=.*git show/is, "$t shows git commands"); } $t .= ' and --force'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^Subject:.*/Subject:x/i'"; $cmd = [ '-edit', "-m$mid", '--force', $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t succeeds"); like($err, qr/Will edit all of them/, "$t notes all will be edited"); my @dump = $git->qx(qw(cat-file --batch --batch-all-objects)); chomp @dump; is_deeply([grep(/^Subject:/i, @dump)], [qw(Subject:x Subject:x)], "$t edited both messages"); } done_testing(); public-inbox-1.9.0/t/emergency.t000066400000000000000000000033511430031475700165300ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; my ($tmpdir, $for_destroy) = tmpdir(); use_ok 'PublicInbox::Emergency'; { my $md = "$tmpdir/a"; my $em = PublicInbox::Emergency->new($md); ok(-d $md, 'Maildir a auto-created'); my @tmp = <$md/tmp/*>; is(scalar @tmp, 0, 'no temporary files exist, yet'); $em->prepare(\"BLAH"); @tmp = <$md/tmp/*>; is(scalar @tmp, 1, 'globbed one temporary file'); open my $fh, '<', $tmp[0] or die "failed to open: $!"; is("BLAH", <$fh>, 'wrote contents to temporary location'); my @new = <$md/new/*>; is(scalar @new, 0, 'no new files exist, yet'); $em = undef; @tmp = <$md/tmp/*>; is(scalar @tmp, 0, 'temporary file no longer exists'); @new = <$md/new/*>; is(scalar @new, 1, 'globbed one new file'); open $fh, '<', $new[0] or die "failed to open: $!"; is("BLAH", <$fh>, 'wrote contents to new location'); } { my $md = "$tmpdir/b"; my $em = PublicInbox::Emergency->new($md); ok(-d $md, 'Maildir b auto-created'); my @tmp = <$md/tmp/*>; is(scalar @tmp, 0, 'no temporary files exist, yet'); $em->prepare(\"BLAH"); @tmp = <$md/tmp/*>; is(scalar @tmp, 1, 'globbed one temporary file'); open my $fh, '<', $tmp[0] or die "failed to open: $!"; is("BLAH", <$fh>, 'wrote contents to temporary location'); my @new = <$md/new/*>; is(scalar @new, 0, 'no new files exist, yet'); is(sysread($em->fh, my $buf, 9), 4, 'read file handle exposed'); is($buf, 'BLAH', 'got expected data'); $em->abort; @tmp = <$md/tmp/*>; is(scalar @tmp, 0, 'temporary file no longer exists'); @new = <$md/new/*>; is(scalar @new , 0, 'new file no longer exists'); } done_testing(); public-inbox-1.9.0/t/eml.t000066400000000000000000000330561430031475700153340ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::TestCommon; use PublicInbox::MsgIter qw(msg_part_text); my @classes = qw(PublicInbox::Eml); SKIP: { require_mods('Email::MIME', 1); # TODO: Email::MIME behavior is not consistent in newer versions # we need to evaluate and possibly adjust our behavior to decide # between DWIM-ness with historical mail... push @classes, 'PublicInbox::MIME'; }; use_ok $_ for @classes; sub mime_load ($) { my ($path) = @_; open(my $fh, '<', $path) or die "open $path: $!"; PublicInbox::MIME->new(\(do { local $/; <$fh> })); } { my $eml = PublicInbox::Eml->new(\(my $str = "a: b\n\nhi\n")); is($str, "hi\n", '->new modified body like Email::Simple'); is($eml->body, "hi\n", '->body works'); is($eml->as_string, "a: b\n\nhi\n", '->as_string'); my $empty = PublicInbox::Eml->new("\n\n"); is($empty->as_string, "\n\n", 'empty message'); } for my $cls (@classes) { my $mime = $cls->new(my $orig = "From: x\n\nb"); is($mime->as_string, $orig, '->as_string works'); is($mime->header_obj->as_string, "From: x\n", 'header ->as_string works'); # headers is($mime->header_raw('From'), 'x', 'header_raw scalar context'); $mime = $cls->new("R:\n\tx\nR:\n 1\n"); is_deeply([$mime->header_raw('r')], [ 'x', '1' ], 'multi-value'); $mime = $cls->new("R:x\nR: 1\n"); is_deeply([$mime->header_raw('r')], [ 'x', '1' ], 'multi-value header'); $mime = $cls->new("R:x\n R: 1\nR:\n f\n"); is_deeply([$mime->header_raw('r')], [ 'x R: 1', 'f' ], 'multi-line, multi-value header'); $mime->header_set('r'); is_deeply([$mime->header_raw('r')], [], 'header_set clears'); $mime->header_set('r'); is_deeply([$mime->header_raw('r')], [], 'header_set clears idempotent'); $mime->header_set('r', 'h'); is_deeply([$mime->header_raw('r')], ['h'], 'header_set'); $mime->header_set('r', 'h', 'i'); is_deeply([$mime->header_raw('r')], ['h', 'i'], 'header_set ary'); $mime->header_set('rr', 'b'); is_deeply([$mime->header_raw('r')], ['h', 'i'], "header_set `rr' did not clobber `r'"); is($mime->header_raw('rr'), 'b', 'got set scalar'); $mime->header_set('rr', 'b'x100); is($mime->header_raw('rr'), 'b'x100, 'got long set scalar'); if ($cls eq 'PublicInbox::Eml') { like($mime->as_string, qr/^rr: b{100}\n(?:\n|\z)/sm, 'single token not wrapped'); } $mime->header_set('rr', ('b'x100) . ' wrap me'); if ($cls eq 'PublicInbox::Eml') { like($mime->as_string, qr/^rr: b{100}\n\twrap me\n/sm, 'wrapped after long token'); } my $exp = "pre\tformatted\n with\n breaks"; $mime->header_set('r', $exp); like($mime->as_string, qr/^r: \Q$exp\E/sm, 'preformatted preserved'); } # for @classes for my $cls (@classes) { # make sure we don't add quotes if not needed my $eml = $cls->new("From: John Smith \n\n"); is($eml->header('From'), 'John Smith ', "name not unnecessarily quoted $cls"); } for my $cls (@classes) { my $eml = $cls->new("Subject: foo\n\n"); $eml->header_str_set('Subject', "\x{100}"); like($eml->header_raw('Subject'), qr/utf-8\?B\?/i, 'MIME-B encoded UTF-8 Subject'); is_deeply([$eml->header('Subject')], [ "\x{100}" ], 'got wide character back'); } # linux-mips apparently got some messages injected w/o Message-ID # and long Subject: lines w/o leading whitespace. # What appears in the blobs was generated by V2Writable. for my $cls (@classes) { my $eml = $cls->new(<<'EOF'); Message-ID: <20101130193431@z> Subject: something really long and really wrong From: linux-mips archive injection Object-Id: 8c56b7abdd551b1264e6522ededbbed9890cccd0 EOF is_deeply([ $eml->header('Subject') ], [ 'something really long and really wrong' ], 'continued long line w/o leading spaces '.$cls); is_deeply([ $eml->header('From') ], [ 'linux-mips archive injection' ], 'subsequent line not corrupted'); is_deeply([ $eml->header('Message-ID') ], ['<20101130193431@z>'], 'preceding line readable'); } # for @classes { my $eml = eml_load 't/msg_iter-order.eml'; my @parts; my $orig = $eml->as_string; $eml->each_part(sub { my ($part, $level, @ex) = @{$_[0]}; my $s = $part->body_str; $s =~ s/\s+//sg; push @parts, [ $s, $level, @ex ]; }); is_deeply(\@parts, [ [ qw(a 1 1) ], [ qw(b 1 2) ] ], 'order is fine'); is($eml->as_string, $orig, 'unchanged by ->each_part'); $eml->each_part(sub {}, undef, 1); is(defined($eml) ? $eml->body_raw : '', # old msg_iter clobbers $eml '', 'each_part can clobber body'); } if ('descend into message/rfc822') { my $eml = eml_load 't/data/message_embed.eml'; my @parts; $eml->each_part(sub { my ($part, $level, @ex) = @{$_[0]}; push @parts, [ $part, $level, @ex ]; }); is(scalar(@parts), 6, 'got all parts'); like($parts[0]->[0]->body, qr/^testing embedded message harder\n/sm, 'first part found'); is_deeply([ @{$parts[0]}[1..2] ], [ 1, '1' ], 'got expected depth and level for part #0'); is($parts[1]->[0]->filename, 'embed2x.eml', 'attachment filename found'); is_deeply([ @{$parts[1]}[1..2] ], [ 1, '2' ], 'got expected depth and level for part #1'); is_deeply([ @{$parts[2]}[1..2] ], [ 2, '2.1' ], 'got expected depth and level for part #2'); is_deeply([ @{$parts[3]}[1..2] ], [ 3, '2.1.1' ], 'got expected depth and level for part #3'); is_deeply([ @{$parts[4]}[1..2] ], [ 3, '2.1.2' ], 'got expected depth and level for part #4'); is($parts[4]->[0]->filename, 'test.eml', 'another attachment filename found'); is_deeply([ @{$parts[5]}[1..2] ], [ 4, '2.1.2.1' ], 'got expected depth and level for part #5'); } # body-less, boundary-less for my $cls (@classes) { my $call = 0; $cls->new(<<'EOF')->each_part(sub { $call++ }, 0, 1); Content-Type: multipart/mixed; boundary="body-less" EOF is($call, 1, 'called on bodyless multipart'); my @tmp; $cls->new(<<'EOF')->each_part(sub { push @tmp, \@_; }, 0, 1); Content-Type: multipart/mixed; boundary="boundary-less" hello world EOF is(scalar(@tmp), 1, 'got one part even w/o boundary'); is($tmp[0]->[0]->[0]->body, "hello world\n", 'body preserved'); is($tmp[0]->[0]->[1], 0, '$depth is zero'); is($tmp[0]->[0]->[2], 1, '@idx is one'); } # I guess the following only worked in PI::M because of a happy accident # involving inheritance: for my $cls (@classes) { my @tmp; my $header_less = <<'EOF'; Archived-At: <85k5su9k59.fsf_-_@lola.goethe.zz> Content-Type: multipart/mixed; boundary="header-less" --header-less this is the body --header-less i-haz: header something else --header-less-- EOF my $expect = "this is the body\n"; $cls->new($header_less)->each_part(sub { push @tmp, \@_ }, 0, 1); my $body = $tmp[0]->[0]->[0]->body; if ($cls eq 'PublicInbox::Eml') { is($body, $expect, 'body-only subpart in '.$cls); } elsif ($body ne $expect) { diag "W: $cls `$body' != `$expect'"; } is($tmp[1]->[0]->[0]->body, "something else\n"); is(scalar(@tmp), 2, 'two parts'); } if ('one newline before headers') { my $eml = PublicInbox::Eml->new("\nNewline: no Header \n"); my @v = $eml->header_raw('Newline'); is_deeply(\@v, ['no Header'], 'no header'); is($eml->crlf, "\n", 'got CRLF as "\n"'); is($eml->body, ""); } if ('body only') { my $str = <new($str); is($eml->body, $str, 'body-only accepted'); } for my $cls (@classes) { # XXX: matching E::M, but not sure about this my $s = <new(\$s); my $nr = 0; my @v; $eml->each_part(sub { @v = $_[0]->[0]->header_raw('Header'); $nr++; }); is($nr, 1, 'only one part'); is_deeply(\@v, [], "nothing w/o body $cls"); } for my $cls (@classes) { my $s = <new(\$s); my $nr = 0; $eml->each_part(sub { my $part = $_[0]->[0]; is_deeply([$part->header_raw('should')], ['appear'], 'only got one header'); is($part->body, "yes\n", 'got expected body'); $nr++; }); is($nr, 1, 'only one part'); } for my $cls (@classes) { SKIP: { skip 'newer Email::MIME behavior inconsistent', 1 if $cls eq 'PublicInbox::MIME'; my $s = <new(\$s); my ($str, $err) = msg_part_text($eml, $eml->content_type); is($str, "\x{100}\n", "got wide character by assuming utf-8 ($cls)"); } # SKIP } if ('we differ from Email::MIME with final "\n" on missing epilogue') { my $s = <new(\$s); is(($eml->subparts)[-1]->body, "no epilogue\n", 'final "\n" preserved on missing epilogue'); } if ('header_size_limit stolen from postfix') { local $PublicInbox::Eml::header_size_limit = 4; my @w; local $SIG{__WARN__} = sub { push @w, @_ }; my $eml = PublicInbox::Eml->new("a:b\na:d\n\nzz"); is_deeply([$eml->header('a')], ['b'], 'no overrun header'); is($eml->body_raw, 'zz', 'body not damaged'); is($eml->header_obj->as_string, "a:b\n", 'header truncated'); is(grep(/truncated/, @w), 1, 'truncation warned'); $eml = PublicInbox::Eml->new("a:b\na:d\n"); is_deeply([$eml->header('a')], ['b'], 'no overrun header w/o body'); local $PublicInbox::Eml::header_size_limit = 5; $eml = PublicInbox::Eml->new("a:b\r\na:d\r\n\nzz"); is_deeply([$eml->header('a')], ['b'], 'no overrun header on CRLF'); is($eml->body_raw, 'zz', 'body not damaged'); @w = (); $eml = PublicInbox::Eml->new("too:long\n"); $eml = PublicInbox::Eml->new("too:long\n\n"); $eml = PublicInbox::Eml->new("too:long\r\n\r\n"); is(grep(/ignored/, @w), 3, 'ignored header warned'); } if ('maxparts is a feature unique to us') { my $eml = eml_load 't/psgi_attach.eml'; my @orig; $eml->each_part(sub { push @orig, $_[0]->[0] }); local $PublicInbox::Eml::mime_parts_limit = scalar(@orig); my $i = 0; $eml->each_part(sub { my $cur = $_[0]->[0]; my $prv = $orig[$i++]; is($cur->body_raw, $prv->body_raw, "part #$i matches"); }); is($i, scalar(@orig), 'maxparts honored'); $PublicInbox::Eml::mime_parts_limit--; my @ltd; $eml->each_part(sub { push @ltd, $_[0]->[0] }); for ($i = 0; $i <= $#ltd; $i++) { is($ltd[$i]->body_raw, $orig[$i]->body_raw, "part[$i] matches"); } is(scalar(@ltd), scalar(@orig) - 1, 'maxparts honored'); } SKIP: { require_mods('PublicInbox::MIME', 1); my $eml = eml_load 't/utf8.eml'; my $mime = mime_load 't/utf8.eml'; for my $h (qw(Subject From To)) { my $v = $eml->header($h); my $m = $mime->header($h); is($v, $m, "decoded -8 $h matches Email::MIME"); ok(utf8::is_utf8($v), "$h is UTF-8"); ok(utf8::valid($v), "UTF-8 valid $h"); } my $s = $eml->body_str; ok(utf8::is_utf8($s), 'body_str is UTF-8'); ok(utf8::valid($s), 'UTF-8 valid body_str'); my $ref = \(my $x = 'ref'); for my $msg ($eml, $mime) { $msg->body_str_set($s .= "\nHI\n"); ok(!utf8::is_utf8($msg->body_raw), 'raw octets after body_str_set'); $s = $msg->body_str; ok(utf8::is_utf8($s), 'body_str is UTF-8 after set'); ok(utf8::valid($s), 'UTF-8 valid body_str after set'); $msg->body_set($ref); is($msg->body_raw, $$ref, 'body_set worked on scalar ref'); $msg->body_set($$ref); is($msg->body_raw, $$ref, 'body_set worked on scalar'); } $eml = eml_load 't/iso-2202-jp.eml'; $mime = mime_load 't/iso-2202-jp.eml'; $s = $eml->body_str; is($s, $mime->body_str, 'ISO-2202-JP body_str'); ok(utf8::is_utf8($s), 'ISO-2202-JP => UTF-8 body_str'); ok(utf8::valid($s), 'UTF-8 valid body_str'); $eml = eml_load 't/psgi_attach.eml'; $mime = mime_load 't/psgi_attach.eml'; is_deeply([ map { $_->body_raw } $eml->subparts ], [ map { $_->body_raw } $mime->subparts ], 'raw ->subparts match deeply'); is_deeply([ map { $_->body } $eml->subparts ], [ map { $_->body } $mime->subparts ], '->subparts match deeply'); for my $msg ($eml, $mime) { my @old = $msg->subparts; $msg->parts_set([]); is_deeply([$msg->subparts], [], 'parts_set can clear'); $msg->parts_set([$old[-1]]); is(scalar $msg->subparts, 1, 'only last remains'); } # some versions of Email::MIME or Email::MIME::* will drop # unnecessary ", while PublicInbox::Eml will preserve the original my $exp = $mime->as_string; $exp =~ s/; boundary=b\b/; boundary="b"/; is($eml->as_string, $exp, 'as_string matches after parts_set'); } for my $cls (@classes) { my $s = <<'EOF'; Content-Type: text/x-patch; name="=?utf-8?q?vtpm-fakefile.patch?=" Content-Disposition: attachment; filename="=?utf-8?q?vtpm-makefile.patch?=" EOF is($cls->new($s)->filename, 'vtpm-makefile.patch', "filename decoded ($cls)") if $cls ne 'PublicInbox::MIME'; $s =~ s/^Content-Disposition:.*$//sm; is($cls->new($s)->filename, 'vtpm-fakefile.patch', "filename fallback ($cls)") if $cls ne 'PublicInbox::MIME'; is($cls->new($s)->content_type, 'text/x-patch; name="vtpm-fakefile.patch"', qq[matches Email::MIME output, "correct" or not ($cls)]); $s = <<'EOF'; Content-Type: multipart/foo; boundary=b --b Content-Disposition: attachment; filename="=?utf-8?q?vtpm-makefile.patch?=" a --b Content-Type: text/x-patch; name="=?utf-8?q?vtpm-fakefile.patch?=" b --b-- EOF SKIP: { skip 'newer Email::MIME is inconsistent here', 1 if $cls eq 'PublicInbox::MIME'; my @x; $cls->new($s)->each_part(sub { push @x, $_[0]->[0]->filename }); is_deeply(['vtpm-makefile.patch', 'vtpm-fakefile.patch'], \@x, "got filename for both attachments ($cls)"); } } done_testing; public-inbox-1.9.0/t/eml_content_disposition.t000066400000000000000000000055511430031475700215110ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # Copyright (C) 2004- Simon Cozens, Casey West, Ricardo SIGNES # This library is free software; you can redistribute it and/or modify # it under the same terms as Perl itself. # # License: GPL-1.0+ or Artistic-1.0-Perl # # use strict; use Test::More; use PublicInbox::EmlContentFoo qw(parse_content_disposition); my %cd_tests = ( '' => { type => 'attachment', attributes => {} }, 'inline' => { type => 'inline', attributes => {} }, 'attachment' => { type => 'attachment', attributes => {} }, 'attachment; filename=genome.jpeg;' . ' modification-date="Wed, 12 Feb 1997 16:29:51 -0500"' => { type => 'attachment', attributes => { filename => 'genome.jpeg', 'modification-date' => 'Wed, 12 Feb 1997 16:29:51 -0500' } }, q(attachment; filename*=UTF-8''genome.jpeg;) . q( modification-date="Wed, 12 Feb 1997 16:29:51 -0500") => { type => 'attachment', attributes => { filename => 'genome.jpeg', 'modification-date' => 'Wed, 12 Feb 1997 16:29:51 -0500' } }, q(attachment; filename*0*=us-ascii'en'This%20is%20even%20more%20;) . q( filename*1*=%2A%2A%2Afun%2A%2A%2A%20; filename*2="isn't it!") => { type => 'attachment', attributes => { filename => "This is even more ***fun*** isn't it!" } }, q(attachment; filename*0*='en'This%20is%20even%20more%20;) . q( filename*1*=%2A%2A%2Afun%2A%2A%2A%20; filename*2="isn't it!") => { type => 'attachment', attributes => { filename => "This is even more ***fun*** isn't it!" } }, q(attachment; filename*0*=''This%20is%20even%20more%20;) . q( filename*1*=%2A%2A%2Afun%2A%2A%2A%20; filename*2="isn't it!") => { type => 'attachment', attributes => { filename => "This is even more ***fun*** isn't it!" } }, q(attachment; filename*0*=us-ascii''This%20is%20even%20more%20;). q( filename*1*=%2A%2A%2Afun%2A%2A%2A%20; filename*2="isn't it!") => { type => 'attachment', attributes => { filename => "This is even more ***fun*** isn't it!" } }, ); my %non_strict_cd_tests = ( 'attachment; filename=genome.jpeg;' . ' modification-date="Wed, 12 Feb 1997 16:29:51 -0500";' => { type => 'attachment', attributes => { filename => 'genome.jpeg', 'modification-date' => 'Wed, 12 Feb 1997 16:29:51 -0500' } }, ); sub test { my ($string, $expect, $info) = @_; local $_; $info =~ s/\r/\\r/g; $info =~ s/\n/\\n/g; is_deeply(parse_content_disposition($string), $expect, $info); } for (sort keys %cd_tests) { test($_, $cd_tests{$_}, "Can parse C-D <$_>"); } local $PublicInbox::EmlContentFoo::STRICT_PARAMS = 0; for (sort keys %cd_tests) { test($_, $cd_tests{$_}, "Can parse non-strict C-D <$_>"); } for (sort keys %non_strict_cd_tests) { test($_, $non_strict_cd_tests{$_}, "Can parse non-strict C-D <$_>"); } done_testing; public-inbox-1.9.0/t/eml_content_type.t000066400000000000000000000166711430031475700201330ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # Copyright (C) 2004- Simon Cozens, Casey West, Ricardo SIGNES # This library is free software; you can redistribute it and/or modify # it under the same terms as Perl itself. # # License: GPL-1.0+ or Artistic-1.0-Perl # # use strict; use Test::More; use PublicInbox::EmlContentFoo qw(parse_content_type); my %ct_tests = ( '' => { type => "text", subtype => "plain", attributes => { charset => "us-ascii" } }, "text/plain" => { type => "text", subtype => "plain", attributes => {} }, 'text/plain; charset=us-ascii' => { type => "text", subtype => "plain", attributes => { charset => "us-ascii" } }, 'text/plain; charset="us-ascii"' => { type => "text", subtype => "plain", attributes => { charset => "us-ascii" } }, "text/plain; charset=us-ascii (Plain text)" => { type => "text", subtype => "plain", attributes => { charset => "us-ascii" } }, 'text/plain; charset=ISO-8859-1' => { type => "text", subtype => "plain", attributes => { charset => "ISO-8859-1" } }, 'text/plain; charset="ISO-8859-1"' => { type => "text", subtype => "plain", attributes => { charset => "ISO-8859-1" } }, 'text/plain; charset="ISO-8859-1" (comment)' => { type => "text", subtype => "plain", attributes => { charset => "ISO-8859-1" } }, '(c) text/plain (c); (c) charset=ISO-8859-1 (c)' => { type => "text", subtype => "plain", attributes => { charset => "ISO-8859-1" } }, '(c \( \\\\) (c) text/plain (c) (c) ; (c) (c) charset=utf-8 (c)' => { type => "text", subtype => "plain", attributes => { charset => "utf-8" } }, 'text/plain; (c (nested ()c)another c)() charset=ISO-8859-1' => { type => "text", subtype => "plain", attributes => { charset => "ISO-8859-1" } }, 'text/plain (c \(!nested ()c\)\)(nested\(c())); charset=utf-8' => { type => "text", subtype => "plain", attributes => { charset => "utf-8" } }, "application/foo" => { type => "application", subtype => "foo", attributes => {} }, "multipart/mixed; boundary=unique-boundary-1" => { type => "multipart", subtype => "mixed", attributes => { boundary => "unique-boundary-1" } }, 'message/external-body; access-type=local-file; name="/u/n/m.jpg"' => { type => "message", subtype => "external-body", attributes => { "access-type" => "local-file", "name" => "/u/n/m.jpg" } }, 'multipart/mixed; boundary="----------=_1026452699-10321-0" ' => { 'type' => 'multipart', 'subtype' => 'mixed', 'attributes' => { 'boundary' => '----------=_1026452699-10321-0' } }, 'multipart/report; boundary= "=_0=73e476c3-cd5a-5ba3-b910-2="' => { 'type' => 'multipart', 'subtype' => 'report', 'attributes' => { 'boundary' => '=_0=73e476c3-cd5a-5ba3-b910-2=' } }, 'multipart/report; boundary=' . " \t" . '"=_0=7-c-5-b-2="' => { 'type' => 'multipart', 'subtype' => 'report', 'attributes' => { 'boundary' => '=_0=7-c-5-b-2=' } }, 'message/external-body; access-type=URL;' . ' URL*0="ftp://";' . ' URL*1="example.com/"' => { 'type' => 'message', 'subtype' => 'external-body', 'attributes' => { 'access-type' => 'URL', 'url' => 'ftp://example.com/' } }, 'message/external-body; access-type=URL; URL="ftp://example.com/"' => { 'type' => 'message', 'subtype' => 'external-body', 'attributes' => { 'access-type' => 'URL', 'url' => 'ftp://example.com/', } }, "application/x-stuff; title*=us-ascii'en-us'This%20is%20f%2Ad" => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => 'This is f*d' } }, "application/x-stuff; title*=us-ascii''This%20is%20f%2Ad" => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => 'This is f*d' } }, "application/x-stuff; title*=''This%20is%20f%2Ad" => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => 'This is f*d' } }, "application/x-stuff; title*='en-us'This%20is%20f%2Ad" => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => 'This is f*d' } }, q(application/x-stuff;) . q( title*0*=us-ascii'en'This%20is%20even%20more%20;) . q(title*1*=%2A%2A%2Afun%2A%2A%2A%20; title*2="isn't it!") => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => "This is even more ***fun*** isn't it!" } }, q(application/x-stuff;) . q( title*0*='en'This%20is%20even%20more%20;) . q( title*1*=%2A%2A%2Afun%2A%2A%2A%20; title*2="isn't it!") => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => "This is even more ***fun*** isn't it!" } }, q(application/x-stuff;) . q( title*0*=''This%20is%20even%20more%20;) . q( title*1*=%2A%2A%2Afun%2A%2A%2A%20; title*2="isn't it!") => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => "This is even more ***fun*** isn't it!" } }, q(application/x-stuff;). q( title*0*=us-ascii''This%20is%20even%20more%20;). q( title*1*=%2A%2A%2Afun%2A%2A%2A%20; title*2="isn't it!") => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => "This is even more ***fun*** isn't it!" } }, 'text/plain; attribute="v\"v\\\\v\(v\>\<\)\@\,\;\:\/\]\[\?\=v v";' . ' charset=us-ascii' => { 'type' => 'text', 'subtype' => 'plain', 'attributes' => { 'attribute' => 'v"v\\v(v><)@,;:/][?=v v', 'charset' => 'us-ascii', }, }, qq(text/plain;\r charset=us-ascii;\r attribute="\r value1 \r value2\r\n value3\r\n value4\r\n "\r\n ) => { 'type' => 'text', 'subtype' => 'plain', 'attributes' => { 'attribute' => ' value1 value2 value3 value4 ', 'charset' => 'us-ascii', }, }, ); my %non_strict_ct_tests = ( "text/plain;" => { type => "text", subtype => "plain", attributes => {} }, "text/plain; " => { type => "text", subtype => "plain", attributes => {} }, 'image/jpeg;' . ' x-mac-type="3F3F3F3F";'. ' x-mac-creator="3F3F3F3F" name="file name.jpg";' => { type => "image", subtype => "jpeg", attributes => { 'x-mac-type' => "3F3F3F3F", 'x-mac-creator' => "3F3F3F3F", 'name' => "file name.jpg" } }, "text/plain; key=very long value" => { type => "text", subtype => "plain", attributes => { key => "very long value" } }, "text/plain; key=very long value key2=value2" => { type => "text", subtype => "plain", attributes => { key => "very long value", key2 => "value2" } }, 'multipart/mixed; boundary = "--=_Next_Part_24_Nov_2016_08.09.21"' => { type => "multipart", subtype => "mixed", attributes => { boundary => "--=_Next_Part_24_Nov_2016_08.09.21" } }, ); sub test { my ($string, $expect, $info) = @_; local $_; $info =~ s/\r/\\r/g; $info =~ s/\n/\\n/g; is_deeply(parse_content_type($string), $expect, $info); } for (sort keys %ct_tests) { test($_, $ct_tests{$_}, "Can parse C-T <$_>"); } local $PublicInbox::EmlContentFoo::STRICT_PARAMS = 0; for (sort keys %ct_tests) { test($_, $ct_tests{$_}, "Can parse non-strict C-T <$_>"); } for (sort keys %non_strict_ct_tests) { test( $_, $non_strict_ct_tests{$_}, "Can parse non-strict C-T <$_>" ); } done_testing; public-inbox-1.9.0/t/epoll.t000066400000000000000000000013641430031475700156670ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::Syscall qw(:epoll); plan skip_all => 'not Linux' if $^O ne 'linux'; my $epfd = epoll_create(); ok($epfd >= 0, 'epoll_create'); open(my $hnd, '+<&=', $epfd); # for autoclose pipe(my ($r, $w)) or die "pipe: $!"; is(epoll_ctl($epfd, EPOLL_CTL_ADD, fileno($w), EPOLLOUT), 0, 'epoll_ctl socket EPOLLOUT'); my @events; epoll_wait($epfd, 100, 10000, \@events); is(scalar(@events), 1, 'got one event'); is($events[0], fileno($w), 'got expected FD'); close $w; epoll_wait($epfd, 100, 0, \@events); is(scalar(@events), 0, 'epoll_wait timeout'); done_testing; public-inbox-1.9.0/t/extindex-psgi.t000066400000000000000000000057631430031475700173530ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Config; use File::Copy qw(cp); use IO::Handle (); require_git(2.6); require_mods(qw(json DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test URI::Escape Plack::Builder)); use_ok($_) for (qw(HTTP::Request::Common Plack::Test)); use IO::Uncompress::Gunzip qw(gunzip); require PublicInbox::WWW; my ($ro_home, $cfg_path) = setup_public_inboxes; my ($tmpdir, $for_destroy) = tmpdir; my $home = "$tmpdir/home"; mkdir $home or BAIL_OUT $!; mkdir "$home/.public-inbox" or BAIL_OUT $!; my $pi_config = "$home/.public-inbox/config"; cp($cfg_path, $pi_config) or BAIL_OUT; my $env = { HOME => $home }; run_script([qw(-extindex --all), "$tmpdir/eidx"], $env) or BAIL_OUT; { open my $cfgfh, '>>', $pi_config or BAIL_OUT; $cfgfh->autoflush(1); print $cfgfh <new(PublicInbox::Config->new($pi_config)); my $client = sub { my ($cb) = @_; my $res = $cb->(GET('/all/')); is($res->code, 200, '/all/ good'); $res = $cb->(GET('/all/new.atom', Host => 'usethis.example.com')); like($res->content, qr!http://usethis\.example\.com/!s, 'Host: header respected in Atom feed'); unlike($res->content, qr!http://bogus\.example\.com/!s, 'default URL ignored with different host header'); $res = $cb->(GET('/all/_/text/config/')); is($res->code, 200, '/text/config HTML'); $res = $cb->(GET('/all/_/text/config/raw')); is($res->code, 200, '/text/config raw'); my $f = "$tmpdir/extindex.config"; open my $fh, '>', $f or xbail $!; print $fh $res->content or xbail $!; close $fh or xbail $!; my $cfg = PublicInbox::Config->git_config_dump($f); is($?, 0, 'no errors from git-config parsing'); ok($cfg->{'extindex.all.topdir'}, 'extindex.topdir defined'); $res = $cb->(GET('/all/all.mbox.gz')); is($res->code, 200, 'all.mbox.gz'); $res = $cb->(GET('/')); like($res->content, qr!\Qhttp://bogus.example.com/all\E!, '/all listed'); $res = $cb->(GET('/?q=')); is($res->code, 200, 'no query means all inboxes'); $res = $cb->(GET('/?q=nonexistent')); is($res->code, 404, 'no inboxes matched'); unlike($res->content, qr!no inboxes, yet!, 'we have inboxes, just no matches'); my $m = {}; for my $pfx (qw(/t1 /t2), '') { $res = $cb->(GET($pfx.'/manifest.js.gz')); gunzip(\($res->content) => \(my $js)); $m->{$pfx} = json_utf8->decode($js); } is_deeply([sort keys %{$m->{''}}], [ sort(keys %{$m->{'/t1'}}, keys %{$m->{'/t2'}}) ], 't1 + t2 = all'); is_deeply([ sort keys %{$m->{'/t2'}} ], [ '/t2/git/0.git' ], 't2 manifest'); is_deeply([ sort keys %{$m->{'/t1'}} ], [ '/t1' ], 't2 manifest'); }; test_psgi(sub { $www->call(@_) }, $client); %$env = (%$env, TMPDIR => $tmpdir, PI_CONFIG => $pi_config); test_httpd($env, $client); done_testing; public-inbox-1.9.0/t/extsearch.t000066400000000000000000000473311430031475700165460ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Config; use PublicInbox::InboxWritable; use Fcntl qw(:seek); require_git(2.6); require_mods(qw(json DBD::SQLite Search::Xapian)); require PublicInbox::Search; use_ok 'PublicInbox::ExtSearch'; use_ok 'PublicInbox::ExtSearchIdx'; use_ok 'PublicInbox::OverIdx'; my $sock = tcp_server(); my $host_port = tcp_host_port($sock); my ($home, $for_destroy) = tmpdir(); local $ENV{HOME} = $home; mkdir "$home/.public-inbox" or BAIL_OUT $!; my $cfg_path = "$home/.public-inbox/config"; open my $fh, '>', $cfg_path or BAIL_OUT $!; print $fh < $v2addr }; my $eml = eml_load('t/utf8.eml'); $eml->header_set('List-Id', ''); open($fh, '+>', undef) or BAIL_OUT $!; $fh->autoflush(1); print $fh $eml->as_string or BAIL_OUT $!; seek($fh, 0, SEEK_SET) or BAIL_OUT $!; run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or BAIL_OUT '-mda'; ok(run_script([qw(-init -V1 v1test --newsgroup v1.example), "$home/v1test", 'http://example.com/v1test', $v1addr ]), 'v1test init'); $eml->header_set('List-Id', ''); seek($fh, 0, SEEK_SET) or BAIL_OUT $!; truncate($fh, 0) or BAIL_OUT $!; print $fh $eml->as_string or BAIL_OUT $!; seek($fh, 0, SEEK_SET) or BAIL_OUT $!; $env = { ORIGINAL_RECIPIENT => $v1addr }; run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or BAIL_OUT '-mda'; run_script([qw(-index -Lbasic), "$home/v1test"]) or BAIL_OUT "index $?"; ok(run_script([qw(-extindex --dangerous --all), "$home/extindex"]), 'extindex init'); { my $es = PublicInbox::ExtSearch->new("$home/extindex"); ok($es->has_threadid, '->has_threadid'); } if ('with boost') { xsys([qw(git config publicinbox.v1test.boost), 10], { GIT_CONFIG => $cfg_path }); ok(run_script([qw(-extindex --all), "$home/extindex-b"]), 'extindex init with boost'); my $es = PublicInbox::ExtSearch->new("$home/extindex-b"); my $smsg = $es->over->get_art(1); ok($smsg, 'got first article'); my $xref3 = $es->over->get_xref3($smsg->{num}); my @v1 = grep(/\Av1/, @$xref3); my @v2 = grep(/\Av2/, @$xref3); like($v1[0], qr/\Av1\.example.*?\b\Q$smsg->{blob}\E\b/, 'smsg->{blob} respected boost'); is(scalar(@$xref3), 2, 'only to entries'); undef $es; xsys([qw(git config publicinbox.v2test.boost), 20], { GIT_CONFIG => $cfg_path }); ok(run_script([qw(-extindex --all --reindex), "$home/extindex-b"]), 'extindex --reindex with altered boost'); $es = PublicInbox::ExtSearch->new("$home/extindex-b"); $smsg = $es->over->get_art(1); like($v2[0], qr/\Av2\.example.*?\b\Q$smsg->{blob}\E\b/, 'smsg->{blob} respects boost after reindex'); # high boost added later my $b2 = "$home/extindex-bb"; ok(run_script([qw(-extindex), $b2, "$home/v1test"]), 'extindex with low boost inbox only'); ok(run_script([qw(-extindex), $b2, "$home/v2test"]), 'extindex with high boost inbox only'); $es = PublicInbox::ExtSearch->new($b2); $smsg = $es->over->get_art(1); $xref3 = $es->over->get_xref3($smsg->{num}); like($v2[0], qr/\Av2\.example.*?\b\Q$smsg->{blob}\E\b/, 'smsg->{blob} respected boost across 2 index runs'); xsys([qw(git config --unset publicinbox.v1test.boost)], { GIT_CONFIG => $cfg_path }); xsys([qw(git config --unset publicinbox.v2test.boost)], { GIT_CONFIG => $cfg_path }); } { # TODO: -extindex should write this to config open $fh, '>>', $cfg_path or BAIL_OUT $!; print $fh <ALL [extindex "all"] topdir = $home/extindex EOF close $fh or BAIL_OUT $!; my $pi_cfg = PublicInbox::Config->new; $pi_cfg->fill_all; ok($pi_cfg->ALL, '->ALL'); my $ibx = $pi_cfg->{-by_newsgroup}->{'v2.example'}; my $ret = $pi_cfg->ALL->nntp_xref_for($ibx, $ibx->over->get_art(1)); is_deeply($ret, { 'v1.example' => 1, 'v2.example' => 1 }, '->nntp_xref_for'); } SKIP: { require_mods(qw(Net::NNTP), 1); my ($out, $err) = ("$home/nntpd.out.log", "$home/nntpd.err.log"); my $cmd = [ '-nntpd', '-W0', "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, undef, { 3 => $sock }); my $n = Net::NNTP->new($host_port); my @xp = $n->xpath(''); is_deeply(\@xp, [ qw(v1.example/1 v2.example/1) ]); $n->group('v1.example'); my $res = $n->head(1); @$res = grep(/^Xref: /, @$res); like($res->[0], qr/ v1\.example:1 v2\.example:1/, 'nntp_xref works'); } my $es = PublicInbox::ExtSearch->new("$home/extindex"); { my $smsg = $es->over->get_art(1); ok($smsg, 'got first article'); is($es->over->get_art(2), undef, 'only one added'); my $xref3 = $es->over->get_xref3(1); like($xref3->[0], qr/\A\Qv2.example\E:1:/, 'order preserved 1'); like($xref3->[1], qr/\A\Qv1.example\E:1:/, 'order preserved 2'); is(scalar(@$xref3), 2, 'only to entries'); } if ('inbox edited') { my ($in, $out, $err); $in = $out = $err = ''; my $opt = { 0 => \$in, 1 => \$out, 2 => \$err }; my $env = { MAIL_EDITOR => "$^X -i -p -e 's/test message/BEST MSG/'" }; my $cmd = [ qw(-edit -Ft/utf8.eml), "$home/v2test" ]; ok(run_script($cmd, $env, $opt), '-edit'); ok(run_script([qw(-extindex --all), "$home/extindex"], undef, $opt), 'extindex again'); like($err, qr/discontiguous range/, 'warned about discontiguous range'); my $msg1 = $es->over->get_art(1) or BAIL_OUT 'msg1 missing'; my $msg2 = $es->over->get_art(2) or BAIL_OUT 'msg2 missing'; is($msg1->{mid}, $msg2->{mid}, 'edited message indexed'); isnt($msg1->{blob}, $msg2->{blob}, 'blobs differ'); my $eml2 = $es->smsg_eml($msg2); like($eml2->body, qr/BEST MSG/, 'edited body in #2'); unlike($eml2->body, qr/test message/, 'old body discarded in #2'); my $eml1 = $es->smsg_eml($msg1); like($eml1->body, qr/test message/, 'original body in #1'); my $x1 = $es->over->get_xref3(1); my $x2 = $es->over->get_xref3(2); is(scalar(@$x1), 1, 'original only has one xref3'); is(scalar(@$x2), 1, 'new message has one xref3'); isnt($x1->[0], $x2->[0], 'xref3 differs'); my $mset = $es->mset('b:"BEST MSG"'); is($mset->size, 1, 'new message found'); $mset = $es->mset('b:"test message"'); is($mset->size, 1, 'old message found'); delete @$es{qw(git over xdb qp)}; # fork preparation my $pi_cfg = PublicInbox::Config->new; $pi_cfg->fill_all; is(scalar($pi_cfg->ALL->mset('s:Testing')->items), 2, '2 results in ->ALL'); my $res = {}; my $nr = 0; $pi_cfg->each_inbox(sub { $nr++; my ($ibx) = @_; local $SIG{__WARN__} = sub {}; # FIXME support --reindex my $mset = $ibx->isrch->mset('s:Testing'); $res->{$ibx->eidx_key} = $ibx->isrch->mset_to_smsg($ibx, $mset); }); is($nr, 2, 'two inboxes'); my $exp = {}; for my $v (qw(v1 v2)) { my $ibx = $pi_cfg->lookup_newsgroup("$v.example"); my $smsg = $ibx->over->get_art(1); $smsg->psgi_cull; $exp->{"$v.example"} = [ $smsg ]; } is_deeply($res, $exp, 'isearch limited results'); $pi_cfg = $res = $exp = undef; open my $rmfh, '+>', undef or BAIL_OUT $!; $rmfh->autoflush(1); print $rmfh $eml2->as_string or BAIL_OUT $!; seek($rmfh, 0, SEEK_SET) or BAIL_OUT $!; $opt->{0} = $rmfh; ok(run_script([qw(-learn rm --all)], undef, $opt), '-learn rm'); ok(run_script([qw(-extindex --all), "$home/extindex"], undef, undef), 'extindex after rm'); is($es->over->get_art(2), undef, 'doc #2 gone'); $mset = $es->mset('b:"BEST MSG"'); is($mset->size, 0, 'new message gone'); } my $misc = $es->misc; my @it = $misc->mset('')->items; is(scalar(@it), 2, 'two inboxes'); like($it[0]->get_document->get_data, qr/v2test/, 'docdata matched v2'); like($it[1]->get_document->get_data, qr/v1test/, 'docdata matched v1'); my $cfg = PublicInbox::Config->new; my $schema_version = PublicInbox::Search::SCHEMA_VERSION(); my $f = "$home/extindex/ei$schema_version/over.sqlite3"; my $oidx = PublicInbox::OverIdx->new($f); if ('inject w/o indexing') { use PublicInbox::Import; my $v1ibx = $cfg->lookup_name('v1test'); my $last_v1_commit = $v1ibx->mm->last_commit; my $v2ibx = $cfg->lookup_name('v2test'); my $last_v2_commit = $v2ibx->mm->last_commit_xap($schema_version, 0); my $git0 = PublicInbox::Git->new("$v2ibx->{inboxdir}/git/0.git"); chomp(my $cmt = $git0->qx(qw(rev-parse HEAD^0))); is($last_v2_commit, $cmt, 'v2 index up-to-date'); my $v2im = PublicInbox::Import->new($git0, undef, undef, $v2ibx); $v2im->{lock_path} = undef; $v2im->{path_type} = 'v2'; $v2im->add(eml_load('t/mda-mime.eml')); $v2im->done; chomp(my $tip = $git0->qx(qw(rev-parse HEAD^0))); isnt($tip, $cmt, '0.git v2 updated'); # inject a message w/o updating index rename("$home/v1test/public-inbox", "$home/v1test/skip-index") or BAIL_OUT $!; open(my $eh, '<', 't/iso-2202-jp.eml') or BAIL_OUT $!; run_script(['-mda', '--no-precheck'], $env, { 0 => $eh}) or BAIL_OUT '-mda'; rename("$home/v1test/skip-index", "$home/v1test/public-inbox") or BAIL_OUT $!; my ($in, $out, $err); $in = $out = $err = ''; my $opt = { 0 => \$in, 1 => \$out, 2 => \$err }; ok(run_script([qw(-extindex -v -v --all), "$home/extindex"], undef, undef), 'extindex noop'); $es->{xdb}->reopen; my $mset = $es->mset('mid:199707281508.AAA24167@hoyogw.example'); is($mset->size, 0, 'did not attempt to index unindexed v1 message'); $mset = $es->mset('mid:multipart-html-sucks@11'); is($mset->size, 0, 'did not attempt to index unindexed v2 message'); ok(run_script([qw(-index --all)]), 'indexed v1 and v2 inboxes'); isnt($v1ibx->mm->last_commit, $last_v1_commit, '-index v1 worked'); isnt($v2ibx->mm->last_commit_xap($schema_version, 0), $last_v2_commit, '-index v2 worked'); ok(run_script([qw(-extindex --all), "$home/extindex"]), 'extindex updates'); $es->{xdb}->reopen; $mset = $es->mset('mid:199707281508.AAA24167@hoyogw.example'); is($mset->size, 1, 'got v1 message'); $mset = $es->mset('mid:multipart-html-sucks@11'); is($mset->size, 1, 'got v2 message'); } if ('reindex catches missed messages') { my $v2ibx = $cfg->lookup_name('v2test'); $v2ibx->{-no_fsync} = 1; my $im = PublicInbox::InboxWritable->new($v2ibx)->importer(0); my $cmt_a = $v2ibx->mm->last_commit_xap($schema_version, 0); my $eml = eml_load('t/data/0001.patch'); $im->add($eml); $im->done; my $cmt_b = $v2ibx->mm->last_commit_xap($schema_version, 0); isnt($cmt_a, $cmt_b, 'v2 0.git HEAD updated'); $oidx->dbh; my $uv = $v2ibx->uidvalidity; my $lc_key = "lc-v2:v2.example//$uv;0"; is($oidx->eidx_meta($lc_key, $cmt_b), $cmt_a, 'update lc-v2 meta, old is as expected'); my $max = $oidx->max; $oidx->dbh_close; ok(run_script([qw(-extindex), "$home/extindex", $v2ibx->{inboxdir}]), '-extindex noop'); is($oidx->max, $max, '->max unchanged'); is($oidx->eidx_meta($lc_key), $cmt_b, 'lc-v2 unchanged'); $oidx->dbh_close; my $opt = { 2 => \(my $err = '') }; ok(run_script([qw(-extindex --reindex), "$home/extindex", $v2ibx->{inboxdir}], undef, $opt), '--reindex for unseen'); is($oidx->max, $max + 1, '->max bumped'); is($oidx->eidx_meta($lc_key), $cmt_b, 'lc-v2 stays unchanged'); my @err = split(/^/, $err); is(scalar(@err), 1, 'only one warning') or diag "err=$err"; like($err[0], qr/I: reindex_unseen/, 'got reindex_unseen message'); my $new = $oidx->get_art($max + 1); is($new->{subject}, $eml->header('Subject'), 'new message added'); $es->{xdb}->reopen; # git patch-id --stable search->mset("patchid:$patchid"); is($mset->size, 1, 'patchid search works'); $mset = $es->mset("mid:$new->{mid}"); is($mset->size, 1, 'previously unseen, now indexed in Xapian'); ok($im->remove($eml), 'remove new message from v2 inbox'); $im->done; my $cmt_c = $v2ibx->mm->last_commit_xap($schema_version, 0); is($oidx->eidx_meta($lc_key, $cmt_c), $cmt_b, 'bump lc-v2 meta again to skip v2 remove'); $err = ''; $oidx->dbh_close; ok(run_script([qw(-extindex --reindex), "$home/extindex", $v2ibx->{inboxdir}], undef, $opt), '--reindex for stale'); @err = split(/^/, $err); is(scalar(@err), 1, 'only one warning') or diag "err=$err"; like($err[0], qr/\(#$new->{num}\): stale/, 'got stale message warning'); is($oidx->get_art($new->{num}), undef, 'stale message gone from over'); is_deeply($oidx->get_xref3($new->{num}), [], 'stale message has no xref3'); $es->{xdb}->reopen; $mset = $es->mset("mid:$new->{mid}"); is($mset->size, 0, 'stale mid gone Xapian'); ok(run_script([qw(-extindex --reindex --all --fast), "$home/extindex"], undef, $opt), '--reindex w/ --fast'); ok(!run_script([qw(-extindex --all --fast), "$home/extindex"], undef, $opt), '--fast alone makes no sense'); } if ('reindex catches content bifurcation') { use PublicInbox::MID qw(mids); my $v2ibx = $cfg->lookup_name('v2test'); $v2ibx->{-no_fsync} = 1; my $im = PublicInbox::InboxWritable->new($v2ibx)->importer(0); my $eml = eml_load('t/data/message_embed.eml'); my $cmt_a = $v2ibx->mm->last_commit_xap($schema_version, 0); $im->add($eml); $im->done; my $cmt_b = $v2ibx->mm->last_commit_xap($schema_version, 0); my $uv = $v2ibx->uidvalidity; my $lc_key = "lc-v2:v2.example//$uv;0"; $oidx->dbh; is($oidx->eidx_meta($lc_key, $cmt_b), $cmt_a, 'update lc-v2 meta, old is as expected'); my $mid = mids($eml)->[0]; my $smsg = $v2ibx->over->next_by_mid($mid, \(my $id), \(my $prev)); my $oldmax = $oidx->max; my $x3_orig = $oidx->get_xref3(3); is(scalar(@$x3_orig), 1, '#3 has one xref'); $oidx->add_xref3(3, $smsg->{num}, $smsg->{blob}, 'v2.example'); my $x3 = $oidx->get_xref3(3); is(scalar(@$x3), 2, 'injected xref3'); $oidx->commit_lazy; my $opt = { 2 => \(my $err = '') }; ok(run_script([qw(-extindex --all), "$home/extindex"], undef, $opt), 'extindex --all is noop'); is($err, '', 'no warnings in index'); $oidx->dbh; is($oidx->max, $oldmax, 'oidx->max unchanged'); $oidx->dbh_close; ok(run_script([qw(-extindex --reindex --all), "$home/extindex"], undef, $opt), 'extindex --reindex') or diag explain($opt); $oidx->dbh; ok($oidx->max > $oldmax, 'oidx->max bumped'); like($err, qr/split into 2 due to deduplication change/, 'bifurcation noted'); my $added = $oidx->get_art($oidx->max); is($added->{blob}, $smsg->{blob}, 'new blob indexed'); is_deeply(["v2.example:$smsg->{num}:$smsg->{blob}"], $oidx->get_xref3($added->{num}), 'xref3 corrected for bifurcated message'); is_deeply($oidx->get_xref3(3), $x3_orig, 'xref3 restored for #3'); } if ('--reindex --rethread') { my $before = $oidx->dbh->selectrow_array(<<''); SELECT MAX(tid) FROM over WHERE num > 0 my $opt = {}; ok(run_script([qw(-extindex --reindex --rethread --all), "$home/extindex"], undef, $opt), '--rethread'); my $after = $oidx->dbh->selectrow_array(<<''); SELECT MIN(tid) FROM over WHERE num > 0 # actual rethread logic is identical to v1/v2 and tested elsewhere ok($after > $before, '--rethread updates MIN(tid)'); } if ('remove v1test and test gc') { xsys([qw(git config --unset publicinbox.v1test.inboxdir)], { GIT_CONFIG => $cfg_path }); my $opt = { 2 => \(my $err = '') }; ok(run_script([qw(-extindex --gc), "$home/extindex"], undef, $opt), 'extindex --gc'); like($err, qr/^I: remove #1 v1\.example /ms, 'removed v1 message'); is(scalar(grep(!/^I:/, split(/^/m, $err))), 0, 'no non-informational messages'); $misc->{xdb}->reopen; @it = $misc->mset('')->items; is(scalar(@it), 1, 'only one inbox left'); } if ('dedupe + dry-run') { my @cmd = ('-extindex', "$home/extindex"); my $opt = { 2 => \(my $err = '') }; ok(run_script([@cmd, '--dedupe'], undef, $opt), '--dedupe'); ok(run_script([@cmd, qw(--dedupe --dry-run)], undef, $opt), '--dry-run --dedupe'); is $err, '', 'no errors'; ok(!run_script([@cmd, qw(--dry-run)], undef, $opt), '--dry-run alone fails'); } # chmod 0755, $home or xbail "chmod: $!"; for my $j (1, 3, 6) { my $o = { 2 => \(my $err = '') }; my $d = "$home/extindex-j$j"; ok(run_script(['-extindex', "-j$j", '--all', $d], undef, $o), "init with -j$j"); my $max = $j - 2; $max = 0 if $max < 0; my @dirs = glob("$d/ei*/?"); like($dirs[-1], qr!/ei[0-9]+/$max\z!, '-j works'); } SKIP: { my $d = "$home/extindex-j1"; my $es = PublicInbox::ExtSearch->new($d); ok(my $nresult0 = $es->mset('z:0..')->size, 'got results'); ok(ref($es->{xdb}), '{xdb} created'); my $nshards1 = $es->{nshard}; is($nshards1, 1, 'correct shard count'); my @ei_dir = glob("$d/ei*/"); chmod 0755, $ei_dir[0] or xbail "chmod: $!"; my $mode = sprintf('%04o', 07777 & (stat($ei_dir[0]))[2]); is($mode, '0755', 'mode set on ei*/ dir'); my $o = { 2 => \(my $err = '') }; ok(run_script([qw(-xcpdb -R4), $d]), 'xcpdb R4'); my @dirs = glob("$d/ei*/?"); for my $i (0..3) { is(grep(m!/ei[0-9]+/$i\z!, @dirs), 1, "shard [$i] created"); my $m = sprintf('%04o', 07777 & (stat($dirs[$i]))[2]); is($m, $mode, "shard [$i] mode"); } delete @$es{qw(xdb qp)}; is($es->mset('z:0..')->size, $nresult0, 'new shards, same results'); for my $i (4..5) { is(grep(m!/ei[0-9]+/$i\z!, @dirs), 0, "no shard [$i]"); } ok(run_script([qw(-xcpdb -R2), $d]), 'xcpdb -R2'); @dirs = glob("$d/ei*/?"); for my $i (0..1) { is(grep(m!/ei[0-9]+/$i\z!, @dirs), 1, "shard [$i] kept"); } for my $i (2..3) { is(grep(m!/ei[0-9]+/$i\z!, @dirs), 0, "no shard [$i]"); } skip 'xapian-compact missing', 4 unless have_xapian_compact; ok(run_script([qw(-compact), $d], undef, $o), 'compact'); # n.b. stderr contains xapian-compact output my @d2 = glob("$d/ei*/?"); is_deeply(\@d2, \@dirs, 'dirs consistent after compact'); ok(run_script([qw(-extindex --dedupe --all), $d]), '--dedupe works after compact'); ok(run_script([qw(-extindex --gc), $d], undef, $o), '--gc works after compact'); } { # ensure --gc removes non-xposted messages my $old_size = -s $cfg_path // xbail "stat $cfg_path $!"; my $tmp_addr = 'v2tmp@example.com'; run_script([qw(-init v2tmp --indexlevel basic --newsgroup v2tmp.example), "$home/v2tmp", 'http://example.com/v2tmp', $tmp_addr ]) or xbail '-init'; $env = { ORIGINAL_RECIPIENT => $tmp_addr }; open $fh, '+>', undef or xbail "open $!"; $fh->autoflush(1); my $mid = 'tmpmsg@example.com'; print $fh < Subject: tmpmsg Date: Tue, 19 Jan 2038 03:14:07 +0000 EOM seek $fh, 0, SEEK_SET or xbail "seek $!"; run_script([qw(-mda --no-precheck)], $env, {0 => $fh}) or xbail '-mda'; ok(run_script([qw(-extindex --all), "$home/extindex"]), 'update'); my $nr; { my $es = PublicInbox::ExtSearch->new("$home/extindex"); my ($id, $prv); my $smsg = $es->over->next_by_mid($mid, \$id, \$prv); ok($smsg, 'tmpmsg indexed'); my $mset = $es->search->mset("mid:$mid"); is($mset->size, 1, 'new message found'); $mset = $es->search->mset('z:0..'); $nr = $mset->size; } truncate($cfg_path, $old_size) or xbail "truncate $!"; my $rdr = { 2 => \(my $err) }; ok(run_script([qw(-extindex --gc), "$home/extindex"], undef, $rdr), 'gc to get rid of removed inbox'); is_deeply([ grep(!/^(?:I:|#)/, split(/^/m, $err)) ], [], 'no non-informational errors in stderr'); my $es = PublicInbox::ExtSearch->new("$home/extindex"); my $mset = $es->search->mset("mid:$mid"); is($mset->size, 0, 'tmpmsg gone from search'); my ($id, $prv); is($es->over->next_by_mid($mid, \$id, \$prv), undef, 'tmpmsg gone from over'); $id = $prv = undef; is($es->over->next_by_mid('testmessage@example.com', \$id, \$prv), undef, 'remaining message not indavderover'); $mset = $es->search->mset('z:0..'); is($mset->size, $nr - 1, 'existing messages not clobbered from search'); my $o = $es->over->{dbh}->selectall_arrayref(<size, 'over row count matches Xapian'); my $x = $es->over->{dbh}->selectall_arrayref(< # License: AGPL-3.0+ # # Ensure FakeInotify can pick up rename(2) and link(2) operations # used by Maildir writing tools use strict; use PublicInbox::TestCommon; use_ok 'PublicInbox::FakeInotify'; my $MIN_FS_TICK = 0.011; # for low-res CONFIG_HZ=100 systems my ($tmpdir, $for_destroy) = tmpdir(); mkdir "$tmpdir/new" or BAIL_OUT "mkdir: $!"; mkdir "$tmpdir/new/rmd" or BAIL_OUT "mkdir: $!"; open my $fh, '>', "$tmpdir/tst" or BAIL_OUT "open: $!"; close $fh or BAIL_OUT "close: $!"; my $fi = PublicInbox::FakeInotify->new; my $mask = PublicInbox::FakeInotify::MOVED_TO_OR_CREATE(); my $w = $fi->watch("$tmpdir/new", $mask); tick $MIN_FS_TICK; rename("$tmpdir/tst", "$tmpdir/new/tst") or BAIL_OUT "rename: $!"; my @events = map { $_->fullname } $fi->read; is_deeply(\@events, ["$tmpdir/new/tst"], 'rename(2) detected'); tick $MIN_FS_TICK; open $fh, '>', "$tmpdir/tst" or BAIL_OUT "open: $!"; close $fh or BAIL_OUT "close: $!"; link("$tmpdir/tst", "$tmpdir/new/link") or BAIL_OUT "link: $!"; @events = map { $_->fullname } $fi->read; is_deeply(\@events, ["$tmpdir/new/link"], 'link(2) detected'); $w->cancel; tick $MIN_FS_TICK; link("$tmpdir/new/tst", "$tmpdir/new/link2") or BAIL_OUT "link: $!"; @events = map { $_->fullname } $fi->read; is_deeply(\@events, [], 'link(2) not detected after cancel'); $fi->watch("$tmpdir/new", PublicInbox::FakeInotify::IN_DELETE()); tick $MIN_FS_TICK; rmdir("$tmpdir/new/rmd") or xbail "rmdir: $!"; @events = $fi->read; is_deeply([map{ $_->fullname }@events], ["$tmpdir/new/rmd"], 'rmdir detected'); ok($events[0]->IN_DELETE, 'IN_DELETE set on rmdir'); tick $MIN_FS_TICK; unlink("$tmpdir/new/tst") or xbail "unlink: $!"; @events = grep { ref =~ /Gone/ } $fi->read; is_deeply([map{ $_->fullname }@events], ["$tmpdir/new/tst"], 'unlink detected'); ok($events[0]->IN_DELETE, 'IN_DELETE set on unlink'); PublicInbox::DS->Reset; done_testing; public-inbox-1.9.0/t/feed.t000066400000000000000000000053341430031475700154600ustar00rootroot00000000000000#!perl -w # Copyright (C) 2014-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Eml; use PublicInbox::Feed; use PublicInbox::Inbox; my $have_xml_treepp = eval { require XML::TreePP; 1 }; my ($tmpdir, $for_destroy) = tmpdir(); sub string_feed { my $res = PublicInbox::Feed::generate($_[0]); my $body = $res->[2]; my $str = ''; while (defined(my $chunk = $body->getline)) { $str .= $chunk; } $body->close; $str; } my $git_dir = "$tmpdir/gittest"; my $ibx = create_inbox 'v1', tmpdir => $git_dir, sub { my ($im, $ibx) = @_; foreach my $i (1..6) { $im->add(PublicInbox::Eml->new(< To: U Message-Id: <$i\@example.com> Subject: zzz #$i Date: Thu, 01 Jan 1970 00:00:00 +0000 > drop me msg $i > inline me here > this is a short quote keep me EOF } }; $ibx->{url} = [ 'http://example.com/test' ]; $ibx->{feedmax} = 3; my $im = $ibx->importer(0); # spam check { # check initial feed { my $feed = string_feed({ ibx => $ibx }); SKIP: { skip 'XML::TreePP missing', 3 unless $have_xml_treepp; my $t = XML::TreePP->new->parse($feed); like($t->{feed}->{-xmlns}, qr/\bAtom\b/, 'looks like an an Atom feed'); is(scalar @{$t->{feed}->{entry}}, 3, 'parsed three entries'); is($t->{feed}->{id}, 'mailto:v1@example.com', 'id is set to default'); } like($feed, qr/drop me/, "long quoted text kept"); like($feed, qr/inline me here/, "short quoted text kept"); like($feed, qr/keep me/, "unquoted text saved"); } # add a new spam message my $spam; { $spam = PublicInbox::Eml->new(< To: U Message-Id: Subject: SPAM!!!!!!!! Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF $im->add($spam); $im->done; } # check spam shows up { my $spammy_feed = string_feed({ ibx => $ibx }); SKIP: { skip 'XML::TreePP missing', 2 unless $have_xml_treepp; my $t = XML::TreePP->new->parse($spammy_feed); like($t->{feed}->{-xmlns}, qr/\bAtom\b/, 'looks like an an Atom feed'); is(scalar @{$t->{feed}->{entry}}, 3, 'parsed three entries'); } like($spammy_feed, qr/SPAM/s, "spam showed up :<"); } # nuke spam $im->remove($spam); $im->done; # spam no longer shows up { my $feed = string_feed({ ibx => $ibx }); SKIP: { skip 'XML::TreePP missing', 2 unless $have_xml_treepp; my $t = XML::TreePP->new->parse($feed); like($t->{feed}->{-xmlns}, qr/\bAtom\b/, 'looks like an an Atom feed'); is(scalar @{$t->{feed}->{entry}}, 3, 'parsed three entries'); } unlike($feed, qr/SPAM/, "spam gone :>"); } } done_testing; public-inbox-1.9.0/t/filter_base-junk.eml000066400000000000000000000004601430031475700203060ustar00rootroot00000000000000From: a@example.com Subject: blah Content-Type: multipart/mixed; boundary="b" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit --b Content-Type: application/vnd.ms-excel Content-Transfer-Encoding: base64 anVuaw== --b Content-Type: text/plain Content-Transfer-Encoding: quoted-printable junk= --b-- public-inbox-1.9.0/t/filter_base-xhtml.eml000066400000000000000000000005371430031475700205000ustar00rootroot00000000000000From: a@example.com Subject: blah Content-Type: multipart/alternative; boundary="b" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit --b Content-Type: text/xhtml; charset=UTF-8 Content-Transfer-Encoding: base64 PGh0bWw+PGJvZHk+aGk8L2JvZHk+PC9odG1sPg== --b Content-Type: text/plain Content-Transfer-Encoding: quoted-printable hi =3D "bye"= --b-- public-inbox-1.9.0/t/filter_base.t000066400000000000000000000017051430031475700170320ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use_ok 'PublicInbox::Filter::Base'; { my $f = PublicInbox::Filter::Base->new; ok($f, 'created stock object'); ok(defined $f->{reject_suffix}, 'rejected suffix redefined'); is(ref($f->{reject_suffix}), 'Regexp', 'reject_suffix should be a RE'); } { my $f = PublicInbox::Filter::Base->new(reject_suffix => undef); ok($f, 'created base object q/o reject_suffix'); ok(!defined $f->{reject_suffix}, 'reject_suffix not defined'); } { my $f = PublicInbox::Filter::Base->new; my $email = eml_load 't/filter_base-xhtml.eml'; is($f->delivery($email), 100, "xhtml rejected"); } { my $f = PublicInbox::Filter::Base->new; my $email = eml_load 't/filter_base-junk.eml'; is($f->delivery($email), 100, 'proprietary format rejected on glob'); } done_testing(); public-inbox-1.9.0/t/filter_mirror.t000066400000000000000000000007261430031475700174340ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use_ok 'PublicInbox::Filter::Mirror'; my $f = PublicInbox::Filter::Mirror->new; ok($f, 'created PublicInbox::Filter::Mirror object'); { my $email = eml_load 't/mda-mime.eml'; is($f->ACCEPT, $f->delivery($email), 'accept any trash that comes'); } done_testing(); public-inbox-1.9.0/t/filter_rubylang.t000066400000000000000000000032601430031475700177410ustar00rootroot00000000000000# Copyright (C) 2017-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::TestCommon; use_ok 'PublicInbox::Filter::RubyLang'; my $f = PublicInbox::Filter::RubyLang->new; ok($f, 'created PublicInbox::Filter::RubyLang object'); my $msg = <<'EOF'; Subject: test keep this Unsubscribe: EOF my $mime = PublicInbox::Eml->new($msg); my $ret = $f->delivery($mime); is($ret, $mime, "delivery successful"); is($mime->body, "keep this\n", 'normal message filtered OK'); SKIP: { require_mods('DBD::SQLite', 4); use_ok 'PublicInbox::Inbox'; my ($git_dir, $for_destroy) = tmpdir(); is(mkdir("$git_dir/public-inbox"), 1, "created public-inbox dir"); my $altid = [ # 'serial:ruby-core:file=msgmap.sqlite3' can be used here # for documentation purposes, but Xapian ignores everything # up to and including the '-' 'serial:core:file=msgmap.sqlite3' ]; my $ibx = PublicInbox::Inbox->new({ inboxdir => $git_dir, altid => $altid }); $f = PublicInbox::Filter::RubyLang->new(ibx => $ibx); $msg = <<'EOF'; X-Mail-Count: 12 Message-ID: EOF $mime = PublicInbox::Eml->new($msg); $ret = $f->delivery($mime); is($ret, $mime, "delivery successful"); my $mm = $ibx->mm; is($mm->num_for('a@b'), 12, 'MM entry created based on X-ML-Count'); $msg = <<'EOF'; X-Mail-Cout: 12 Message-ID: EOF $mime = PublicInbox::Eml->new($msg); $ret = $f->delivery($mime); is($ret, 100, "delivery rejected without X-Mail-Count"); } done_testing(); public-inbox-1.9.0/t/filter_subjecttag.t000066400000000000000000000022011430031475700202430ustar00rootroot00000000000000# Copyright (C) 2017-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use_ok 'PublicInbox::Filter::SubjectTag'; my $f = eval { PublicInbox::Filter::SubjectTag->new }; like($@, qr/tag not defined/, 'error without args'); $f = PublicInbox::Filter::SubjectTag->new('-tag', '[foo]'); is(ref $f, 'PublicInbox::Filter::SubjectTag', 'new object created'); my $mime = PublicInbox::Eml->new(< Subject: =?UTF-8?B?UmU6IFtmb29dIEVsw4PCqWFub3I=?= EOF $mime = $f->delivery($mime); is($mime->header('Subject'), "Re: El\xc3\xa9anor", 'filtered with Re:'); $mime->header_str_set('Subject', '[FOO] bar'); $mime = $f->delivery($mime); is($mime->header('Subject'), 'bar', 'filtered non-reply'); $f = PublicInbox::Filter::SubjectTag->new(-tag => '[sox-devel]'); my $eml = PublicInbox::Eml->new(<delivery($eml); my $s = $eml->header('Subject'); utf8::encode($s); # to octets is($s, "Re: \xc5\xa1", 'subject filtered correctly'); done_testing(); public-inbox-1.9.0/t/filter_vger.t000066400000000000000000000022751430031475700170660ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use_ok 'PublicInbox::Filter::Vger'; my $f = PublicInbox::Filter::Vger->new; ok($f, 'created PublicInbox::Filter::Vger object'); { my $lkml = <<'EOF'; From: foo@example.com Subject: test keep this -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ EOF my $mime = PublicInbox::Eml->new($lkml); $mime = $f->delivery($mime); is("keep this\n", $mime->body, 'normal message filtered OK'); } { my $no_nl = <<'EOF'; From: foo@example.com Subject: test OSX users :P-- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html EOF my $mime = PublicInbox::Eml->new($no_nl); $mime = $f->delivery($mime); is('OSX users :P', $mime->body, 'missing trailing LF in original OK'); } done_testing(); public-inbox-1.9.0/t/gcf2.t000066400000000000000000000116231430031475700153740ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use PublicInbox::TestCommon; use Test::More; use Fcntl qw(:seek); use IO::Handle (); use POSIX qw(_exit); use Cwd qw(abs_path); require_mods('PublicInbox::Gcf2'); use_ok 'PublicInbox::Gcf2'; use PublicInbox::Import; my ($tmpdir, $for_destroy) = tmpdir(); my $gcf2 = PublicInbox::Gcf2::new(); is(ref($gcf2), 'PublicInbox::Gcf2', '::new works'); my $COPYING = 'dba13ed2ddf783ee8118c6a581dbf75305f816a3'; open my $agpl, '<', 'COPYING' or BAIL_OUT "AGPL-3 missing: $!"; $agpl = do { local $/; <$agpl> }; PublicInbox::Import::init_bare($tmpdir); my $fi_data = './t/git.fast-import-data'; my $rdr = {}; open $rdr->{0}, '<', $fi_data or BAIL_OUT $!; xsys([qw(git fast-import --quiet)], { GIT_DIR => $tmpdir }, $rdr); is($?, 0, 'fast-import succeeded'); $gcf2->add_alternate("$tmpdir/objects"); { my ($r, $w); pipe($r, $w) or BAIL_OUT $!; my $tree = 'fdbc43725f21f485051c17463b50185f4c3cf88c'; $gcf2->cat_oid(fileno($w), $tree); close $w; is("$tree tree 30\n", <$r>, 'tree header ok'); $r = do { local $/; <$r> }; is(chop($r), "\n", 'got trailing newline'); is(length($r), 30, 'tree length matches'); } chomp(my $objdir = xqx([qw(git rev-parse --git-path objects)])); if ($objdir =~ /\A--git-path\n/) { # git <2.5 chomp($objdir = xqx([qw(git rev-parse --git-dir)])); $objdir .= '/objects'; } if ($objdir && -d $objdir) { $objdir = abs_path($objdir); open my $alt, '>>', "$tmpdir/objects/info/alternates" or BAIL_OUT $!; print $alt $objdir, "\n" or BAIL_OUT $!; close $alt or BAIL_OUT $!; # calling gcf2->add_alternate on an already-added path won't # cause alternates to be reloaded, so we do # $gcf2->add_alternate($objdir) later on instead of # $gcf2->add_alternate("$tmpdir/objects"); # $objdir = "$tmpdir/objects"; } else { $objdir = undef } my $nr = $ENV{TEST_LEAK_NR}; my $cat = $ENV{TEST_LEAK_CAT} // 10; diag "checking for leaks... (TEST_LEAK_NR=$nr TEST_LEAK_CAT=$cat)" if $nr; SKIP: { skip 'not in git worktree', 21 unless defined($objdir); $gcf2->add_alternate($objdir); eval { $gcf2->add_alternate($objdir) }; ok(!$@, 'no error adding alternate redundantly'); if ($nr) { diag "adding alternate $nr times redundantly"; $gcf2->add_alternate($objdir) for (1..$nr); diag 'done adding redundant alternates'; } open my $fh, '+>', undef or BAIL_OUT "open: $!"; $fh->autoflush(1); ok(!$gcf2->cat_oid(fileno($fh), 'invalid'), 'invalid fails'); seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!"; is(do { local $/; <$fh> }, '', 'nothing written'); open $fh, '+>', undef or BAIL_OUT "open: $!"; ok(!$gcf2->cat_oid(fileno($fh), '0'x40), 'z40 fails'); seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!"; is(do { local $/; <$fh> }, '', 'nothing written for z40'); open $fh, '+>', undef or BAIL_OUT "open: $!"; my $ck_copying = sub { my ($desc) = @_; seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!"; is(<$fh>, "$COPYING blob 34520\n", "got expected header $desc"); my $buf = do { local $/; <$fh> }; is(chop($buf), "\n", 'got trailing \\n'); is($buf, $agpl, "AGPL matches ($desc)"); }; ok($gcf2->cat_oid(fileno($fh), $COPYING), 'cat_oid normal'); $ck_copying->('regular file'); $gcf2 = PublicInbox::Gcf2::new(); $gcf2->add_alternate("$tmpdir/objects"); open $fh, '+>', undef or BAIL_OUT "open: $!"; ok($gcf2->cat_oid(fileno($fh), $COPYING), 'cat_oid alternate'); $ck_copying->('alternates after reopen'); $^O eq 'linux' or skip('pipe tests are Linux-only', 14); for my $blk (1, 0) { my ($r, $w); pipe($r, $w) or BAIL_OUT $!; fcntl($w, 1031, 4096) or skip('Linux too old for F_SETPIPE_SZ', 14); $w->blocking($blk); seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!"; truncate($fh, 0) or BAIL_OUT "truncate: $!"; my $pid = fork // BAIL_OUT "fork: $!"; if ($pid == 0) { close $w; tick; # wait for parent to block on writev my $buf = do { local $/; <$r> }; print $fh $buf or _exit(1); _exit(0); } ok($gcf2->cat_oid(fileno($w), $COPYING), "cat blocking=$blk"); close $w or BAIL_OUT "close: $!"; is(waitpid($pid, 0), $pid, 'child exited'); is($?, 0, 'no error in child'); $ck_copying->("pipe blocking($blk)"); pipe($r, $w) or BAIL_OUT $!; fcntl($w, 1031, 4096) or BAIL_OUT $!; $w->blocking($blk); close $r; local $SIG{PIPE} = 'IGNORE'; eval { $gcf2->cat_oid(fileno($w), $COPYING) }; like($@, qr/writev error:/, 'got writev error'); } } if ($nr) { open my $null, '>', '/dev/null' or BAIL_OUT "open /dev/null: $!"; my $fd = fileno($null); local $SIG{PIPE} = 'IGNORE'; my ($r, $w); pipe($r, $w); close $r; my $broken = fileno($w); for (1..$nr) { my $obj = PublicInbox::Gcf2::new(); if (defined($objdir)) { $obj->add_alternate($objdir); for (1..$cat) { $obj->cat_oid($fd, $COPYING); eval { $obj->cat_oid($broken, $COPYING) }; $obj->cat_oid($fd, '0'x40); $obj->cat_oid($fd, 'invalid'); } } } } done_testing; public-inbox-1.9.0/t/gcf2_client.t000066400000000000000000000057121430031475700167340ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use PublicInbox::TestCommon; use Test::More; use Cwd qw(getcwd); use PublicInbox::Import; use PublicInbox::DS; require_mods('PublicInbox::Gcf2'); use_ok 'PublicInbox::Gcf2Client'; my ($tmpdir, $for_destroy) = tmpdir(); my $git_a = "$tmpdir/a.git"; my $git_b = "$tmpdir/b.git"; PublicInbox::Import::init_bare($git_a); PublicInbox::Import::init_bare($git_b); my $fi_data = './t/git.fast-import-data'; my $rdr = {}; open $rdr->{0}, '<', $fi_data or BAIL_OUT $!; xsys([qw(git fast-import --quiet)], { GIT_DIR => $git_a }, $rdr); is($?, 0, 'fast-import succeeded'); my $tree = 'fdbc43725f21f485051c17463b50185f4c3cf88c'; my $called = 0; my $err_f = "$tmpdir/err"; { PublicInbox::DS->Reset; open my $err, '>>', $err_f or BAIL_OUT $!; my $gcf2c = PublicInbox::Gcf2Client::new({ 2 => $err }); $gcf2c->gcf2_async(\"$tree $git_a\n", sub { my ($bref, $oid, $type, $size, $arg) = @_; is($oid, $tree, 'got expected OID'); is($size, 30, 'got expected length'); is($type, 'tree', 'got tree type'); is(length($$bref), 30, 'got a tree'); is($arg, 'hi', 'arg passed'); $called++; }, 'hi'); $gcf2c->cat_async_step($gcf2c->{inflight}); open $err, '<', $err_f or BAIL_OUT $!; my $estr = do { local $/; <$err> }; is($estr, '', 'nothing in stderr'); my $trunc = substr($tree, 0, 39); $gcf2c->gcf2_async(\"$trunc $git_a\n", sub { my ($bref, $oid, $type, $size, $arg) = @_; is(undef, $bref, 'missing bref is undef'); is($oid, $trunc, 'truncated OID printed'); is($type, 'missing', 'type is "missing"'); is($size, undef, 'size is undef'); is($arg, 'bye', 'arg passed when missing'); $called++; }, 'bye'); $gcf2c->cat_async_step($gcf2c->{inflight}); open $err, '<', $err_f or BAIL_OUT $!; $estr = do { local $/; <$err> }; like($estr, qr/retrying/, 'warned about retry'); # try failed alternates lookup PublicInbox::DS->Reset; open $err, '>', $err_f or BAIL_OUT $!; $gcf2c = PublicInbox::Gcf2Client::new({ 2 => $err }); $gcf2c->gcf2_async(\"$tree $git_b\n", sub { my ($bref, $oid, $type, $size, $arg) = @_; is(undef, $bref, 'missing bref from alt is undef'); $called++; }); $gcf2c->cat_async_step($gcf2c->{inflight}); open $err, '<', $err_f or BAIL_OUT $!; $estr = do { local $/; <$err> }; like($estr, qr/retrying/, 'warned about retry before alt update'); # now try successful alternates lookup open my $alt, '>>', "$git_b/objects/info/alternates" or BAIL_OUT $!; print $alt "$git_a/objects\n" or BAIL_OUT $!; close $alt or BAIL_OUT; my $expect = xqx(['git', "--git-dir=$git_a", qw(cat-file tree), $tree]); $gcf2c->gcf2_async(\"$tree $git_a\n", sub { my ($bref, $oid, $type, $size, $arg) = @_; is($oid, $tree, 'oid match on alternates retry'); is($$bref, $expect, 'tree content matched'); $called++; }); $gcf2c->cat_async_step($gcf2c->{inflight}); } is($called, 4, 'gcf2_async callbacks hit'); done_testing; public-inbox-1.9.0/t/git-http-backend.psgi000066400000000000000000000013131430031475700203720ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use PublicInbox::GitHTTPBackend; use PublicInbox::Git; use Plack::Builder; use BSD::Resource qw(getrusage); my $git_dir = $ENV{GIANT_GIT_DIR} or die 'GIANT_GIT_DIR not defined in env'; my $git = PublicInbox::Git->new($git_dir); builder { enable 'Head'; sub { my ($env) = @_; if ($env->{PATH_INFO} =~ m!\A/(.+)\z!s) { PublicInbox::GitHTTPBackend::serve($env, $git, $1); } else { my $ru = getrusage(); my $b = $ru->maxrss . "\n"; [ 200, [ qw(Content-Type text/plain Content-Length), length($b) ], [ $b ] ] } } } public-inbox-1.9.0/t/git.fast-import-data000066400000000000000000000026771430031475700202600ustar00rootroot00000000000000blob mark :1 data 6 hello reset refs/heads/header commit refs/heads/header mark :2 author AU Thor 0 +0000 committer AU Thor 0 +0000 data 8 initial M 100644 :1 foo.txt blob mark :3 data 12 hello world commit refs/heads/master mark :4 author AU Thor 0 +0000 committer AU Thor 0 +0000 data 7 second from :2 M 100644 :3 foo.txt blob mark :5 data 12 ----- hello commit refs/heads/header mark :6 author AU Thor 0 +0000 committer AU Thor 0 +0000 data 11 add header from :2 M 100644 :5 foo.txt blob mark :7 data 18 ----- hello world commit refs/heads/master mark :8 author AU Thor 0 +0000 committer AU Thor 0 +0000 data 46 Merge branch 'header' * header: add header from :4 merge :6 M 100644 :7 foo.txt blob mark :9 data 0 blob mark :10 data 16 dir/dur/der/derp commit refs/heads/master mark :11 author AU Thor 0 +0000 committer AU Thor 0 +0000 data 26 add symlink and deep file from :8 M 100644 :9 dir/dur/der/derp M 120000 :10 link blob mark :12 data 78 [submodule "git"] path = git url = git://git.kernel.org/pub/scm/git/git.git commit refs/heads/master mark :13 author AU Thor 0 +0000 committer AU Thor 749520000 +0000 data 18 add git submodule from :11 M 100644 :12 .gitmodules M 160000 f3adf457e046f92f039353762a78dcb3afb2cb13 git reset refs/heads/master from :13 public-inbox-1.9.0/t/git.t000066400000000000000000000170201430031475700153330ustar00rootroot00000000000000# Copyright (C) 2015-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::TestCommon; my ($dir, $for_destroy) = tmpdir(); use PublicInbox::Import; use POSIX qw(strftime); use PublicInbox::Git; { PublicInbox::Import::init_bare($dir, 'master'); my $fi_data = './t/git.fast-import-data'; open my $fh, '<', $fi_data or die "fast-import data readable (or run test at top level: $!"; my $rdr = { 0 => $fh }; xsys([qw(git fast-import --quiet)], { GIT_DIR => $dir }, $rdr); is($?, 0, 'fast-import succeeded'); } { my $git = PublicInbox::Git->new("$dir/foo.git"); my $nick = $git->local_nick; # internal sub unlike($nick, qr/\.git\.git\z/, "no doubled `.git.git' suffix"); like($nick, qr/\.git\z/, "one `.git' suffix"); $git = PublicInbox::Git->new($dir); $nick = $git->local_nick; # internal sub like($nick, qr/\.git\z/, "local nick always adds `.git' suffix"); my @s = $git->date_parse('1970-01-01T00:00:00Z'); is($s[0], 0, 'parsed epoch'); local $ENV{TZ} = 'UTC'; @s = $git->date_parse('1993-10-02 01:02:09', '2010-10-02 01:03:04'); is(strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($s[0])), '1993-10-02T01:02:09Z', 'round trips'); is(strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($s[1])), '2010-10-02T01:03:04Z', '2nd arg round trips'); @s = $git->date_parse('1993-10-02'); is(strftime('%Y-%m-%d', gmtime($s[0])), '1993-10-02', 'round trips date-only'); } { my $gcf = PublicInbox::Git->new($dir); is($gcf->modified, 749520000, 'modified time detected from commit'); my $f = 'HEAD:foo.txt'; my @x = $gcf->check($f); is(scalar @x, 3, 'returned 3 element array for existing file'); like($x[0], qr/\A[a-f0-9]{40}\z/, 'returns obj ID in 1st element'); is('blob', $x[1], 'returns obj type in 2nd element'); like($x[2], qr/\A\d+\z/, 'returns obj size in 3rd element'); my $raw = $gcf->cat_file($f); is($x[2], length($$raw), 'length matches'); is(${$gcf->cat_file($f)}, $$raw, 'not broken after failures'); is(${$gcf->cat_file($f)}, $$raw, 'not broken after partial read'); my $oid = $x[0]; my $arg = { 'foo' => 'bar' }; my $res = []; my $missing = []; $gcf->cat_async($oid, sub { my ($bref, $oid_hex, $type, $size, $arg) = @_; $res = [ @_ ]; }, $arg); $gcf->cat_async('non-existent', sub { my ($bref, $oid_hex, $type, $size, $arg) = @_; $missing = [ @_ ]; }, $arg); $gcf->async_wait_all; my ($bref, $oid_hex, $type, $size, $arg_res) = @$res; is_deeply([$oid_hex, $type, $size], \@x, 'got expected header'); is($arg_res, $arg, 'arg passed to cat_async'); is_deeply($raw, $bref, 'blob result matches'); is_deeply($missing, [ undef, 'non-existent', 'missing', undef, $arg], 'non-existent blob gives expected result'); $res = []; $gcf->cat_async($oid, sub { push @$res, \@_ }); $gcf->cat_async($oid, sub { die 'HI' }); $gcf->cat_async($oid, sub { push @$res, \@_ }); eval { $gcf->async_wait_all }; like($@, qr/\bHI\b/, 'die in callback propagates'); is(scalar(@$res), 2, 'two results'); is_deeply($res->[0], [ $raw, @x, undef ], '1st cb result'); is_deeply($res->[1], [ undef, $oid, undef, undef, undef ], '2nd cb aborted '); my @w; local $PublicInbox::Git::async_warn = 1; local $SIG{__WARN__} = sub { push @w, @_ }; $res = []; $gcf->cat_async($oid, sub { push @$res, \@_ }); $gcf->cat_async($oid, sub { die 'HI' }); $gcf->cat_async($oid, sub { push @$res, \@_ }); eval { $gcf->async_wait_all }; is(scalar(@$res), 2, 'two results'); is_deeply($res->[0], [ $raw, @x, undef ], '1st cb result'); is_deeply($res->[1], [ $raw, @x, undef ], '2st cb result'); like("@w", qr/\bHI\b/, 'die turned to warning'); } if (1) { # need a big file, use the AGPL-3.0 :p my $big_data = './COPYING'; ok(-r $big_data, 'COPYING readable'); my $size = -s $big_data; ok($size > 8192, 'file is big enough'); open my $fh, '<', $big_data or die; my $cmd = [ 'git', "--git-dir=$dir", qw(hash-object -w --stdin) ]; my $buf = xqx($cmd, { GIT_DIR => $dir }, { 0 => $fh }); is(0, $?, 'hashed object successfully'); chomp $buf; my $gcf = PublicInbox::Git->new($dir); my @x = $gcf->cat_file($buf); is($x[2], 'blob', 'got blob on wantarray'); is($x[3], $size, 'got correct size ref on big file'); is(length(${$x[0]}), $size, 'read correct number of bytes'); my $ref = $gcf->qx(qw(cat-file blob), $buf); is($?, 0, 'no error on scalar success'); my @ref = $gcf->qx(qw(cat-file blob), $buf); is($?, 0, 'no error on wantarray success'); my $nl = scalar @ref; ok($nl > 1, "qx returned array length of $nl"); is(join('', @ref), $ref, 'qx array and scalar context both work'); $gcf->qx(qw(repack -adq)); ok($gcf->packed_bytes > 0, 'packed size is positive'); my $rdr; open $rdr->{2}, '+>', '/dev/null' or xbail "open $!"; $gcf->qx([qw(rev-parse --verify bogus)], undef, $rdr); isnt($?, 0, '$? set on failure: '.$?); } SKIP: { require_git(2.6, 7) or skip('need git 2.6+ for --batch-all-objects', 7); my ($alt, $alt_obj) = tmpdir(); my $hash_obj = [ 'git', "--git-dir=$alt", qw(hash-object -w --stdin) ]; PublicInbox::Import::init_bare($alt); open my $fh, '<', "$alt/config" or die "open failed: $!\n"; chomp(my $remote = xqx($hash_obj, undef, { 0 => $fh })); my $gcf = PublicInbox::Git->new($dir); is($gcf->cat_file($remote), undef, "remote file not found"); open $fh, '>>', "$dir/objects/info/alternates" or die "open failed: $!\n"; print $fh "$alt/objects\n" or die "print failed: $!\n"; close $fh or die "close failed: $!"; my $found = $gcf->cat_file($remote); open $fh, '<', "$alt/config" or die "open failed: $!\n"; my $config = eval { local $/; <$fh> }; is($$found, $config, 'alternates reloaded'); # with the async interface my ($async_alt, $async_dir_obj) = tmpdir(); PublicInbox::Import::init_bare($async_alt); my @exist = map { chomp; [ split / / ] } (xqx(['git', "--git-dir=$dir", qw(cat-file --batch-all-objects --batch-check)])); my $results = []; my $cb = sub { my ($bref, $oid, $type, $size) = @_; push @$results, [ $oid, $type, $size ]; }; for my $i (0..5) { $gcf->cat_async($exist[$i]->[0], $cb, $results); next if $i != 3; # stick a new alternate into a running async pipeline $hash_obj->[1] = "--git-dir=$async_alt"; $remote = xqx($hash_obj, undef, { 0 => \'async' }); chomp $remote; open $fh, '>>', "$dir/objects/info/alternates" or die "open failed: $!\n"; print $fh "$async_alt/objects\n" or die "print failed: $!\n"; close $fh or die "close failed: $!"; # trigger cat_async_retry: $gcf->cat_async($remote, $cb, $results); } $gcf->async_wait_all; my $expect = [ @exist[0..3], [ $remote, 'blob', 5 ], @exist[4..5] ]; is_deeply($results, $expect, 'got expected results'); ok(!$gcf->cleanup, 'cleanup can expire'); ok(!$gcf->cleanup, 'cleanup idempotent'); my $t = $gcf->modified; ok($t <= time, 'repo not modified in the future'); isnt($t, 0, 'repo not modified in 1970') } use_ok 'PublicInbox::Git', qw(git_unquote git_quote); my $s; is("foo\nbar", git_unquote($s = '"foo\\nbar"'), 'unquoted newline'); is("Eléanor", git_unquote($s = '"El\\303\\251anor"'), 'unquoted octal'); is(git_unquote($s = '"I\"m"'), 'I"m', 'unquoted dq'); is(git_unquote($s = '"I\\m"'), 'I\\m', 'unquoted backslash'); is(git_quote($s = "Eléanor"), '"El\\303\\251anor"', 'quoted octal'); is(git_quote($s = "hello\"world"), '"hello\"world"', 'quoted dq'); is(git_quote($s = "hello\\world"), '"hello\\\\world"', 'quoted backslash'); is(git_quote($s = "hello\nworld"), '"hello\\nworld"', 'quoted LF'); is(git_quote($s = "hello\x06world"), '"hello\\006world"', 'quoted \\x06'); is(git_unquote($s = '"hello\\006world"'), "hello\x06world", 'unquoted \\x06'); done_testing(); public-inbox-1.9.0/t/gzip_filter.t000066400000000000000000000021471430031475700170720ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use IO::Handle (); # autoflush use Fcntl qw(SEEK_SET); use PublicInbox::TestCommon; require_mods(qw(Compress::Zlib IO::Uncompress::Gunzip)); require_ok 'PublicInbox::GzipFilter'; { open my $fh, '+>', undef or die "open: $!"; open my $dup, '>&', $fh or die "dup $!"; $dup->autoflush(1); my $filter = PublicInbox::GzipFilter->new->attach($dup); ok($filter->write("hello"), 'wrote something'); ok($filter->write("world"), 'wrote more'); $filter->close; seek($fh, 0, SEEK_SET) or die; IO::Uncompress::Gunzip::gunzip($fh => \(my $buf)); is($buf, 'helloworld', 'buffer matches'); } { pipe(my ($r, $w)) or die "pipe: $!"; $w->autoflush(1); close $r or die; my $filter = PublicInbox::GzipFilter->new->attach($w); my $sigpipe; local $SIG{PIPE} = sub { $sigpipe = 1 }; open my $fh, '<', 'COPYING' or die "open(COPYING): $!"; my $buf = do { local $/; <$fh> }; while ($filter->write($buf .= rand)) {} ok($sigpipe, 'got SIGPIPE'); close $w; } done_testing; public-inbox-1.9.0/t/hl_mod.t000066400000000000000000000037121430031475700160150ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use IO::Handle; # ->autoflush use Fcntl qw(:seek); require_mods 'highlight'; use_ok 'PublicInbox::HlMod'; my $hls = PublicInbox::HlMod->new; ok($hls, 'initialized OK'); is($hls->_shebang2lang(\"#!/usr/bin/perl -w\n"), 'perl', 'perl shebang OK'); is($hls->{-ext2lang}->{'pm'}, 'perl', '.pm suffix OK'); is($hls->{-ext2lang}->{'pl'}, 'perl', '.pl suffix OK'); like($hls->_path2lang('Makefile'), qr/\Amake/, 'Makefile OK'); my $str = do { local $/; open(my $fh, __FILE__); <$fh> }; my $orig = $str; { my $ref = $hls->do_hl(\$str, 'foo.perl'); is(ref($ref), 'SCALAR', 'got a scalar reference back'); ok(utf8::valid($$ref), 'resulting string is utf8::valid'); like($$ref, qr/I can see you!/, 'we can see ourselves in output'); like($$ref, qr/&&/, 'escaped &&'); my $lref = $hls->do_hl_lang(\$str, 'perl'); is($$ref, $$lref, 'do_hl_lang matches do_hl'); SKIP: { my $w3m = require_cmd('w3m', 1) or skip('w3m(1) missing to check output', 1); my $cmd = [ $w3m, qw(-T text/html -dump -config /dev/null) ]; my $in = '
    ' . $$ref . '
    '; my $out = xqx($cmd, undef, { 0 => \$in }); # expand tabs and normalize whitespace, # w3m doesn't preserve tabs $orig =~ s/\t/ /gs; $out =~ s/\s*\z//sg; $orig =~ s/\s*\z//sg; is($out, $orig, 'w3m output matches'); } } if ('experimental, only for help text') { my $tmp = <<'EOF'; :> ```perl my $foo = 1 & 2; ``` :< EOF $hls->do_hl_text(\$tmp); my @hl = split(/^/m, $tmp); is($hl[0], ":>\n", 'first line escaped'); is($hl[1], "```perl\n", '2nd line preserved'); like($hl[2], qr/ # License: AGPL-3.0+ # corner case tests for the generic PSGI server # Usage: plackup [OPTIONS] /path/to/this/file use v5.12; use warnings; use Plack::Builder; require Digest::SHA; my $pi_config = $ENV{PI_CONFIG} // 'unset'; # capture ASAP my $app = sub { my ($env) = @_; my $path = $env->{PATH_INFO}; my $in = $env->{'psgi.input'}; my $actual = -s $in; my $code = 500; my $h = [ 'Content-Type' => 'text/plain' ]; my $body = []; if ($path eq '/sha1') { my $sha1 = Digest::SHA->new('SHA-1'); my $buf; while (1) { my $r = $in->read($buf, 4096); die "read err: $!" unless defined $r; last if $r == 0; $sha1->add($buf); } $code = 200; push @$body, $sha1->hexdigest; } elsif (my $fifo = $env->{HTTP_X_CHECK_FIFO}) { if ($path eq '/slow-header') { return sub { open my $f, '<', $fifo or die "open $fifo: $!\n"; local $/ = "\n"; my @r = <$f>; $_[0]->([200, $h, \@r ]); }; } elsif ($path eq '/slow-body') { return sub { my $fh = $_[0]->([200, $h]); open my $f, '<', $fifo or die "open $fifo: $!\n"; local $/ = "\n"; while (defined(my $l = <$f>)) { $fh->write($l); } $fh->close; }; } } elsif ($path eq '/host-port') { $code = 200; push @$body, "$env->{REMOTE_ADDR} $env->{REMOTE_PORT}"; } elsif ($path eq '/callback') { return sub { my ($res) = @_; my $buf = "hello world\n"; push @$h, 'Content-Length', length($buf); my $fh = $res->([200, $h]); $fh->write($buf); $fh->close; } } elsif ($path eq '/empty') { $code = 200; } elsif ($path eq '/getline-die') { $code = 200; $body = Plack::Util::inline_object( getline => sub { die 'GETLINE FAIL' }, close => sub { die 'CLOSE FAIL' }, ); } elsif ($path eq '/close-die') { $code = 200; $body = Plack::Util::inline_object( getline => sub { undef }, close => sub { die 'CLOSE FAIL' }, ); } elsif ($path eq '/async-big') { require PublicInbox::Qspawn; open my $null, '>', '/dev/null' or die; my $rdr = { 2 => fileno($null) }; my $cmd = [qw(dd if=/dev/zero count=30 bs=1024k)]; my $qsp = PublicInbox::Qspawn->new($cmd, undef, $rdr); return $qsp->psgi_return($env, undef, sub { my ($r, $bref) = @_; # make $rd_hdr retry sysread + $parse_hdr in Qspawn: return until length($$bref) > 8000; close $null; [ 200, [ qw(Content-Type application/octet-stream) ]]; }); } elsif ($path eq '/psgi-return-gzip') { require PublicInbox::Qspawn; require PublicInbox::GzipFilter; my $cmd = [qw(echo hello world)]; my $qsp = PublicInbox::Qspawn->new($cmd); $env->{'qspawn.filter'} = PublicInbox::GzipFilter->new; return $qsp->psgi_return($env, undef, sub { [ 200, [ qw(Content-Type application/octet-stream)]] }); } elsif ($path eq '/psgi-return-compressible') { require PublicInbox::Qspawn; my $cmd = [qw(echo goodbye world)]; my $qsp = PublicInbox::Qspawn->new($cmd); return $qsp->psgi_return($env, undef, sub { [200, [qw(Content-Type text/plain)]] }); } elsif ($path eq '/psgi-return-enoent') { require PublicInbox::Qspawn; my $cmd = [ 'this-better-not-exist-in-PATH'.rand ]; my $qsp = PublicInbox::Qspawn->new($cmd); return $qsp->psgi_return($env, undef, sub { [ 200, [ qw(Content-Type application/octet-stream)]] }); } elsif ($path eq '/pid') { $code = 200; push @$body, "$$\n"; } elsif ($path eq '/url_scheme') { $code = 200; push @$body, $env->{'psgi.url_scheme'} } elsif ($path eq '/PI_CONFIG') { $code = 200; push @$body, $pi_config; # show value at ->refresh_groups } [ $code, $h, $body ] }; builder { enable 'ContentLength'; enable 'Head'; $app; } public-inbox-1.9.0/t/httpd-corner.t000066400000000000000000000527541430031475700171760ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ # note: our HTTP server should be standalone and capable of running # generic PSGI/Plack apps. use strict; use v5.10.1; use PublicInbox::TestCommon; use Time::HiRes qw(gettimeofday tv_interval); use PublicInbox::Spawn qw(spawn popen_rd); require_mods(qw(Plack::Util Plack::Builder HTTP::Date HTTP::Status)); use Digest::SHA qw(sha1_hex); use IO::Handle (); use IO::Socket::UNIX; use Fcntl qw(:seek); use Socket qw(IPPROTO_TCP TCP_NODELAY SOL_SOCKET); use POSIX qw(mkfifo); use Carp (); my ($tmpdir, $for_destroy) = tmpdir(); my $fifo = "$tmpdir/fifo"; ok(defined mkfifo($fifo, 0777), 'created FIFO'); my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $psgi = "./t/httpd-corner.psgi"; my $sock = tcp_server(); my @zmods = qw(PublicInbox::GzipFilter IO::Uncompress::Gunzip); # Make sure we don't clobber socket options set by systemd or similar # using socket activation: my ($defer_accept_val, $accf_arg, $TCP_DEFER_ACCEPT); if ($^O eq 'linux') { $TCP_DEFER_ACCEPT = eval { Socket::TCP_DEFER_ACCEPT() } // 9; setsockopt($sock, IPPROTO_TCP, $TCP_DEFER_ACCEPT, 5) or die; my $x = getsockopt($sock, IPPROTO_TCP, $TCP_DEFER_ACCEPT); defined $x or die "getsockopt: $!"; $defer_accept_val = unpack('i', $x); if ($defer_accept_val <= 0) { die "unexpected TCP_DEFER_ACCEPT value: $defer_accept_val"; } } elsif ($^O eq 'freebsd' && system('kldstat -m accf_data >/dev/null') == 0) { require PublicInbox::Daemon; my $var = $PublicInbox::Daemon::SO_ACCEPTFILTER; $accf_arg = pack('a16a240', 'dataready', ''); setsockopt($sock, SOL_SOCKET, $var, $accf_arg) or die "setsockopt: $!"; } sub unix_server ($) { my $s = IO::Socket::UNIX->new( Listen => 1024, Type => Socket::SOCK_STREAM(), Local => $_[0], ) or BAIL_OUT "bind + listen $_[0]: $!"; $s->blocking(0); $s; } my $upath = "$tmpdir/s"; my $unix = unix_server($upath); my $alt = tcp_server(); my $td; my $spawn_httpd = sub { my (@args) = @_; my $x = tcp_host_port($alt); my $cmd = [ '-httpd', @args, "--stdout=$out", "--stderr=$err", $psgi, '-l', "http://$x/?psgi=t/alt.psgi,env.PI_CONFIG=/path/to/alt". ",err=$tmpdir/alt.err" ]; my $env = { PI_CONFIG => '/dev/null' }; $td = start_script($cmd, $env, { 3 => $sock, 4 => $unix, 5 => $alt }); }; $spawn_httpd->(); { my $conn = conn_for($alt, 'alt PSGI path'); $conn->write("GET / HTTP/1.0\r\n\r\n"); $conn->read(my $buf, 4096); like($buf, qr!^/path/to/alt\z!sm, 'alt.psgi loaded on alt socket with correct env'); $conn = conn_for($sock, 'default PSGI path'); $conn->write("GET /PI_CONFIG HTTP/1.0\r\n\r\n"); $conn->read($buf, 4096); like($buf, qr!^/dev/null\z!sm, 'default PSGI on original socket'); my $log = capture("$tmpdir/alt.err"); ok(grep(/ALT/, @$log), 'alt psgi.errors written to'); $log = capture($err); ok(!grep(/ALT/, @$log), 'STDERR not written to'); is(unlink($err, "$tmpdir/alt.err"), 2, 'unlinked stderr and alt.err'); $td->kill('USR1'); # trigger reopen_logs } if ('test worker death') { my $conn = conn_for($sock, 'killed worker'); $conn->write("GET /pid HTTP/1.1\r\nHost:example.com\r\n\r\n"); my $pid; while (defined(my $line = $conn->getline)) { next unless $line eq "\r\n"; chomp($pid = $conn->getline); last; } like($pid, qr/\A[0-9]+\z/, '/pid response'); is(kill('KILL', $pid), 1, 'killed worker'); is($conn->getline, undef, 'worker died and EOF-ed client'); $conn = conn_for($sock, 'respawned worker'); $conn->write("GET /pid HTTP/1.0\r\n\r\n"); ok($conn->read(my $buf, 8192), 'read response'); my ($head, $body) = split(/\r\n\r\n/, $buf); chomp($body); like($body, qr/\A[0-9]+\z/, '/pid response'); isnt($body, $pid, 'respawned worker'); } { # check on prior USR1 signal ok(-e $err, 'stderr recreated after USR1'); ok(-e "$tmpdir/alt.err", 'alt.err recreated after USR1'); } { my $conn = conn_for($sock, 'Header spaces bogus'); $conn->write("GET /empty HTTP/1.1\r\nSpaced-Out : 3\r\n\r\n"); $conn->read(my $buf, 4096); like($buf, qr!\AHTTP/1\.[0-9] 400 !, 'got 400 response on bad request'); } { my $conn = conn_for($sock, 'streaming callback'); $conn->write("GET /callback HTTP/1.0\r\n\r\n"); ok($conn->read(my $buf, 8192), 'read response'); my ($head, $body) = split(/\r\n\r\n/, $buf); is($body, "hello world\n", 'callback body matches expected'); } { my $conn = conn_for($sock, 'getline-die'); $conn->write("GET /getline-die HTTP/1.1\r\nHost: example.com\r\n\r\n"); ok($conn->read(my $buf, 8192), 'read some response'); like($buf, qr!HTTP/1\.1 200\b[^\r]*\r\n!, 'got some sort of header'); is($conn->read(my $nil, 8192), 0, 'read EOF'); $conn = undef; my $after = capture($err); is(scalar(grep(/GETLINE FAIL/, @$after)), 1, 'failure logged'); is(scalar(grep(/CLOSE FAIL/, @$after)), 1, 'body->close not called'); } { my $conn = conn_for($sock, 'close-die'); $conn->write("GET /close-die HTTP/1.1\r\nHost: example.com\r\n\r\n"); ok($conn->read(my $buf, 8192), 'read some response'); like($buf, qr!HTTP/1\.1 200\b[^\r]*\r\n!, 'got some sort of header'); is($conn->read(my $nil, 8192), 0, 'read EOF'); $conn = undef; my $after = capture($err); is(scalar(grep(/GETLINE FAIL/, @$after)), 0, 'getline not failed'); is(scalar(grep(/CLOSE FAIL/, @$after)), 1, 'body->close not called'); } sub check_400 { my ($conn) = @_; my $r = $conn->read(my $buf, 8192); # ECONNRESET and $r==0 are both observed on FreeBSD 11.2 if (!defined($r)) { ok($!{ECONNRESET}, 'ECONNRESET on read (BSD sometimes)'); } elsif ($r > 0) { like($buf, qr!\AHTTP/1\.\d 400 !, 'got 400 response'); } else { is($r, 0, 'got EOF (BSD sometimes)'); } close($conn); # ensure we don't get SIGPIPE later } { local $SIG{PIPE} = 'IGNORE'; my $conn = conn_for($sock, 'excessive header'); $conn->write("GET /callback HTTP/1.0\r\n"); foreach my $i (1..500000) { last unless $conn->write("X-xxxxxJunk-$i: omg\r\n"); } ok(!$conn->write("\r\n"), 'broken request'); check_400($conn); } { my $conn = conn_for($sock, 'excessive body Content-Length'); my $n = (10 * 1024 * 1024) + 1; $conn->write("PUT /sha1 HTTP/1.0\r\nContent-Length: $n\r\n\r\n"); my $r = $conn->read(my $buf, 8192); ok($r > 0, 'read response'); my ($head, $body) = split(/\r\n\r\n/, $buf); like($head, qr/\b413\b/, 'got 413 response'); } { my $conn = conn_for($sock, 'excessive body chunked'); my $n = (10 * 1024 * 1024) + 1; $conn->write("PUT /sha1 HTTP/1.1\r\nTransfer-Encoding: chunked\r\n"); $conn->write("\r\n".sprintf("%x\r\n", $n)); my $r = $conn->read(my $buf, 8192); ok($r > 0, 'read response'); my ($head, $body) = split(/\r\n\r\n/, $buf); like($head, qr/\b413\b/, 'got 413 response'); } { my $conn = conn_for($sock, '1.1 Transfer-Encoding bogus'); $conn->write("PUT /sha1 HTTP/1.1\r\nTransfer-Encoding: bogus\r\n\r\n"); $conn->read(my $buf, 4096); like($buf, qr!\AHTTP/1\.[0-9] 400 !, 'got 400 response on bogus TE'); } { my $conn = conn_for($sock, '1.1 Content-Length bogus'); $conn->write("PUT /sha1 HTTP/1.1\r\nContent-Length: 3.3\r\n\r\n"); $conn->read(my $buf, 4096); like($buf, qr!\AHTTP/1\.[0-9] 400 !, 'got 400 response on bad length'); } { my $req = "PUT /sha1 HTTP/1.1\r\nContent-Length: 3\r\n" . "Content-Length: 3\r\n\r\n"; # this is stricter than it needs to be. Due to the way # Plack::HTTPParser, PSGI specs, and how hash tables work in common # languages; it's not possible to tell the difference between folded # and intentionally bad commas (e.g. "Content-Length: 3, 3") if (0) { require Plack::HTTPParser; # XS or pure Perl require Data::Dumper; Plack::HTTPParser::parse_http_request($req, my $env = {}); diag Data::Dumper::Dumper($env); # "Content-Length: 3, 3" } my $conn = conn_for($sock, '1.1 Content-Length dupe'); $conn->write($req); $conn->read(my $buf, 4096); like($buf, qr!\AHTTP/1\.[0-9] 400 !, 'got 400 response on dupe length'); } { my $conn = conn_for($sock, 'chunk with pipeline'); my $n = 10; my $payload = 'b'x$n; $conn->write("PUT /sha1 HTTP/1.1\r\nTransfer-Encoding: chunked\r\n"); $conn->write("\r\n".sprintf("%x\r\n", $n)); $conn->write($payload . "\r\n0\r\n\r\nGET /empty HTTP/1.0\r\n\r\n"); $conn->read(my $buf, 4096); my $lim = 0; $lim++ while ($conn->read($buf, 4096, length($buf)) && $lim < 9); my $exp = sha1_hex($payload); like($buf, qr!\r\n\r\n${exp}HTTP/1\.0 200 OK\r\n!s, 'chunk parser can handled pipelined requests'); } # Unix domain sockets { my $u = IO::Socket::UNIX->new(Type => SOCK_STREAM, Peer => $upath); ok($u, 'unix socket connected'); $u->write("GET /host-port HTTP/1.0\r\n\r\n"); $u->read(my $buf, 4096); like($buf, qr!\r\n\r\n127\.0\.0\.1 0\z!, 'set REMOTE_ADDR and REMOTE_PORT for Unix socket'); } sub conn_for { my ($dest, $msg) = @_; my $conn = tcp_connect($dest); ok($conn, "connected for $msg"); setsockopt($conn, IPPROTO_TCP, TCP_NODELAY, 1); return $conn; } { my $conn = conn_for($sock, 'host-port'); $conn->write("GET /host-port HTTP/1.0\r\n\r\n"); $conn->read(my $buf, 4096); my ($head, $body) = split(/\r\n\r\n/, $buf); my ($addr, $port) = split(/ /, $body); is($addr, (tcp_host_port($conn))[0], 'host matches addr'); is($port, $conn->sockport, 'port matches'); } # graceful termination { my $conn = conn_for($sock, 'graceful termination via slow header'); $conn->write("GET /slow-header HTTP/1.0\r\n" . "X-Check-Fifo: $fifo\r\n\r\n"); open my $f, '>', $fifo or die "open $fifo: $!\n"; $f->autoflush(1); ok(print($f "hello\n"), 'wrote something to fifo'); is($td->kill, 1, 'started graceful shutdown'); ok(print($f "world\n"), 'wrote else to fifo'); close $f or die "close fifo: $!\n"; $conn->read(my $buf, 8192); my ($head, $body) = split(/\r\n\r\n/, $buf, 2); like($head, qr!\AHTTP/1\.[01] 200 OK!, 'got 200 for slow-header'); is($body, "hello\nworld\n", 'read expected body'); $td->join; is($?, 0, 'no error'); $spawn_httpd->('-W0'); } { my $conn = conn_for($sock, 'graceful termination via slow-body'); $conn->write("GET /slow-body HTTP/1.0\r\n" . "X-Check-Fifo: $fifo\r\n\r\n"); open my $f, '>', $fifo or die "open $fifo: $!\n"; $f->autoflush(1); my $buf; $conn->sysread($buf, 8192); like($buf, qr!\AHTTP/1\.[01] 200 OK!, 'got 200 for slow-body'); like($buf, qr!\r\n\r\n!, 'finished HTTP response header'); foreach my $c ('a'..'c') { $c .= "\n"; ok(print($f $c), 'wrote line to fifo'); $conn->sysread($buf, 8192); is($buf, $c, 'got trickle for reading'); } is($td->kill, 1, 'started graceful shutdown'); ok(print($f "world\n"), 'wrote else to fifo'); close $f or die "close fifo: $!\n"; $conn->sysread($buf, 8192); is($buf, "world\n", 'read expected body'); is($conn->sysread($buf, 8192), 0, 'got EOF from server'); $td->join; is($?, 0, 'no error'); $spawn_httpd->('-W0'); } sub delay { select(undef, undef, undef, shift || rand(0.02)) } my $str = 'abcdefghijklmnopqrstuvwxyz'; my $len = length $str; is($len, 26, 'got the alphabet'); my $check_self = sub { my ($conn) = @_; vec(my $rbits = '', fileno($conn), 1) = 1; select($rbits, undef, undef, 30) or Carp::confess('timed out'); $conn->read(my $buf, 4096); my ($head, $body) = split(/\r\n\r\n/, $buf, 2); like($head, qr/\r\nContent-Length: 40\r\n/s, 'got expected length'); is($body, sha1_hex($str), 'read expected body'); }; SKIP: { my $curl = require_cmd('curl', 1) or skip('curl(1) missing', 4); my $base = 'http://'.tcp_host_port($sock); my $url = "$base/sha1"; my ($r, $w); pipe($r, $w) or die "pipe: $!"; my $cmd = [$curl, qw(--tcp-nodelay -T- -HExpect: -sSN), $url]; open my $cout, '+>', undef or die; open my $cerr, '>', undef or die; my $rdr = { 0 => $r, 1 => $cout, 2 => $cerr }; my $pid = spawn($cmd, undef, $rdr); close $r or die "close read pipe: $!"; foreach my $c ('a'..'z') { print $w $c or die "failed to write to curl: $!"; delay(); } close $w or die "close write pipe: $!"; waitpid($pid, 0); is($?, 0, 'curl exited successfully'); is(-s $cerr, 0, 'no errors from curl'); seek($cout, 0, SEEK_SET); is(<$cout>, sha1_hex($str), 'read expected body'); my $fh = popen_rd([$curl, '-sS', "$base/async-big"]); my $n = 0; my $non_zero = 0; while (1) { my $r = sysread($fh, my $buf, 4096) or last; $n += $r; $buf =~ /\A\0+\z/ or $non_zero++; } close $fh or die "close curl pipe: $!"; is($?, 0, 'curl succesful'); is($n, 30 * 1024 * 1024, 'got expected output from curl'); is($non_zero, 0, 'read all zeros'); require_mods(@zmods, 4); my $buf = xqx([$curl, '-sS', "$base/psgi-return-gzip"]); is($?, 0, 'curl succesful'); IO::Uncompress::Gunzip::gunzip(\$buf => \(my $out)); is($out, "hello world\n"); my $curl_rdr = { 2 => \(my $curl_err = '') }; $buf = xqx([$curl, qw(-sSv --compressed), "$base/psgi-return-compressible"], undef, $curl_rdr); is($?, 0, 'curl --compressed successful'); is($buf, "goodbye world\n", 'gzipped response as expected'); like($curl_err, qr/\bContent-Encoding: gzip\b/, 'curl got gzipped response'); } { my $conn = conn_for($sock, 'psgi_return ENOENT'); print $conn "GET /psgi-return-enoent HTTP/1.1\r\n\r\n" or die; my $buf = ''; sysread($conn, $buf, 16384, length($buf)) until $buf =~ /\r\n\r\n/; like($buf, qr!HTTP/1\.[01] 500\b!, 'got 500 error on ENOENT'); } { my $conn = conn_for($sock, '1.1 pipeline together'); $conn->write("PUT /sha1 HTTP/1.1\r\nUser-agent: hello\r\n\r\n" . "PUT /sha1 HTTP/1.1\r\n\r\n"); my $buf = ''; my @r; until (scalar(@r) >= 2) { my $r = $conn->sysread(my $tmp, 4096); die $! unless defined $r; die "EOF <$buf>" unless $r; $buf .= $tmp; @r = ($buf =~ /\r\n\r\n([a-f0-9]{40})/g); } is(2, scalar @r, 'got 2 responses'); my $i = 3; foreach my $hex (@r) { is($hex, sha1_hex(''), "read expected body $i"); $i++; } } { my $conn = conn_for($sock, 'no TCP_CORK on empty body'); $conn->write("GET /empty HTTP/1.1\r\nHost:example.com\r\n\r\n"); my $buf = ''; my $t0 = [ gettimeofday ]; until ($buf =~ /\r\n\r\n/s) { $conn->sysread($buf, 4096, length($buf)); } my $elapsed = tv_interval($t0, [ gettimeofday ]); ok($elapsed < 0.190, 'no 200ms TCP cork delay on empty body'); } { my $conn = conn_for($sock, 'graceful termination during slow request'); $conn->write("PUT /sha1 HTTP/1.0\r\nContent-Length: $len\r\n\r\n"); # XXX ugh, want a reliable and non-intrusive way to detect # that the server has started buffering our partial request so we # can reliably test graceful termination. Maybe making this and # similar tests dependent on Linux strace is a possibility? delay(0.1); is($td->kill, 1, 'start graceful shutdown'); my $n = 0; foreach my $c ('a'..'z') { $n += $conn->write($c); } ok(kill(0, $td->{pid}), 'graceful shutdown did not kill httpd'); is($n, $len, 'wrote alphabet'); $check_self->($conn); $td->join; is($?, 0, 'no error'); $spawn_httpd->('-W0'); } # various DoS attacks against the chunk parser: { local $SIG{PIPE} = 'IGNORE'; my $conn = conn_for($sock, '1.1 chunk header excessive'); $conn->write("PUT /sha1 HTTP/1.1\r\nTransfer-Encoding:chunked\r\n\r\n"); my $n = 0; my $w; while ($w = $conn->write('ffffffff')) { $n += $w; } ok($!, 'got error set in $!'); is($w, undef, 'write error happened'); ok($n > 0, 'was able to write'); check_400($conn); $conn = conn_for($sock, '1.1 chunk trailer excessive'); $conn->write("PUT /sha1 HTTP/1.1\r\nTransfer-Encoding:chunked\r\n\r\n"); is($conn->syswrite("1\r\na"), 4, 'wrote first header + chunk'); delay(); $n = 0; while ($w = $conn->write("\r")) { $n += $w; } ok($!, 'got error set in $!'); ok($n > 0, 'wrote part of chunk end (\r)'); check_400($conn); } { my $conn = conn_for($sock, '1.1 chunked close trickle'); $conn->write("PUT /sha1 HTTP/1.1\r\nConnection:close\r\n"); $conn->write("Transfer-encoding: chunked\r\n\r\n"); foreach my $x ('a'..'z') { delay(); $conn->write('1'); delay(); $conn->write("\r"); delay(); $conn->write("\n"); delay(); $conn->write($x); delay(); $conn->write("\r"); delay(); $conn->write("\n"); } $conn->write('0'); delay(); $conn->write("\r"); delay(); $conn->write("\n"); delay(); $conn->write("\r"); delay(); $conn->write("\n"); delay(); $check_self->($conn); } { my $conn = conn_for($sock, '1.1 chunked close'); $conn->write("PUT /sha1 HTTP/1.1\r\nConnection:close\r\n"); my $xlen = sprintf('%x', $len); $conn->write("Transfer-Encoding: chunked\r\n\r\n$xlen\r\n" . "$str\r\n0\r\n\r\n"); $check_self->($conn); } { my $conn = conn_for($sock, 'chunked body + pipeline'); $conn->write("PUT /sha1 HTTP/1.1\r\n" . "Transfer-Encoding: chunked\r\n"); delay(); $conn->write("\r\n1\r\n"); delay(); $conn->write('a'); delay(); $conn->write("\r\n0\r\n\r\nPUT /sha1 HTTP/1.1\r\n"); delay(); my $buf = ''; until ($buf =~ /\r\n\r\n[a-f0-9]{40}\z/) { $conn->sysread(my $tmp, 4096); $buf .= $tmp; } my ($head, $body) = split(/\r\n\r\n/, $buf, 2); like($head, qr/\r\nContent-Length: 40\r\n/s, 'got expected length'); is($body, sha1_hex('a'), 'read expected body'); $conn->write("Connection: close\r\n"); $conn->write("Content-Length: $len\r\n\r\n$str"); $check_self->($conn); } { my $conn = conn_for($sock, 'trickle header, one-shot body + pipeline'); $conn->write("PUT /sha1 HTTP/1.0\r\n" . "Connection: keep-alive\r\n"); delay(); $conn->write("Content-Length: $len\r\n\r\n${str}PUT"); my $buf = ''; until ($buf =~ /\r\n\r\n[a-f0-9]{40}\z/) { $conn->sysread(my $tmp, 4096); $buf .= $tmp; } my ($head, $body) = split(/\r\n\r\n/, $buf, 2); like($head, qr/\r\nContent-Length: 40\r\n/s, 'got expected length'); is($body, sha1_hex($str), 'read expected body'); $conn->write(" /sha1 HTTP/1.0\r\nContent-Length: $len\r\n\r\n$str"); $check_self->($conn); } { my $conn = conn_for($sock, 'trickle body'); $conn->write("PUT /sha1 HTTP/1.0\r\n"); $conn->write("Content-Length: $len\r\n\r\n"); my $beg = substr($str, 0, 10); my $end = substr($str, 10); is($beg . $end, $str, 'substr setup correct'); delay(); $conn->write($beg); delay(); $conn->write($end); $check_self->($conn); } { my $conn = conn_for($sock, 'one-shot write'); $conn->write("PUT /sha1 HTTP/1.0\r\n" . "Content-Length: $len\r\n\r\n$str"); $check_self->($conn); } { my $conn = conn_for($sock, 'trickle header, one-shot body'); $conn->write("PUT /sha1 HTTP/1.0\r\n"); delay(); $conn->write("Content-Length: $len\r\n\r\n$str"); $check_self->($conn); } { my $conn = conn_for($sock, '1.1 Connection: close'); $conn->write("PUT /sha1 HTTP/1.1\r\nConnection:close\r\n"); delay(); $conn->write("Content-Length: $len\r\n\r\n$str"); $check_self->($conn); } { my $conn = conn_for($sock, '1.1 pipeline start'); $conn->write("PUT /sha1 HTTP/1.1\r\n\r\nPUT"); my $buf = ''; until ($buf =~ /\r\n\r\n[a-f0-9]{40}\z/) { $conn->sysread(my $tmp, 4096); $buf .= $tmp; } my ($head, $body) = split(/\r\n\r\n/, $buf, 2); like($head, qr/\r\nContent-Length: 40\r\n/s, 'got expected length'); is($body, sha1_hex(''), 'read expected body'); # 2nd request $conn->write(" /sha1 HTTP/1.1\r\n\r\n"); $buf = ''; until ($buf =~ /\r\n\r\n[a-f0-9]{40}\z/) { $conn->sysread(my $tmp, 4096); $buf .= $tmp; } ($head, $body) = split(/\r\n\r\n/, $buf, 2); like($head, qr/\r\nContent-Length: 40\r\n/s, 'got expected length'); is($body, sha1_hex(''), 'read expected body #2'); } SKIP: { skip 'TCP_DEFER_ACCEPT is Linux-only', 1 if $^O ne 'linux'; my $var = $TCP_DEFER_ACCEPT; defined(my $x = getsockopt($sock, IPPROTO_TCP, $var)) or die; is(unpack('i', $x), $defer_accept_val, 'TCP_DEFER_ACCEPT unchanged if previously set'); }; SKIP: { skip 'SO_ACCEPTFILTER is FreeBSD-only', 1 if $^O ne 'freebsd'; skip 'accf_data not loaded: kldload accf_data' if !defined $accf_arg; my $var = $PublicInbox::Daemon::SO_ACCEPTFILTER; defined(my $x = getsockopt($sock, SOL_SOCKET, $var)) or die; is($x, $accf_arg, 'SO_ACCEPTFILTER unchanged if previously set'); }; SKIP: { skip 'only testing lsof(8) output on Linux', 1 if $^O ne 'linux'; my $lsof = require_cmd('lsof', 1) or skip 'no lsof in PATH', 1; my $null_in = ''; my $rdr = { 2 => \(my $null_err), 0 => \$null_in }; my @lsof = xqx([$lsof, '-p', $td->{pid}], undef, $rdr); my $d = [ grep(/\(deleted\)/, @lsof) ]; is_deeply($d, [], 'no lingering deleted inputs') or diag explain($d); # filter out pipes inherited from the parent my @this = xqx([$lsof, '-p', $$], undef, $rdr); my $bad; my $extract_inodes = sub { map {; my @f = split(' ', $_); my $inode = $f[-2]; $bad = $_ if $inode !~ /\A[0-9]+\z/; $inode => 1; } grep (/\bpipe\b/, @_); }; my %child = $extract_inodes->(@lsof); my %parent = $extract_inodes->(@this); skip("inode not in expected format: $bad", 1) if defined($bad); delete @child{(keys %parent)}; is_deeply([], [keys %child], 'no extra pipes with -W0'); }; # ensure compatibility with other PSGI servers SKIP: { require_mods(@zmods, qw(Plack::Test HTTP::Request::Common), 3); use_ok 'HTTP::Request::Common'; use_ok 'Plack::Test'; STDERR->flush; open my $olderr, '>&', \*STDERR or die "dup stderr: $!"; open my $tmperr, '+>', undef or die; open STDERR, '>&', $tmperr or die; STDERR->autoflush(1); my $app = require $psgi; test_psgi($app, sub { my ($cb) = @_; my $req = GET('http://example.com/psgi-return-gzip'); my $res = $cb->($req); my $buf = $res->content; IO::Uncompress::Gunzip::gunzip(\$buf => \(my $out)); is($out, "hello world\n", 'got expected output'); $req = GET('http://example.com/psgi-return-enoent'); $res = $cb->($req); is($res->code, 500, 'got error on ENOENT'); seek($tmperr, 0, SEEK_SET) or die; my $errbuf = do { local $/; <$tmperr> }; like($errbuf, qr/this-better-not-exist/, 'error logged about missing command'); }); open STDERR, '>&', $olderr or die "restore stderr: $!"; } done_testing(); sub capture { my ($f) = @_; open my $fh, '+<', $f or die "failed to open $f: $!\n"; local $/ = "\n"; my @r = <$fh>; truncate($fh, 0) or die "truncate failed on $f: $!\n"; \@r } 1; public-inbox-1.9.0/t/httpd-https.t000066400000000000000000000113651430031475700170410ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use v5.12; use Socket qw(SOCK_STREAM IPPROTO_TCP SOL_SOCKET); use PublicInbox::TestCommon; use File::Copy qw(cp); # IO::Poll is part of the standard library, but distros may split them off... require_mods(qw(IO::Socket::SSL IO::Poll Plack::Util)); my @certs = qw(certs/server-cert.pem certs/server-key.pem certs/server2-cert.pem certs/server2-key.pem); if (scalar(grep { -r $_ } @certs) != scalar(@certs)) { plan skip_all => "certs/ missing for $0, run $^X ./create-certs.perl in certs/"; } use_ok 'PublicInbox::TLS'; use_ok 'IO::Socket::SSL'; my $psgi = "./t/httpd-corner.psgi"; my ($tmpdir, $for_destroy) = tmpdir(); my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $https = tcp_server(); my $td; my $https_addr = tcp_host_port($https); my $cert = "$tmpdir/cert.pem"; my $key = "$tmpdir/key.pem"; cp('certs/server-cert.pem', $cert) or xbail $!; cp('certs/server-key.pem', $key) or xbail $!; my $check_url_scheme = sub { my ($s, $line) = @_; $s->print("GET /url_scheme HTTP/1.1\r\n\r\nHost: example.com\r\n\r\n") or xbail "failed to write HTTP request: $! (line $line)"; my $buf = ''; sysread($s, $buf, 2007, length($buf)) until $buf =~ /\r\n\r\nhttps?/; like($buf, qr!\AHTTP/1\.1 200!, "read HTTPS response (line $line)"); like($buf, qr!\r\nhttps\z!, "psgi.url_scheme is 'https' (line $line)"); }; for my $args ( [ "-lhttps://$https_addr/?key=$key,cert=$cert" ], ) { for ($out, $err) { open my $fh, '>', $_ or die "truncate: $!"; } my $cmd = [ '-httpd', '-W0', @$args, "--stdout=$out", "--stderr=$err", $psgi ]; $td = start_script($cmd, undef, { 3 => $https }); my %o = ( SSL_hostname => 'server.local', SSL_verifycn_name => 'server.local', SSL_verify_mode => SSL_VERIFY_PEER(), SSL_ca_file => 'certs/test-ca.pem', ); # start negotiating a slow TLS connection my $slow = tcp_connect($https, Blocking => 0); $slow = IO::Socket::SSL->start_SSL($slow, SSL_startHandshake => 0, %o); my @poll = (fileno($slow)); my $slow_done = $slow->connect_SSL; if ($slow_done) { diag('W: connect_SSL early OK, slow client test invalid'); push @poll, PublicInbox::Syscall::EPOLLOUT(); } else { push @poll, PublicInbox::TLS::epollbit(); } # normal HTTPS my $c = tcp_connect($https); IO::Socket::SSL->start_SSL($c, %o); $check_url_scheme->($c, __LINE__); # HTTPS with bad hostname $c = tcp_connect($https); $o{SSL_hostname} = $o{SSL_verifycn_name} = 'server.fail'; $c = IO::Socket::SSL->start_SSL($c, %o); is($c, undef, 'HTTPS fails with bad hostname'); $o{SSL_hostname} = $o{SSL_verifycn_name} = 'server.local'; $c = tcp_connect($https); IO::Socket::SSL->start_SSL($c, %o); ok($c, 'HTTPS succeeds again with valid hostname'); # slow TLS connection did not block the other fast clients while # connecting, finish it off: until ($slow_done) { IO::Poll::_poll(-1, @poll); $slow_done = $slow->connect_SSL and last; @poll = (fileno($slow), PublicInbox::TLS::epollbit()); } $slow->blocking(1); ok($slow->print("GET /empty HTTP/1.1\r\n\r\nHost: example.com\r\n\r\n"), 'wrote HTTP request from slow'); my $buf = ''; sysread($slow, $buf, 666, length($buf)) until $buf =~ /\r\n\r\n/; like($buf, qr!\AHTTP/1\.1 200!, 'read HTTP response from slow'); $slow = undef; SKIP: { skip 'TCP_DEFER_ACCEPT is Linux-only', 2 if $^O ne 'linux'; my $var = eval { Socket::TCP_DEFER_ACCEPT() } // 9; defined(my $x = getsockopt($https, IPPROTO_TCP, $var)) or die; ok(unpack('i', $x) > 0, 'TCP_DEFER_ACCEPT set on https'); }; SKIP: { skip 'SO_ACCEPTFILTER is FreeBSD-only', 2 if $^O ne 'freebsd'; if (system('kldstat -m accf_data >/dev/null')) { skip 'accf_data not loaded? kldload accf_data', 2; } require PublicInbox::Daemon; ok(defined($PublicInbox::Daemon::SO_ACCEPTFILTER), 'SO_ACCEPTFILTER defined'); my $x = getsockopt($https, SOL_SOCKET, $PublicInbox::Daemon::SO_ACCEPTFILTER); like($x, qr/\Adataready\0+\z/, 'got dataready accf for https'); }; # switch cert and key: cp('certs/server2-cert.pem', $cert) or xbail $!; cp('certs/server2-key.pem', $key) or xbail $!; $td->kill('HUP') or xbail "kill: $!"; tick(); # wait for SIGHUP to take effect (hopefully :x) my $d = tcp_connect($https); $d = IO::Socket::SSL->start_SSL($d, %o); is($d, undef, 'HTTPS fails with bad hostname after new cert on HUP'); $d = tcp_connect($https); $o{SSL_hostname} = $o{SSL_verifycn_name} = 'server2.local'; is(IO::Socket::SSL->start_SSL($d, %o), $d, 'new hostname to match cert works after HUP'); $check_url_scheme->($d, __LINE__); # existing connection w/ old cert still works: $check_url_scheme->($c, __LINE__); undef $c; undef $d; $td->kill; $td->join; is($?, 0, 'no error in exited process'); } done_testing(); 1; public-inbox-1.9.0/t/httpd-unix.t000066400000000000000000000140731430031475700166610ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # Tests for binding Unix domain sockets use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use Errno qw(EADDRINUSE); use Cwd qw(abs_path); use Carp qw(croak); require_mods(qw(Plack::Util Plack::Builder HTTP::Date HTTP::Status)); use IO::Socket::UNIX; my ($tmpdir, $for_destroy) = tmpdir(); my $unix = "$tmpdir/unix.sock"; my $psgi = './t/httpd-corner.psgi'; my $out = "$tmpdir/out.log"; my $err = "$tmpdir/err.log"; my $td; my $spawn_httpd = sub { my (@args) = @_; my $cmd = [ '-httpd', @args, "--stdout=$out", "--stderr=$err", $psgi ]; $td = start_script($cmd); }; { require PublicInbox::Daemon; my $l = "$tmpdir/named.sock"; my $s = IO::Socket::UNIX->new(Listen => 5, Local => $l, Type => SOCK_STREAM); is(PublicInbox::Daemon::sockname($s), $l, 'sockname works for UNIX'); } ok(!-S $unix, 'UNIX socket does not exist, yet'); $spawn_httpd->("-l$unix", '-W0'); my %o = (Peer => $unix, Type => SOCK_STREAM); for (1..1000) { last if -S $unix && IO::Socket::UNIX->new(%o); select undef, undef, undef, 0.02 } ok(-S $unix, 'UNIX socket was bound by -httpd'); sub check_sock ($) { my ($unix) = @_; my $sock = IO::Socket::UNIX->new(Peer => $unix, Type => SOCK_STREAM) // BAIL_OUT "E: $! connecting to $unix"; ok($sock->write("GET /host-port HTTP/1.0\r\n\r\n"), 'wrote req to server'); ok($sock->read(my $buf, 4096), 'read response'); like($buf, qr!\r\n\r\n127\.0\.0\.1 0\z!, 'set REMOTE_ADDR and REMOTE_PORT for Unix socket'); } check_sock($unix); { # do not clobber existing socket my %err = ( 'linux' => EADDRINUSE, 'freebsd' => EADDRINUSE ); open my $out, '>>', "$tmpdir/1" or die "redirect failed: $!"; open my $err, '>>', "$tmpdir/2" or die "redirect failed: $!"; my $cmd = ['-httpd', '-l', $unix, '-W0', $psgi]; my $ftd = start_script($cmd, undef, { 1 => $out, 2 => $err }); $ftd->join; isnt($?, 0, 'httpd failure set $?'); SKIP: { my $ec = $err{$^O} or skip("not sure if $^O fails with EADDRINUSE", 1); is($? >> 8, $ec, 'httpd failed with EADDRINUSE'); }; open my $fh, "$tmpdir/2" or die "failed to open $tmpdir/2: $!"; local $/; my $e = <$fh>; like($e, qr/no listeners bound/i, 'got error message'); is(-s "$tmpdir/1", 0, 'stdout was empty'); } { is($td->kill, 1, 'terminate existing process'); $td->join; is($?, 0, 'existing httpd exited successfully'); ok(-S $unix, 'unix socket still exists'); } # portable Perl can delay or miss signal dispatches due to races, # so disable some tests on systems lacking signalfd(2) or EVFILT_SIGNAL my $has_sigfd = PublicInbox::Sigfd->new({}, 0) ? 1 : $ENV{TEST_UNRELIABLE}; sub delay_until { my $cond = shift; my $end = time + 30; do { return if $cond->(); select undef, undef, undef, 0.012; } until (time > $end); Carp::confess('condition failed'); } SKIP: { require_mods('Net::Server::Daemonize', 52); $has_sigfd or skip('signalfd / EVFILT_SIGNAL not available', 52); my $pid_file = "$tmpdir/pid"; my $read_pid = sub { my $f = shift; open my $fh, '<', $f or die "open $f failed: $!"; my $pid = do { local $/; <$fh> }; chomp($pid) or die("pid file not ready $!"); $pid; }; for my $w (qw(-W0 -W1)) { # wait for daemonization $spawn_httpd->("-l$unix", '-D', '-P', $pid_file, $w); $td->join; is($?, 0, "daemonized $w process"); check_sock($unix); ok(-s $pid_file, "$w pid file written"); my $pid = $read_pid->($pid_file); is(kill('TERM', $pid), 1, "signaled daemonized $w process"); delay_until(sub { !kill(0, $pid) }); is(kill(0, $pid), 0, "daemonized $w process exited"); ok(!-e $pid_file, "$w pid file unlinked at exit"); } # try a USR2 upgrade with workers: my $httpd = abs_path('blib/script/public-inbox-httpd'); $psgi = abs_path($psgi); my $opt = { run_mode => 0 }; my @args = ("-l$unix", '-D', '-P', $pid_file, -1, $out, -2, $err); $td = start_script([$httpd, @args, $psgi], undef, $opt); $td->join; is($?, 0, "daemonized process again"); check_sock($unix); ok(-s $pid_file, 'pid file written'); my $pid = $read_pid->($pid_file); # stop worker to ensure check_sock below hits $new_pid kill('TTOU', $pid) or die "TTOU failed: $!"; kill('USR2', $pid) or die "USR2 failed: $!"; delay_until(sub { $pid != (eval { $read_pid->($pid_file) } // $pid) }); my $new_pid = $read_pid->($pid_file); isnt($new_pid, $pid, 'new child started'); ok($new_pid > 0, '$new_pid valid'); delay_until(sub { -s "$pid_file.oldbin" }); my $old_pid = $read_pid->("$pid_file.oldbin"); is($old_pid, $pid, '.oldbin pid file written'); ok($old_pid > 0, '$old_pid valid'); check_sock($unix); # ensures $new_pid is ready to receive signals # first, back out of the upgrade kill('QUIT', $new_pid) or die "kill new PID failed: $!"; delay_until(sub { $pid == (eval { $read_pid->($pid_file) } // 0) }); is($read_pid->($pid_file), $pid, 'old PID file restored'); ok(!-f "$pid_file.oldbin", '.oldbin PID file gone'); # retry USR2 upgrade kill('USR2', $pid) or die "USR2 failed: $!"; delay_until(sub { $pid != (eval { $read_pid->($pid_file) } // $pid) }); $new_pid = $read_pid->($pid_file); isnt($new_pid, $pid, 'new child started again'); $old_pid = $read_pid->("$pid_file.oldbin"); is($old_pid, $pid, '.oldbin pid file written'); # drop the old parent kill('QUIT', $old_pid) or die "QUIT failed: $!"; delay_until(sub { !kill(0, $old_pid) }); ok(!-f "$pid_file.oldbin", '.oldbin PID file gone'); # drop the new child check_sock($unix); kill('QUIT', $new_pid) or die "QUIT failed: $!"; delay_until(sub { !kill(0, $new_pid) }); ok(!-f $pid_file, 'PID file is gone'); # try USR2 without workers (-W0) $td = start_script([$httpd, @args, '-W0', $psgi], undef, $opt); $td->join; is($?, 0, 'daemonized w/o workers'); check_sock($unix); $pid = $read_pid->($pid_file); # replace running process kill('USR2', $pid) or die "USR2 failed: $!"; delay_until(sub { !kill(0, $pid) }); check_sock($unix); $pid = $read_pid->($pid_file); kill('QUIT', $pid) or die "USR2 failed: $!"; delay_until(sub { !kill(0, $pid) }); ok(!-f $pid_file, 'PID file is gone'); } done_testing(); public-inbox-1.9.0/t/httpd.t000066400000000000000000000070321430031475700156750ustar00rootroot00000000000000#!perl -w # Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Eml; use Socket qw(IPPROTO_TCP SOL_SOCKET); require_mods(qw(Plack::Util Plack::Builder HTTP::Date HTTP::Status)); # FIXME: too much setup my ($tmpdir, $for_destroy) = tmpdir(); my $home = "$tmpdir/pi-home"; my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $inboxdir = "$tmpdir/i.git"; my $group = 'test-httpd'; my $addr = $group . '@example.com'; my $sock = tcp_server(); my $td; { create_inbox 'test', tmpdir => $inboxdir, sub { my ($im, $ibx) = @_; $im->add(PublicInbox::Eml->new(< To: You Cc: $addr Message-Id: Subject: hihi Date: Thu, 01 Jan 1970 06:06:06 +0000 nntp EOF }; my $i2 = create_inbox 'test-2', sub { my ($im, $ibx) = @_; $im->add(eml_load('t/plack-qp.eml')) or xbail '->add'; }; local $ENV{HOME} = $home; my $cmd = [ '-init', $group, $inboxdir, 'http://example.com/', $addr ]; ok(run_script($cmd), 'init ran properly'); $cmd = [ '-httpd', '-W0', "--stdout=$out", "--stderr=$err" ]; $td = start_script($cmd, undef, { 3 => $sock }); my $http_pfx = 'http://'.tcp_host_port($sock); { my $bad = tcp_connect($sock); print $bad "GETT / HTTP/1.0\r\n\r\n" or die; like(<$bad>, qr!\AHTTP/1\.[01] 405\b!, 'got 405 on bad req'); } my $conn = tcp_connect($sock); ok($conn->write("GET / HTTP/1.0\r\n\r\n"), 'wrote data to socket'); { my $buf; ok($conn->read($buf, 4096), 'read some bytes'); like($buf, qr!\AHTTP/1\.[01] 404\b!, 'got 404 response'); is($conn->read($buf, 1), 0, "EOF"); } is(xsys(qw(git clone -q --mirror), "$http_pfx/$group", "$tmpdir/clone.git"), 0, 'smart clone successful'); # ensure dumb cloning works, too: is(xsys('git', "--git-dir=$inboxdir", qw(config http.uploadpack false)), 0, 'disable http.uploadpack'); is(xsys(qw(git clone -q --mirror), "$http_pfx/$group", "$tmpdir/dumb.git"), 0, 'clone successful'); # test config reload my $cfg = "$home/.public-inbox/config"; open my $fh, '>>', $cfg or xbail "open: $!"; print $fh <{inboxdir} address = test-2\@example.com url = https://example.com/test-2 EOM close $fh or xbail "close $!"; $td->kill('HUP') or BAIL_OUT "failed to kill -httpd: $!"; tick; # wait for HUP to take effect my $buf = do { my $c2 = tcp_connect($sock); $c2->write("GET /test-2/qp\@example.com/raw HTTP/1.0\r\n\r\n") or xbail "c2 write: $!"; local $/; <$c2> }; like($buf, qr!\AHTTP/1\.0 200\b!s, 'got 200 after reload for test-2'); ok($td->kill, 'killed httpd'); $td->join; is(xsys('git', "--git-dir=$tmpdir/clone.git", qw(fsck --no-verbose)), 0, 'fsck on cloned directory successful'); } SKIP: { skip 'TCP_DEFER_ACCEPT is Linux-only', 1 if $^O ne 'linux'; my $var = eval { Socket::TCP_DEFER_ACCEPT() } // 9; defined(my $x = getsockopt($sock, IPPROTO_TCP, $var)) or die; ok(unpack('i', $x) > 0, 'TCP_DEFER_ACCEPT set'); }; SKIP: { skip 'SO_ACCEPTFILTER is FreeBSD-only', 1 if $^O ne 'freebsd'; if (system('kldstat -m accf_http >/dev/null') != 0) { skip 'accf_http not loaded: kldload accf_http', 1; } require PublicInbox::Daemon; ok(defined($PublicInbox::Daemon::SO_ACCEPTFILTER), 'SO_ACCEPTFILTER defined'); my $x = getsockopt($sock, SOL_SOCKET, $PublicInbox::Daemon::SO_ACCEPTFILTER); like($x, qr/\Ahttpready\0+\z/, 'got httpready accf for HTTP'); }; done_testing; public-inbox-1.9.0/t/hval.t000066400000000000000000000035771430031475700155160ustar00rootroot00000000000000# Copyright (C) 2017-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use_ok 'PublicInbox::Hval', qw(to_attr); # reverse the result of to_attr sub from_attr ($) { my ($str) = @_; my $first = ''; if ($str =~ s/\AZ([a-f0-9]{2})//ms) { $first = chr(hex($1)); } $str =~ s!::([a-f0-9]{2})!chr(hex($1))!egms; $str =~ tr!:!/!; utf8::decode($str); $first . $str; } my $ibx = { -no_obfuscate_re => qr/(?:example\.com)\z/i, -no_obfuscate => { 'meta@public-inbox.org' => 1, } }; my $html = <<'EOF'; hello@example.comm hello@example.com meta@public-inbox.org test@public-inbox.org test@a.b.c.org te.st@example.org EOF PublicInbox::Hval::obfuscate_addrs($ibx, $html); my $exp = <<'EOF'; hello@example•comm hello@example.com meta@public-inbox.org test@public-inbox•org test@a•b.c.org te.st@example•org EOF is($html, $exp, 'only obfuscated relevant addresses'); $exp = 'https://example.net/foo@example.net'; PublicInbox::Hval::obfuscate_addrs($ibx, my $res = $exp); is($res, $exp, 'does not obfuscate URL with Message-ID'); is(PublicInbox::Hval::to_filename('foo bar '), 'foo-bar', 'to_filename has no trailing -'); is(PublicInbox::Hval::to_filename("foo bar\nanother line\n"), 'foo-bar', 'to_filename has no repeated -, and nothing past LF'); is(PublicInbox::Hval::to_filename("foo....bar"), 'foo.bar', 'to_filename squeezes -'); is(PublicInbox::Hval::to_filename(''), undef, 'empty string returns undef'); my $s = "\0\x07\n"; PublicInbox::Hval::src_escape($s); is($s, "\\0\\a\n", 'src_escape works as intended'); foreach my $s ('Hello/World.pm', 'Zcat', 'hello world.c', 'Eléanor', '$at') { my $attr = to_attr($s); is(from_attr($attr), $s, "$s => $attr => $s round trips"); } { my $bad = to_attr('foo//bar'); ok(!$bad, 'double-slash rejected'); } done_testing(); public-inbox-1.9.0/t/idx_stack.t000066400000000000000000000040621430031475700165230ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use_ok 'PublicInbox::IdxStack'; my $oid_a = '03c21563cf15c241687966b5b2a3f37cdc193316'; my $oid_b = '963caad026055ab9bcbe3ee9550247f9d8840feb'; my $cmt_a = 'df8e4a0612545d53672036641e9f076efc94c2f6'; my $cmt_b = '3ba7c9fa4a083c439e768882c571c2026a981ca5'; my $stk = PublicInbox::IdxStack->new; is($stk->read_prepare, $stk, 'nothing'); is($stk->num_records, 0, 'no records'); is($stk->pop_rec, undef, 'undef on empty'); $stk = PublicInbox::IdxStack->new; $stk->push_rec('m', 1234, 5678, $oid_a, $cmt_a); is($stk->read_prepare, $stk, 'read_prepare'); is($stk->num_records, 1, 'num_records'); is_deeply([$stk->pop_rec], ['m', 1234, 5678, $oid_a, $cmt_a], 'pop once'); is($stk->pop_rec, undef, 'undef on empty'); $stk = PublicInbox::IdxStack->new; $stk->push_rec('m', 1234, 5678, $oid_a, $cmt_a); $stk->push_rec('d', 1234, 5678, $oid_b, $cmt_b); is($stk->read_prepare, $stk, 'read_prepare'); is($stk->num_records, 2, 'num_records'); is_deeply([$stk->pop_rec], ['d', 1234, 5678, $oid_b, $cmt_b], 'pop'); is_deeply([$stk->pop_rec], ['m', 1234, 5678, $oid_a, $cmt_a], 'pop-pop'); is($stk->pop_rec, undef, 'empty'); SKIP: { $stk = undef; my $nr = $ENV{TEST_GIT_LOG} or skip 'TEST_GIT_LOG unset', 3; open my $fh, '-|', qw(git log --pretty=tformat:%at.%ct.%H), "-$nr" or die "git log: $!"; my @expect; while (<$fh>) { chomp; my ($at, $ct, $H) = split(/\./); $stk //= PublicInbox::IdxStack->new; # not bothering to parse blobs here, just using commit OID # as a blob OID since they're the same size + format $stk->push_rec('m', $at + 0, $ct + 0, $H, $H); push(@expect, [ 'm', $at, $ct, $H, $H ]); } $stk or skip('nothing from git log', 3); is($stk->read_prepare, $stk, 'read_prepare'); is($stk->num_records, scalar(@expect), 'num_records matches expected'); my @result; while (my @tmp = $stk->pop_rec) { unshift @result, \@tmp; } is_deeply(\@result, \@expect, 'results match expected'); } done_testing; public-inbox-1.9.0/t/imap.t000066400000000000000000000124151430031475700155010ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # unit tests (no network) for IMAP, see t/imapd.t for end-to-end tests use strict; use v5.10.1; use PublicInbox::TestCommon; require_git 2.6; require_mods(qw(-imapd)); require_ok 'PublicInbox::IMAP'; require_ok 'PublicInbox::IMAPD'; my ($tmpdir, $for_destroy) = tmpdir(); my $cfgfile = "$tmpdir/config"; { open my $fh, '>', $cfgfile or BAIL_OUT $!; print $fh <new; my @w; local $SIG{__WARN__} = sub { push @w, @_ }; $imapd->refresh_groups; my $self = { imapd => $imapd }; is(scalar(@w), 1, 'got a warning for upper-case'); like($w[0], qr/IGNORE\.THIS/, 'warned about upper-case'); my $res = PublicInbox::IMAP::cmd_list($self, 'tag', 'x', '%'); is(scalar($$res =~ tr/\n/\n/), 2, 'only one result'); like($$res, qr/ x\r\ntag OK/, 'saw expected'); $res = PublicInbox::IMAP::cmd_list($self, 'tag', 'x.', '%'); is(scalar($$res =~ tr/\n/\n/), 3, 'only one result'); is(scalar(my @x = ($$res =~ m/ x\.[zy]\r\n/g)), 2, 'match expected'); $res = PublicInbox::IMAP::cmd_list($self, 't', 'x.(?{die "RCE"})', '%'); like($$res, qr/\At OK /, 'refname does not match attempted RCE'); $res = PublicInbox::IMAP::cmd_list($self, 't', '', '(?{die "RCE"})%'); like($$res, qr/\At OK /, 'wildcard does not match attempted RCE'); } { my $partial_prepare = \&PublicInbox::IMAP::partial_prepare; my $x = {}; my $n = 0; my $r = $partial_prepare->(\$n, $x, [], my $p = 'BODY[9]'); ok($r, $p); $r = $partial_prepare->(\$n, $x, [], $p = 'BODY[9]<5>'); ok($r, $p); $r = $partial_prepare->(\$n, $x, [], $p = 'BODY[9]<5.1>'); ok($r, $p); $r = $partial_prepare->(\$n, $x, [], $p = 'BODY[1.1]'); ok($r, $p); $r = $partial_prepare->(\$n, $x, [], $p = 'BODY[HEADER.FIELDS (DATE FROM)]'); ok($r, $p); $r = $partial_prepare->(\$n, $x, [], $p = 'BODY[HEADER.FIELDS.NOT (TO)]'); ok($r, $p); $r = $partial_prepare->(\$n, $x, [], $p = 'BODY[HEDDER.FIELDS.NOT (TO)]'); ok(!$r, "rejected misspelling $p"); $r = $partial_prepare->(\$n, $x, [], $p = 'BODY[1.1.HEADER.FIELDS (TO)]'); ok($r, $p); my $partial_body = \&PublicInbox::IMAP::partial_body; my $partial_hdr_get = \&PublicInbox::IMAP::partial_hdr_get; my $partial_hdr_not = \&PublicInbox::IMAP::partial_hdr_not; my $hdrs_regexp = \&PublicInbox::IMAP::hdrs_regexp; is_deeply($x, { 'BODY[9]' => [ $partial_body, 9, undef, undef, undef ], 'BODY[9]<5>' => [ $partial_body, 9, undef, 5, undef ], 'BODY[9]<5.1>' => [ $partial_body, 9, undef, 5, 1 ], 'BODY[1.1]' => [ $partial_body, '1.1', undef, undef, undef ], 'BODY[HEADER.FIELDS (DATE FROM)]' => [ $partial_hdr_get, undef, $hdrs_regexp->('DATE FROM'), undef, undef ], 'BODY[HEADER.FIELDS.NOT (TO)]' => [ $partial_hdr_not, undef, $hdrs_regexp->('TO'), undef, undef ], 'BODY[1.1.HEADER.FIELDS (TO)]' => [ $partial_hdr_get, '1.1', $hdrs_regexp->('TO'), undef, undef ], }, 'structure matches expected'); } { my $fetch_compile = \&PublicInbox::IMAP::fetch_compile; my ($cb, $ops, $partial) = $fetch_compile->(['BODY[]']); is($partial, undef, 'no partial fetch data'); is_deeply($ops, [ undef, \&PublicInbox::IMAP::op_crlf_bref, 'BODY[]', \&PublicInbox::IMAP::emit_rfc822 ], 'proper key and op compiled for BODY[]'); ($cb, $ops, $partial) = $fetch_compile->(['BODY', 'BODY[]']); is_deeply($ops, [ undef, \&PublicInbox::IMAP::op_crlf_bref, 'BODY[]', \&PublicInbox::IMAP::emit_rfc822, undef, \&PublicInbox::IMAP::op_eml_new, 'BODY', \&PublicInbox::IMAP::emit_body, ], 'placed op_eml_new before emit_body'); } # UID <=> MSN mapping sub uo2m_str_new ($) { no warnings 'uninitialized'; # uom2m_ary_new may have may have undef pack('S*', @{$_[0]->uo2m_ary_new}); # 2 bytes per-MSN } { my $ibx = bless { uid_range => [ 1, 2, 4 ] }, 'Uo2mTestInbox'; my $imap = bless { uid_base => 0, ibx => $ibx }, 'PublicInbox::IMAP'; my $uo2m = $imap->uo2m_ary_new; is_deeply($uo2m, [ 1, 2, undef, 3 ], 'uo2m ary'); $uo2m = uo2m_str_new($imap); is_deeply([ unpack('S*', $uo2m) ], [ 1, 2, 0, 3 ], 'uo2m str'); $ibx->{uid_range} = [ 1, 2, 4, 5, 6 ]; for ([ 1, 2, undef, 3 ], $uo2m) { $imap->{uo2m} = $_; is($imap->uid2msn(1), 1, 'uid2msn'); is($imap->uid2msn(4), 3, 'uid2msn'); is($imap->uo2m_last_uid, 4, 'uo2m_last_uid'); $imap->uo2m_extend(6); is($imap->uid2msn(5), 4, 'uid2msn 5 => 4'); is($imap->uid2msn(6), 5, 'uid2msn 6 => 5'); is($imap->uo2m_last_uid, 6, 'uo2m_last_uid'); my $msn2uid = $imap->msn2uid; my $range = '1,4:5'; $imap->can('msn_to_uid_range')->($msn2uid, $range); is($range, '1,5:6', 'range converted'); } } done_testing; package Uo2mTestInbox; use strict; require PublicInbox::DummyInbox; our @ISA = qw(PublicInbox::DummyInbox); sub over { shift } sub uid_range { my ($self, $beg, $end, undef) = @_; [ grep { $_ >= $beg && $_ <= $end } @{$self->{uid_range}} ]; } public-inbox-1.9.0/t/imap_searchqp.t000066400000000000000000000074641430031475700173770ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Time::Local qw(timegm); use PublicInbox::TestCommon; require_mods(qw(-imapd)); use_ok 'PublicInbox::IMAPsearchqp'; use_ok 'PublicInbox::IMAP'; my $imap = bless {}, 'PublicInbox::IMAP'; my $q; my $parse = sub { PublicInbox::IMAPsearchqp::parse($imap, $_[0]) }; $q = $parse->(qq{BODY oops}); is($q->{xap}, 'b:"oops"', 'BODY key supported'); $q = $parse->(qq{OR HEADER TO Brian (OR FROM Ryan (OR TO Joe CC Scott))}); is($q->{sql}, undef, 'not using SQLite for complex query'); is($q->{xap}, '(t:"brian" OR (f:"ryan" OR (t:"joe" OR c:"scott")))', 'complex query matches Xapian query string'); $q = $parse->(qq{HEADER CC b SENTSINCE 2-Oct-1993}); is($q->{xap}, 'c:"b" d:19931002..', 'compound query'); $q = $parse->(qq{CHARSET UTF-8 From b}); is($q->{xap}, 'f:"b"', 'charset handled'); $q = $parse->(qq{CHARSET WTF-8 From b}); like($q, qr/\ANO \[/, 'bad charset rejected'); { # TODO: squelch errors by default? clients could flood logs open my $fh, '>:scalar', \(my $buf) or die; local *STDERR = $fh; $q = $parse->(qq{CHARSET}); } like($q, qr/\ABAD /, 'bad charset rejected'); $q = $parse->(qq{HEADER CC B (SENTBEFORE 2-Oct-1993)}); is($q->{xap}, 'c:"b" d:..19931002', 'compound query w/ parens'); { # limit recursion, stack and CPU cycles ain't free my $n = 10; my $s = ('('x$n ). 'To a' . ( ')'x$n ); $q = $parse->($s); is($q->{xap}, 't:"a"', 'nesting works'); ++$n; $s = ('('x$n ). 'To a' . ( ')'x$n ); my $err = $parse->($s); like($err, qr/\ABAD /, 'reject deep nesting'); } # IMAP has at least 6 ways of interpreting a date { my $t0 = timegm(0, 0, 0, 2, 10 - 1, 1993); my $t1 = $t0 + 86399; # no leap (day|second) support my $s; $q = $parse->($s = qq{SENTBEFORE 2-Oct-1993}); is_deeply($q->{sql}, \" AND ds <= $t0", 'SENTBEFORE SQL'); $q = $parse->("FROM z $s"); is($q->{xap}, 'f:"z" d:..19931002', 'SENTBEFORE Xapian'); $q = $parse->($s = qq{SENTSINCE 2-Oct-1993}); is_deeply($q->{sql}, \" AND ds >= $t0", 'SENTSINCE SQL'); $q = $parse->("FROM z $s"); is($q->{xap}, 'f:"z" d:19931002..', 'SENTSINCE Xapian'); $q = $parse->($s = qq{SENTON 2-Oct-1993}); is_deeply($q->{sql}, \" AND ds >= $t0 AND ds <= $t1", 'SENTON SQL'); $q = $parse->("FROM z $s"); is($q->{xap}, 'f:"z" dt:19931002000000..19931002235959', 'SENTON Xapian'); $q = $parse->($s = qq{BEFORE 2-Oct-1993}); is_deeply($q->{sql}, \" AND ts <= $t0", 'BEFORE SQL'); $q = $parse->("FROM z $s"); is($q->{xap}, qq{f:"z" rt:..$t0}, 'BEFORE Xapian'); $q = $parse->($s = qq{SINCE 2-Oct-1993}); is_deeply($q->{sql}, \" AND ts >= $t0", 'SINCE SQL'); $q = $parse->("FROM z $s"); is($q->{xap}, qq{f:"z" rt:$t0..}, 'SINCE Xapian'); $q = $parse->($s = qq{ON 2-Oct-1993}); is_deeply($q->{sql}, \" AND ts >= $t0 AND ts <= $t1", 'ON SQL'); $q = $parse->("FROM z $s"); is($q->{xap}, qq{f:"z" rt:$t0..$t1}, 'ON Xapian'); } { $imap->{uo2m} = pack('S*', (1..50000)); $imap->{uid_base} = 50000; my $err = $parse->(qq{9:}); my $s; like($err, qr/\ABAD /, 'bad MSN range'); $err = $parse->(qq{UID 9:}); like($err, qr/\ABAD /, 'bad UID range'); $err = $parse->(qq{FROM x UID 9:}); like($err, qr/\ABAD /, 'bad UID range with Xapian'); $err = $parse->(qq{FROM x 9:}); like($err, qr/\ABAD /, 'bad UID range with Xapian'); $q = $parse->($s = qq{UID 50009:50099}); is_deeply($q->{sql}, \' AND (num >= 50009 AND num <= 50099)', 'SQL generated for UID range'); $q = $parse->("CC x $s"); is($q->{xap}, qq{c:"x" uid:50009..50099}, 'Xapian generated for UID range'); $q = $parse->($s = qq{9:99}); is_deeply($q->{sql}, \' AND (num >= 50009 AND num <= 50099)', 'SQL generated for MSN range'); $q = $parse->("CC x $s"); is($q->{xap}, qq{c:"x" uid:50009..50099}, 'Xapian generated for MSN range'); } done_testing; public-inbox-1.9.0/t/imap_tracker.t000066400000000000000000000030531430031475700172120ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use Test::More; use strict; use PublicInbox::TestCommon; require_mods 'DBD::SQLite'; use_ok 'PublicInbox::IMAPTracker'; my ($tmpdir, $for_destroy) = tmpdir(); mkdir "$tmpdir/old" or die "mkdir $tmpdir/old: $!"; my $old = "$tmpdir/old/imap.sqlite3"; my $cur = "$tmpdir/data/public-inbox/imap.sqlite3"; local $ENV{XDG_DATA_HOME} = "$tmpdir/data"; { local $ENV{PI_DIR} = "$tmpdir/old"; my $tracker = PublicInbox::IMAPTracker->new; ok(-f $cur, '->new creates file'); $tracker = undef; ok(-f $cur, 'file persists after DESTROY'); link $cur, $old or die "link $cur => $old: $!"; unlink $cur or die "unlink $cur: $!"; $tracker = PublicInbox::IMAPTracker->new; ok(!-f $cur, '->new does not create new file if old is present'); } SKIP: { my $nproc = $ENV{TEST_STRESS_NPROC}; skip 'TEST_STRESS_NPROC= not set', 1 unless $nproc; my $nr = $ENV{TEST_STRESS_NR} // 10000; diag "TEST_STRESS_NPROC=$nproc TEST_STRESS_NR=$nr"; require POSIX; for my $n (1..$nproc) { my $pid = fork // BAIL_OUT "fork: $!"; if ($pid == 0) { my $url = "imap://example.com/INBOX.$$"; my $uidval = time; eval { my $itrk = PublicInbox::IMAPTracker->new($url); for my $uid (1..$nr) { $itrk->update_last($uidval, $uid); my ($uv, $u) = $itrk->get_last; } }; warn "E: $n $$ - $@\n" if $@; POSIX::_exit($@ ? 1 : 0); } } while (1) { my $pid = waitpid(-1, 0); last if $pid < 0; is($?, 0, "$pid exited"); } } done_testing; public-inbox-1.9.0/t/imapd-tls.t000066400000000000000000000154361430031475700164530ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Socket qw(IPPROTO_TCP SOL_SOCKET); use PublicInbox::TestCommon; # IO::Poll is part of the standard library, but distros may split it off... require_mods(qw(-imapd IO::Socket::SSL Mail::IMAPClient IO::Poll)); my $imap_client = 'Mail::IMAPClient'; $imap_client->can('starttls') or plan skip_all => 'Mail::IMAPClient does not support TLS'; my $can_compress = $imap_client->can('compress'); if ($can_compress) { # hope this gets fixed upstream, soon require PublicInbox::IMAPClient; $imap_client = 'PublicInbox::IMAPClient'; } my $cert = 'certs/server-cert.pem'; my $key = 'certs/server-key.pem'; unless (-r $key && -r $cert) { plan skip_all => "certs/ missing for $0, run $^X ./create-certs.perl in certs/"; } use_ok 'PublicInbox::TLS'; use_ok 'IO::Socket::SSL'; my $version = 1; # v2 needs newer git require_git('2.6') if $version >= 2; my ($tmpdir, $for_destroy) = tmpdir(); my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $pi_config; my $group = 'test-imapd-tls'; my $addr = $group . '@example.com'; my $starttls = tcp_server(); my $imaps = tcp_server(); my $ibx = create_inbox 'imapd-tls', version => $version, -primary_address => $addr, indexlevel => 'basic', sub { my ($im, $ibx) = @_; $im->add(eml_load('t/data/0001.patch')) or BAIL_OUT '->add'; $pi_config = "$ibx->{inboxdir}/pi_config"; open my $fh, '>', $pi_config or BAIL_OUT "open: $!"; print $fh <{inboxdir} address = $addr indexlevel = basic newsgroup = $group EOF close $fh or BAIL_OUT "close: $!\n"; }; $pi_config //= "$ibx->{inboxdir}/pi_config"; my $imaps_addr = tcp_host_port($imaps); my $starttls_addr = tcp_host_port($starttls); my $env = { PI_CONFIG => $pi_config }; my $td; for my $args ( [ "--cert=$cert", "--key=$key", "-limaps://$imaps_addr", "-limap://$starttls_addr" ], ) { for ($out, $err) { open my $fh, '>', $_ or BAIL_OUT "truncate: $!"; } my $cmd = [ '-imapd', '-W0', @$args, "--stdout=$out", "--stderr=$err" ]; $td = start_script($cmd, $env, { 3 => $starttls, 4 => $imaps }); my %o = ( SSL_hostname => 'server.local', SSL_verifycn_name => 'server.local', SSL_verify_mode => SSL_VERIFY_PEER(), SSL_ca_file => 'certs/test-ca.pem', ); # start negotiating a slow TLS connection my $slow = tcp_connect($imaps, Blocking => 0); $slow = IO::Socket::SSL->start_SSL($slow, SSL_startHandshake => 0, %o); my $slow_done = $slow->connect_SSL; my @poll; if ($slow_done) { diag('W: connect_SSL early OK, slow client test invalid'); use PublicInbox::Syscall qw(EPOLLIN EPOLLOUT); @poll = (fileno($slow), EPOLLIN | EPOLLOUT); } else { @poll = (fileno($slow), PublicInbox::TLS::epollbit()); } # we should call connect_SSL much later... my %imaps_opt = (User => 'a', Password => 'b', Server => $imaps->sockhost, Port => $imaps->sockport); # IMAPS my $c = $imap_client->new(%imaps_opt, Ssl => [ %o ]); ok($c && $c->IsAuthenticated, 'authenticated'); ok($c->select($group), 'SELECT works'); ok(!(scalar $c->has_capability('STARTTLS')), 'starttls not advertised with IMAPS'); ok(!$c->starttls, "starttls fails"); ok($c->has_capability('COMPRESS') || $c->has_capability('COMPRESS=DEFLATE'), 'compress advertised'); ok($c->compress, 'compression enabled with IMAPS'); ok(!$c->starttls, 'starttls still fails'); ok($c->noop, 'noop succeeds'); ok($c->logout, 'logout succeeds'); # STARTTLS my %imap_opt = (Server => $starttls->sockhost, Port => $starttls->sockport); $c = $imap_client->new(%imap_opt); ok(scalar $c->has_capability('STARTTLS'), 'starttls advertised'); ok($c->Starttls([ %o ]), 'set starttls options'); ok($c->starttls, '->starttls works'); ok(!(scalar($c->has_capability('STARTTLS'))), 'starttls not advertised'); ok(!$c->starttls, '->starttls again fails'); ok(!(scalar($c->has_capability('STARTTLS'))), 'starttls still not advertised'); ok($c->examine($group), 'EXAMINE works'); ok($c->noop, 'NOOP works'); ok($c->compress, 'compression enabled with IMAPS'); ok($c->noop, 'NOOP works after compress'); ok($c->logout, 'logout succeeds after compress'); # STARTTLS with bad hostname $o{SSL_hostname} = $o{SSL_verifycn_name} = 'server.invalid'; $c = $imap_client->new(%imap_opt); ok(scalar $c->has_capability('STARTTLS'), 'starttls advertised'); ok($c->Starttls([ %o ]), 'set starttls options'); ok(!$c->starttls, '->starttls fails with bad hostname'); $c = $imap_client->new(%imap_opt); ok($c->noop, 'NOOP still works from plain IMAP'); # IMAPS with bad hostname $c = $imap_client->new(%imaps_opt, Ssl => [ %o ]); is($c, undef, 'IMAPS fails with bad hostname'); # make hostname valid $o{SSL_hostname} = $o{SSL_verifycn_name} = 'server.local'; $c = $imap_client->new(%imaps_opt, Ssl => [ %o ]); ok($c, 'IMAPS succeeds again with valid hostname'); # slow TLS connection did not block the other fast clients while # connecting, finish it off: until ($slow_done) { IO::Poll::_poll(-1, @poll); $slow_done = $slow->connect_SSL and last; @poll = (fileno($slow), PublicInbox::TLS::epollbit()); } $slow->blocking(1); ok(sysread($slow, my $greet, 4096) > 0, 'slow got a greeting'); like($greet, qr/\A\* OK \[CAPABILITY IMAP4rev1 /, 'got greeting'); is(syswrite($slow, "1 LOGOUT\r\n"), 10, 'slow wrote LOGOUT'); ok(sysread($slow, my $end, 4096) > 0, 'got end'); is(sysread($slow, my $eof, 4096), 0, 'got EOF'); test_lei(sub { lei_ok qw(ls-mail-source), "imap://$starttls_addr", \'STARTTLS not used by default'; ok(!lei(qw(ls-mail-source -c imap.starttls=true), "imap://$starttls_addr"), 'STARTTLS verify fails'); }); SKIP: { skip 'TCP_DEFER_ACCEPT is Linux-only', 2 if $^O ne 'linux'; my $var = eval { Socket::TCP_DEFER_ACCEPT() } // 9; defined(my $x = getsockopt($imaps, IPPROTO_TCP, $var)) or die; ok(unpack('i', $x) > 0, 'TCP_DEFER_ACCEPT set on IMAPS'); defined($x = getsockopt($starttls, IPPROTO_TCP, $var)) or die; is(unpack('i', $x), 0, 'TCP_DEFER_ACCEPT is 0 on plain IMAP'); }; SKIP: { skip 'SO_ACCEPTFILTER is FreeBSD-only', 2 if $^O ne 'freebsd'; if (system('kldstat -m accf_data >/dev/null')) { skip 'accf_data not loaded? kldload accf_data', 2; } require PublicInbox::Daemon; my $x = getsockopt($imaps, SOL_SOCKET, $PublicInbox::Daemon::SO_ACCEPTFILTER); like($x, qr/\Adataready\0+\z/, 'got dataready accf for IMAPS'); $x = getsockopt($starttls, IPPROTO_TCP, $PublicInbox::Daemon::SO_ACCEPTFILTER); is($x, undef, 'no BSD accept filter for plain IMAP'); }; $c = undef; $td->kill; $td->join; is($?, 0, 'no error in exited process'); open my $fh, '<', $err or BAIL_OUT "open $err failed: $!"; my $eout = do { local $/; <$fh> }; unlike($eout, qr/wide/i, 'no Wide character warnings'); } done_testing; public-inbox-1.9.0/t/imapd.t000066400000000000000000000474651430031475700156620ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ # end-to-end IMAP tests, see unit tests in t/imap.t, too use strict; use Test::More; use Time::HiRes (); use PublicInbox::TestCommon; use PublicInbox::Config; require_mods(qw(-imapd Mail::IMAPClient)); my $imap_client = 'Mail::IMAPClient'; my $can_compress = $imap_client->can('compress'); if ($can_compress) { # hope this gets fixed upstream, soon require PublicInbox::IMAPClient; $imap_client = 'PublicInbox::IMAPClient'; } require_ok 'PublicInbox::IMAP'; my $first_range = '0'; my $level = 'basic'; SKIP: { require_mods('Search::Xapian', 1); $level = 'medium'; }; my @V = (1); push(@V, 2) if require_git('2.6', 1); my ($tmpdir, $for_destroy) = tmpdir(); my $home = "$tmpdir/home"; BAIL_OUT "mkdir: $!" unless (mkdir($home) and mkdir("$home/.public-inbox")); my @ibx; open my $cfgfh, '>', "$home/.public-inbox/config" or BAIL_OUT; print $cfgfh < "$tmpdir/i$V", version => $V, indexlevel => $level, sub { my ($im) = @_; $im->add($eml //= eml_load('t/utf8.eml')) or BAIL_OUT; }); push @ibx, $ibx; $ibx->{newsgroup} = "inbox.i$V"; print $cfgfh <{inboxdir} address = $ibx->{-primary_address}; newsgroup = inbox.i$V url = http://example.com/i$V indexlevel = $level EOF } close $cfgfh or BAIL_OUT; local $ENV{HOME} = $home; my $sock = tcp_server(); my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $cmd = [ '-imapd', '-W0', "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, undef, { 3 => $sock }) or BAIL_OUT("-imapd: $?"); my ($ihost, $iport) = tcp_host_port($sock); my %mic_opt = ( Server => $ihost, Port => $iport, Uid => 1 ); my $mic = $imap_client->new(%mic_opt); my $pre_login_capa = $mic->capability; is(grep(/\AAUTH=ANONYMOUS\z/, @$pre_login_capa), 1, 'AUTH=ANONYMOUS advertised pre-login'); $mic->User('lorelei'); $mic->Password('Hunter2'); ok($mic->login && $mic->IsAuthenticated, 'LOGIN works'); my $post_login_capa = $mic->capability; ok(join("\n", @$pre_login_capa) ne join("\n", @$post_login_capa), 'got different capabilities post-login'); $mic_opt{Authmechanism} = 'ANONYMOUS'; $mic_opt{Authcallback} = sub { '' }; $mic = $imap_client->new(%mic_opt); ok($mic && $mic->login && $mic->IsAuthenticated, 'AUTHENTICATE ANONYMOUS'); my $post_auth_anon_capa = $mic->capability; is_deeply($post_auth_anon_capa, $post_login_capa, 'auth anon has same capabilities'); my $e; ok($mic->noop, 'NOOP'); ok($mic->noop, 'NOOP (again)'); # for warnings ok(!$mic->examine('foo') && ($e = $@), 'EXAMINE non-existent'); like($e, qr/\bNO\b/, 'got a NO on EXAMINE for non-existent'); ok(!$mic->select('foo') && ($e = $@), 'EXAMINE non-existent'); like($e, qr/\bNO\b/, 'got a NO on EXAMINE for non-existent'); my $mailbox1 = "inbox.i1.$first_range"; ok($mic->select('inbox.i1'), 'SELECT on parent succeeds'); ok($mic->noop, 'NOOP while selected'); ok($mic->noop, 'NOOP again while selected'); # check warnings later ok($mic->select($mailbox1), 'SELECT succeeds'); ok($mic->examine($mailbox1), 'EXAMINE succeeds'); my @raw = $mic->status($mailbox1, qw(Messages uidnext uidvalidity)); is(scalar(@raw), 2, 'got status response'); like($raw[0], qr/\A\*\x20STATUS\x20inbox\.i1\.$first_range\x20 \(MESSAGES\x20[1-9][0-9]*\x20 UIDNEXT\x20\d+\x20UIDVALIDITY\x20\d+\)\r\n/sx); like($raw[1], qr/\A\S+ OK /, 'finished status response'); my @orig_list = @raw = $mic->list; like($raw[0], qr/^\* LIST \(.*?\) "\." INBOX/, 'got an inbox'); like($raw[-1], qr/^\S+ OK /, 'response ended with OK'); is(scalar(@raw), scalar(@V) * 2 + 2, 'default LIST response'); @raw = $mic->list('', 'inbox.i1'); is(scalar(@raw), 2, 'limited LIST response'); like($raw[0], qr/^\* LIST \(.*?\) "\." INBOX/, 'got an inbox.i1'); like($raw[-1], qr/^\S+ OK /, 'response ended with OK'); my $ret = $mic->search('all') or BAIL_OUT "SEARCH FAIL $@"; is_deeply($ret, [ 1 ], 'search all works'); $ret = $mic->search('uid 1') or BAIL_OUT "SEARCH FAIL $@"; is_deeply($ret, [ 1 ], 'search UID 1 works'); $ret = $mic->search('uid 1:1') or BAIL_OUT "SEARCH FAIL $@"; is_deeply($ret, [ 1 ], 'search UID 1:1 works'); $ret = $mic->search('uid 1:*') or BAIL_OUT "SEARCH FAIL $@"; is_deeply($ret, [ 1 ], 'search UID 1:* works'); $ret = $mic->search('DELETED') or BAIL_OUT "SEARCH FAIL $@"; is_deeply($ret, [], 'searching for DELETED returns nothing'); SKIP: { skip 'Xapian missing', 8 if $level eq 'basic'; my $x = $mic->search(qw(smaller 99999)); is_deeply($x, [1], 'SMALLER works with Xapian (hit)'); $x = $mic->search(qw(smaller 9)); is_deeply($x, [], 'SMALLER works with Xapian (miss)'); $x = $mic->search(qw(larger 99999)); is_deeply($x, [], 'LARGER works with Xapian (miss)'); $x = $mic->search(qw(larger 9)); is_deeply($x, [1], 'LARGER works with Xapian (hit)'); $x = $mic->search(qw(HEADER Message-ID testmessage@example.com)); is_deeply($x, [1], 'HEADER Message-ID works'); $x = $mic->search(qw(DRAFT HEADER Message-ID testmessage@example.com)); is_deeply($x, [], 'impossible (DRAFT) key filters out match'); $x = $mic->search(qw(HEADER Message-ID miss)); is_deeply($x, [], 'HEADER Message-ID can miss'); my @q = qw[OR HEADER Message-ID testmessage@example.com (OR FROM Ryan (OR TO Joe CC Scott))]; $x = $mic->search(join(' ', @q)); is_deeply($x, [1], 'nested query works'); } is_deeply(scalar $mic->flags('1'), [], '->flags works'); { # RFC 3501 section 6.4.8 states: # Also note that a UID range of 559:* always includes the # UID of the last message in the mailbox, even if 559 is # higher than any assigned UID value. my $exp = $mic->fetch_hash(1, 'UID'); $ret = $mic->fetch_hash('559:*', 'UID'); is_deeply($ret, $exp, 'beginning range too big'); { my @w; # Mail::IMAPClient hits a warning via overload local $SIG{__WARN__} = sub { push @w, @_ }; $ret = $mic->fetch_hash(my $r = '559:558', 'UID'); is_deeply($ret, {}, "out-of-range UID FETCH $r"); @w = grep(!/\boverload\.pm\b/, @w); is_deeply(\@w, [], 'no unexpected warning'); } $ret = $mic->fetch_hash(my $r = '558:559', 'UID'); is_deeply($ret, {}, "out-of-range UID FETCH $r"); } for my $r ('1:*', '1') { $ret = $mic->fetch_hash($r, 'RFC822') or BAIL_OUT "FETCH $@"; is_deeply([keys %$ret], [1]); like($ret->{1}->{RFC822}, qr/\r\n\r\nThis is a test/, 'read full'); # ensure Mail::IMAPClient behaves my $str = $mic->message_string($r) or BAIL_OUT "->message_string: $@"; is($str, $ret->{1}->{RFC822}, '->message_string works as expected'); my $sz = $mic->fetch_hash($r, 'RFC822.size') or BAIL_OUT "FETCH $@"; is($sz->{1}->{'RFC822.SIZE'}, length($ret->{1}->{RFC822}), 'RFC822.SIZE'); $ret = $mic->fetch_hash($r, 'RFC822.HEADER') or BAIL_OUT "FETCH $@"; is_deeply([keys %$ret], [1]); like($ret->{1}->{'RFC822.HEADER'}, qr/^Message-ID: /ms, 'read header'); $ret = $mic->fetch_hash($r, 'INTERNALDATE') or BAIL_OUT "FETCH $@"; is($ret->{1}->{'INTERNALDATE'}, '01-Jan-1970 00:00:00 +0000', 'internaldate matches'); ok(!$mic->fetch_hash($r, 'INFERNALDATE'), 'bogus attribute fails'); my $envelope = $mic->get_envelope($r) or BAIL_OUT("get_envelope: $@"); is($envelope->{bcc}, 'NIL', 'empty bcc'); is($envelope->{messageid}, '', 'messageid'); is(scalar @{$envelope->{to}}, 1, 'one {to} header'); # *sigh* too much to verify... #use Data::Dumper; diag Dumper($envelope); $ret = $mic->fetch_hash($r, 'FLAGS') or BAIL_OUT "FETCH $@"; is_deeply($ret->{1}->{FLAGS}, '', 'no flags'); $ret = $mic->fetch_hash($r, 'BODY[1]') or BAIL_OUT "FETCH $@"; like($ret->{1}->{'BODY[1]'}, qr/\AThis is a test message/, 'BODY[1]'); $ret = $mic->fetch_hash($r, 'BODY[1]<1>') or BAIL_OUT "FETCH $@"; like($ret->{1}->{'BODY[1]<1>'}, qr/\Ahis is a test message/, 'BODY[1]<1>'); $ret = $mic->fetch_hash($r, 'BODY[1]<2.3>') or BAIL_OUT "FETCH $@"; is($ret->{1}->{'BODY[1]<2>'}, "is ", 'BODY[1]<2.3>'); $ret = $mic->bodypart_string($r, 1, 3, 2) or BAIL_OUT "bodypart_string $@"; is($ret, "is ", 'bodypart string'); $ret = $mic->fetch_hash($r, 'BODY[HEADER.FIELDS.NOT (Message-ID)]') or BAIL_OUT "FETCH $@"; $ret = $ret->{1}->{'BODY[HEADER.FIELDS.NOT (MESSAGE-ID)]'}; unlike($ret, qr/message-id/i, 'Message-ID excluded'); like($ret, qr/\r\n\r\n\z/s, 'got header end'); $ret = $mic->fetch_hash($r, 'BODY[HEADER.FIELDS (Message-ID)]') or BAIL_OUT "FETCH $@"; is($ret->{1}->{'BODY[HEADER.FIELDS (MESSAGE-ID)]'}, 'Message-ID: '."\r\n\r\n", 'got only Message-ID'); my $bs = $mic->get_bodystructure($r) or BAIL_OUT("bodystructure: $@"); ok($bs, 'got a bodystructure'); is(lc($bs->bodytype), 'text', '->bodytype'); is(lc($bs->bodyenc), '8bit', '->bodyenc'); } ok($mic->has_capability('COMPRESS') || $mic->has_capability('COMPRESS=DEFLATE'), 'deflate cap'); SKIP: { skip 'Mail::IMAPClient too old for ->compress', 2 if !$can_compress; my $c = $imap_client->new(%mic_opt); ok($c && $c->compress, 'compress enabled'); ok($c->examine($mailbox1), 'EXAMINE succeeds after COMPRESS'); $ret = $c->search('uid 1:*') or BAIL_OUT "SEARCH FAIL $@"; is_deeply($ret, [ 1 ], 'search UID 1:* works after compression'); } ok($mic->logout, 'logout works'); my $have_inotify = eval { require Linux::Inotify2; 1 }; for my $ibx (@ibx) { my $name = $ibx->{name}; my $ng = $ibx->{newsgroup}; my $mic = $imap_client->new(%mic_opt); ok($mic && $mic->login && $mic->IsAuthenticated, "authed $name"); my $mb = "$ng.$first_range"; my $uidnext = $mic->uidnext($mb); # we'll fetch BODYSTRUCTURE on this ok($uidnext, 'got uidnext for later fetch'); ok($mic->has_capability('IDLE'), "IDLE capa $name"); ok(!$mic->idle, "IDLE fails w/o SELECT/EXAMINE $name"); ok($mic->examine($mb), "EXAMINE $ng succeeds"); ok(my $idle_tag = $mic->idle, "IDLE succeeds on $ng"); my $im = $ibx->importer(0); $im->add(eml_load 't/data/message_embed.eml') or BAIL_OUT; $im->done; my $t0 = Time::HiRes::time(); ok(my @res = $mic->idle_data(11), "IDLE succeeds on $ng"); is(grep(/\A\* [0-9] EXISTS\b/, @res), 1, 'got EXISTS message'); ok((Time::HiRes::time() - $t0) < 10, 'IDLE client notified'); my (@ino_info, $ino_fdinfo); SKIP: { skip 'no inotify support', 1 unless $have_inotify; skip 'missing /proc/$PID/fd', 1 if !-d "/proc/$td->{pid}/fd"; my @ino = grep { readlink($_) =~ /\binotify\b/ } glob("/proc/$td->{pid}/fd/*"); is(scalar(@ino), 1, 'only one inotify FD'); my $ino_fd = (split('/', $ino[0]))[-1]; $ino_fdinfo = "/proc/$td->{pid}/fdinfo/$ino_fd"; if (open my $fh, '<', $ino_fdinfo) { local $/ = "\n"; @ino_info = grep(/^inotify wd:/, <$fh>); ok(scalar(@ino_info), 'inotify has watches'); } else { skip "$ino_fdinfo missing: $!", 1; } }; # ensure IDLE persists across HUP, w/o extra watches or FDs $td->kill('HUP') or BAIL_OUT "failed to kill -imapd: $!"; for my $n (1..2) { # kick the event loop so we know HUP is done my $m = $imap_client->new(%mic_opt); ok($m->login && $m->IsAuthenticated && $m->logout, "connection $n works after HUP"); } $im->add(eml_load 't/data/0001.patch') or BAIL_OUT; $im->done; $t0 = Time::HiRes::time(); ok(@res = $mic->idle_data(11), "IDLE succeeds on $ng after HUP"); is(grep(/\A\* [0-9] EXISTS\b/, @res), 1, 'got EXISTS message'); ok((Time::HiRes::time() - $t0) < 10, 'IDLE client notified'); ok($mic->done($idle_tag), 'IDLE DONE'); my $bs = $mic->get_bodystructure($uidnext); ok($bs, 'BODYSTRUCTURE ok for deeply nested'); $ret = $mic->fetch_hash($uidnext, 'BODY') or BAIL_OUT "FETCH $@"; ok($ret->{$uidnext}->{BODY}, 'got something in BODY'); # this matches dovecot behavior $ret = $mic->fetch_hash($uidnext, 'BODY[1]') or BAIL_OUT "FETCH $@"; is($ret->{$uidnext}->{'BODY[1]'}, "testing embedded message harder\r\n", 'BODY[1]'); $ret = $mic->fetch_hash($uidnext, 'BODY[2]') or BAIL_OUT "FETCH $@"; like($ret->{$uidnext}->{'BODY[2]'}, qr/\ADate: Sat, 18 Apr 2020 22:20:20 /, 'BODY[2]'); $ret = $mic->fetch_hash($uidnext, 'BODY[2.1.1]') or BAIL_OUT "FETCH $@"; is($ret->{$uidnext}->{'BODY[2.1.1]'}, "testing embedded message\r\n", 'BODY[2.1.1]'); $ret = $mic->fetch_hash($uidnext, 'BODY[2.1.2]') or BAIL_OUT "FETCH $@"; like($ret->{$uidnext}->{'BODY[2.1.2]'}, qr/\AFrom: /, 'BODY[2.1.2] tip matched'); like($ret->{$uidnext}->{'BODY[2.1.2]'}, # trailing CRLF may vary depending on MIME parser qr/done_testing;(?:\r\n){1,2}\z/, 'BODY[2.1.2] tail matched'); $ret = $mic->fetch_hash("1:$uidnext", 'BODY[2.HEADER]') or BAIL_OUT "2.HEADER $@"; like($ret->{$uidnext}->{'BODY[2.HEADER]'}, qr/\ADate: Sat, 18 Apr 2020 22:20:20 /, '2.HEADER of message/rfc822'); $ret = $mic->fetch_hash($uidnext, 'BODY[2.MIME]') or BAIL_OUT "2.MIME $@"; is($ret->{$uidnext}->{'BODY[2.MIME]'}, <search('SENTON' => '18-Apr-2020'); is_deeply(\@hits, [ $uidnext ], 'search with date condition works'); ok($mic->examine($ng), 'EXAMINE on dummy'); @hits = $mic->search('SENTSINCE' => '18-Apr-2020'); is_deeply(\@hits, [], 'search on dummy with condition works'); ok(!$mic->search('SENTSINCE' => '18-Abr-2020'), 'bad month fails'); $mic->Uid(0); my $ret = $mic->fetch_hash(2, 'RFC822'); is_deeply($ret, {}, 'MSN FETCH on empty dummy will not trigger warnings, later'); }; # for @ibx # message sequence numbers :< is($mic->Uid(0), 0, 'disable UID on '.ref($mic)); ok($mic->reconnect, 'reconnected'); $ret = $mic->fetch_hash('1,2:3', 'RFC822') or BAIL_OUT "FETCH $@"; is(scalar keys %$ret, 3, 'got all 3 messages with comma-separated sequence'); $ret = $mic->fetch_hash('1:*', 'RFC822') or BAIL_OUT "FETCH $@"; is(scalar keys %$ret, 3, 'got all 3 messages'); SKIP: { # do any clients use non-UID IMAP SEARCH? skip 'Xapian missing', 3 if $level eq 'basic'; my $x = $mic->search('all'); is_deeply($x, [1, 2, 3], 'MSN SEARCH works before rm'); $x = $mic->search(qw(header subject embedded)); is_deeply($x, [2], 'MSN SEARCH on Subject works before rm'); $x = $mic->search('FROM scraper@example.com'); is_deeply($x, [], "MSN SEARCH miss won't trigger warnings"); } { my $rdr = { 0 => \($ret->{1}->{RFC822}) }; my $env = { HOME => $ENV{HOME} }; my @cmd = qw(-learn rm --all); run_script(\@cmd, $env, $rdr) or BAIL_OUT('-learn rm'); } SKIP: { # do any clients use non-UID IMAP SEARCH? We only ensure # MSN "SEARCH" can return a result which can be retrieved # via MSN "FETCH" skip 'Xapian missing', 3 if $level eq 'basic'; my $x = $mic->search(qw(header subject embedded)); is(scalar(@$x), 1, 'MSN SEARCH on Subject works after rm'); SKIP: { skip 'previous test failed', 1 if !scalar(@$x); $x = $mic->message_string($x->[0]); is($x, $ret->{2}->{RFC822}, 'message 2 unchanged'); } $x = $mic->search(qw(text embedded)); is(scalar(@$x), 1, 'MSN SEARCH on TEXT works after rm'); } # FIXME? no EXPUNGE response, yet my $r2 = $mic->fetch_hash('1:*', 'BODY.PEEK[]') or BAIL_OUT "FETCH $@"; is(scalar keys %$r2, 2, 'did not get all 3 messages'); is($r2->{2}->{'BODY[]'}, $ret->{2}->{RFC822}, 'message 2 unchanged'); is($r2->{3}->{'BODY[]'}, $ret->{3}->{RFC822}, 'message 3 unchanged'); $r2 = $mic->fetch_hash(2, 'BODY.PEEK[HEADER.FIELDS (message-id)]') or BAIL_OUT "FETCH $@"; is($r2->{2}->{'BODY[HEADER.FIELDS (MESSAGE-ID)]'}, 'Message-ID: <20200418222508.GA13918@dcvr>'."\r\n\r\n", 'BODY.PEEK[HEADER.FIELDS ...] drops .PEEK'); { my @new_list = $mic->list; # tag differs in [-1] like($orig_list[-1], qr/\A\S+ OK List done\r\n/, 'orig LIST'); like($new_list[-1], qr/\A\S+ OK List done\r\n/, 'new LIST'); pop @new_list; pop @orig_list; # TODO: not sure if sort order matters, imapd_refresh_finalize # doesn't sort, hopefully clients don't care... @new_list = sort @new_list; @orig_list = sort @orig_list; is_deeply(\@new_list, \@orig_list, 'LIST identical'); } ok($mic->close, 'CLOSE works'); ok(!$mic->close, 'CLOSE not idempotent'); ok($mic->logout, 'logged out'); { my $c = tcp_connect($sock); $c->autoflush(1); like(<$c>, qr/\* OK/, 'got a greeting'); print $c "\r\n"; like(<$c>, qr/\A\* BAD Error in IMAP command/, 'empty line'); print $c "tagonly\r\n"; like(<$c>, qr/\Atagonly BAD Error in IMAP command/, 'tag-only line'); } SKIP: { use_ok 'PublicInbox::InboxIdle'; require_git('1.8.5', 1) or skip('git 1.8.5+ needed for --urlmatch', 4); my $old_env = { HOME => $ENV{HOME} }; my $home = "$tmpdir/watch_home"; mkdir $home or BAIL_OUT $!; mkdir "$home/.public-inbox" or BAIL_OUT $!; local $ENV{HOME} = $home; my $name = 'watchimap'; my $addr = "i1-$level\@example.com"; my $url = "http://example.com/i1"; my $inboxdir = "$tmpdir/watchimap"; my $cmd = ['-init', '-V2', '-Lbasic', $name, $inboxdir, $url, $addr]; my $imapurl = "imap://$ihost:$iport/inbox.i1.0"; run_script($cmd) or BAIL_OUT("init $name"); xsys(qw(git config), "--file=$home/.public-inbox/config", "publicinbox.$name.watch", $imapurl) == 0 or BAIL_OUT "git config $?"; my $cfg = PublicInbox::Config->new; PublicInbox::DS->Reset; my $ii = PublicInbox::InboxIdle->new($cfg); my $cb = sub { PublicInbox::DS->SetPostLoopCallback(sub {}) }; my $obj = bless \$cb, 'PublicInbox::TestCommon::InboxWakeup'; $cfg->each_inbox(sub { $_[0]->subscribe_unlock('ident', $obj) }); my $watcherr = "$tmpdir/watcherr"; open my $err_wr, '>>', $watcherr or BAIL_OUT $!; open my $err, '<', $watcherr or BAIL_OUT $!; my $w = start_script(['-watch'], undef, { 2 => $err_wr }); diag 'waiting for initial fetch...'; PublicInbox::DS::event_loop(); diag 'inbox unlocked on initial fetch, waiting for IDLE'; tick until (grep(/I: \S+ idling/, <$err>)); open my $fh, '<', 't/iso-2202-jp.eml' or BAIL_OUT $!; $old_env->{ORIGINAL_RECIPIENT} = $addr; ok(run_script([qw(-mda --no-precheck)], $old_env, { 0 => $fh }), 'delivered a message for IDLE to kick -watch') or diag "mda error \$?=$?"; diag 'waiting for IMAP IDLE wakeup'; PublicInbox::DS->SetPostLoopCallback(undef); PublicInbox::DS::event_loop(); diag 'inbox unlocked on IDLE wakeup'; # try again with polling xsys(qw(git config), "--file=$home/.public-inbox/config", 'imap.PollInterval', 0.11) == 0 or BAIL_OUT "git config $?"; $w->kill('HUP'); diag 'waiting for -watch reload + initial fetch'; tick until (grep(/I: will check/, <$err>)); open $fh, '<', 't/psgi_attach.eml' or BAIL_OUT $!; ok(run_script([qw(-mda --no-precheck)], $old_env, { 0 => $fh }), 'delivered a message for -watch PollInterval'); diag 'waiting for PollInterval wakeup'; PublicInbox::DS->SetPostLoopCallback(undef); PublicInbox::DS::event_loop(); diag 'inbox unlocked (poll)'; $w->kill; $w->join; is($?, 0, 'no error in exited -watch process'); $cfg->each_inbox(sub { shift->unsubscribe_unlock('ident') }); $ii->close; PublicInbox::DS->Reset; seek($err, 0, 0); my @err = grep(!/^(?:I:|#)/, <$err>); is(@err, 0, 'no warnings/errors from -watch'.join(' ', @err)); if ($ENV{TEST_KILL_IMAPD}) { # not sure how reliable this test can be xsys(qw(git config), "--file=$home/.public-inbox/config", qw(--unset imap.PollInterval)) == 0 or BAIL_OUT "git config $?"; truncate($err_wr, 0) or BAIL_OUT $!; my @t0 = times; $w = start_script(['-watch'], undef, { 2 => $err_wr }); seek($err, 0, 0); tick until (grep(/I: \S+ idling/, <$err>)); diag 'killing imapd, waiting for CPU spins'; my $delay = 0.11; $td->kill(9); tick $delay; $w->kill; $w->join; is($?, 0, 'no error in exited -watch process'); my @t1 = times; my $c = $t1[2] + $t1[3] - $t0[2] - $t0[3]; my $thresh = (0.9 * $delay); diag "c=$c, threshold=$thresh"; ok($c < $thresh, 'did not burn much CPU'); is_deeply([grep(/ line \d+$/m, <$err>)], [], 'no backtraces from errors'); } } $td->kill; $td->join; is($?, 0, 'no error in exited process') if !$ENV{TEST_KILL_IMAPD}; open my $fh, '<', $err or BAIL_OUT("open $err failed: $!"); my $eout = do { local $/; <$fh> }; unlike($eout, qr/wide/i, 'no Wide character warnings'); unlike($eout, qr/uninitialized/i, 'no uninitialized warnings'); done_testing; public-inbox-1.9.0/t/import.t000066400000000000000000000077371430031475700161000ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::Smsg; use PublicInbox::Git; use PublicInbox::Import; use Fcntl qw(:DEFAULT SEEK_SET); use PublicInbox::TestCommon; use MIME::Base64 3.05; # Perl 5.10.0 / 5.9.2 my ($dir, $for_destroy) = tmpdir(); my $git = PublicInbox::Git->new($dir); my $im = PublicInbox::Import->new($git, 'testbox', 'test@example'); $im->init_bare; my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: b@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF my $v2 = require_git(2.6, 1); my $smsg = bless {}, 'PublicInbox::Smsg' if $v2; like($im->add($mime, undef, $smsg), qr/\A:[0-9]+\z/, 'added one message'); if ($v2) { like($smsg->{blob}, qr/\A[a-f0-9]{40}\z/, 'got last object_id'); my @cmd = ('git', "--git-dir=$git->{git_dir}", qw(hash-object --stdin)); open my $in, '+<', undef or BAIL_OUT "open(+<): $!"; print $in $mime->as_string or die "write failed: $!"; $in->flush or die "flush failed: $!"; seek($in, 0, SEEK_SET) or die "seek: $!"; chomp(my $hashed_obj = xqx(\@cmd, undef, { 0 => $in })); is($?, 0, 'hash-object'); is($hashed_obj, $smsg->{blob}, "blob object_id matches exp"); } $im->done; my @revs = $git->qx(qw(rev-list HEAD)); is(scalar @revs, 1, 'one revision created'); my $odd = '"=?iso-8859-1?Q?J_K=FCpper?= header_set('From', $odd); $mime->header_set('Message-ID', ''); $mime->header_set('Subject', 'msg2'); like($im->add($mime, sub { $mime }), qr/\A:\d+\z/, 'added 2nd message'); $im->done; @revs = $git->qx(qw(rev-list HEAD)); is(scalar @revs, 2, '2 revisions exist'); is($im->add($mime), undef, 'message only inserted once'); $im->done; @revs = $git->qx(qw(rev-list HEAD)); is(scalar @revs, 2, '2 revisions exist'); foreach my $c ('c'..'z') { $mime->header_set('Message-ID', "<$c\@example.com>"); $mime->header_set('Subject', "msg - $c"); like($im->add($mime), qr/\A:\d+\z/, "added $c message"); } $im->done; @revs = $git->qx(qw(rev-list HEAD)); is(scalar @revs, 26, '26 revisions exist after mass import'); my ($mark, $msg) = $im->remove($mime); like($mark, qr/\A:\d+\z/, 'got mark'); like(ref($msg), qr/\bPublicInbox::(?:Eml|MIME)\b/, 'got old message deleted'); is(undef, $im->remove($mime), 'remove is idempotent'); # mismatch on identical Message-ID $mime->header_set('Message-ID', ''); ($mark, $msg) = $im->remove($mime); is($mark, 'MISMATCH', 'mark == MISMATCH on mismatch'); is($msg->header('Message-ID'), '', 'Message-ID matches'); isnt($msg->header('Subject'), $mime->header('Subject'), 'subject mismatch'); $mime->header_set('Message-Id', ''); is($im->add($mime, sub { undef }), undef, 'check callback fails'); is($im->remove($mime), undef, 'message not added, so not removed'); is(undef, $im->checkpoint, 'checkpoint works before ->done'); $im->done; is(undef, $im->checkpoint, 'checkpoint works after ->done'); $im->checkpoint; my $nogit = PublicInbox::Git->new("$dir/non-existent/dir"); eval { my $nope = PublicInbox::Import->new($nogit, 'nope', 'no@example.com'); $nope->add($mime); }; ok($@, 'Import->add fails on non-existent dir'); my @cls = qw(PublicInbox::Eml); SKIP: { require_mods('PublicInbox::MIME', 1); push @cls, 'PublicInbox::MIME'; }; $main::badchars = "\n\0\r"; my $from = '=?UTF-8?B?'. encode_base64("B\ra\nd\0\$main::badchars", ''). '?='; for my $cls (@cls) { my $eml = $cls->new(< Message-ID: <$cls\@example.com> EOF ok($im->add($eml), "added $cls message with nasty char in From"); } $im->done; my $bref = $git->cat_file('HEAD'); like($$bref, qr/^author Ba d \$main::badchars /sm, 'latest commit accepted by spammer'); $git->qx(qw(fsck --no-progress --strict)); is($?, 0, 'fsck reported no errors'); $main::badchars = undef; done_testing(); public-inbox-1.9.0/t/inbox.t000066400000000000000000000032761430031475700156770ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use_ok 'PublicInbox::Inbox'; use File::Temp 0.19 (); my $x = PublicInbox::Inbox->new({url => [ '//example.com/test/' ]}); is($x->base_url, 'https://example.com/test/', 'expanded protocol-relative'); $x = PublicInbox::Inbox->new({url => [ 'http://example.com/test' ]}); is($x->base_url, 'http://example.com/test/', 'added trailing slash'); $x = PublicInbox::Inbox->new({}); is($x->base_url, undef, 'undef base_url allowed'); my $tmpdir = File::Temp->newdir('pi-inbox-XXXX', TMPDIR => 1); $x->{inboxdir} = $tmpdir->dirname; is_deeply($x->cloneurl, [], 'no cloneurls'); is($x->description, '($INBOX_DIR/description missing)', 'default description'); { open my $fh, '>', "$x->{inboxdir}/cloneurl" or die; print $fh "https://example.com/inbox\n" or die; close $fh or die; open $fh, '>', "$x->{inboxdir}/description" or die; print $fh "\xc4\x80blah\n" or die; close $fh or die; } is_deeply($x->cloneurl, ['https://example.com/inbox'], 'cloneurls update'); ok(utf8::valid($x->description), 'description is utf8::valid'); is($x->description, "\x{100}blah", 'description updated'); is(unlink(glob("$x->{inboxdir}/*")), 2, 'unlinked cloneurl & description'); is_deeply($x->cloneurl, ['https://example.com/inbox'], 'cloneurls memoized'); is($x->description, "\x{100}blah", 'description memoized'); $x->{name} = "2\x{100}wide"; $x->{newsgroup} = '2.wide'; like($x->mailboxid, qr/\AM32c48077696465-[0-9a-f]+\z/, '->mailboxid w/o slice (JMAP)'); like($x->mailboxid(78), qr/\AM322e77696465-4e-[0-9a-f]+\z/, '->mailboxid w/ slice (IMAP)'); done_testing(); public-inbox-1.9.0/t/inbox_idle.t000066400000000000000000000035411430031475700166670ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Config; require_git 2.6; require_mods(qw(DBD::SQLite)); require PublicInbox::SearchIdx; use_ok 'PublicInbox::InboxIdle'; my ($tmpdir, $for_destroy) = tmpdir(); for my $V (1, 2) { my $inboxdir = "$tmpdir/$V"; my $ibx = create_inbox "idle$V", tmpdir => $inboxdir, version => $V, indexlevel => 'basic', -no_gc => 1, sub { my ($im, $ibx) = @_; # capture $im->done; $ibx->init_inbox(0); $_[0] = undef; return if $V != 1; my $sidx = PublicInbox::SearchIdx->new($ibx, 1); $sidx->idx_acquire; $sidx->set_metadata_once; $sidx->idx_release; # allow watching on lockfile }; my $obj = InboxIdleTestObj->new; my $pi_cfg = PublicInbox::Config->new(\<{-primary_address} EOF my $ident = 'whatever'; $pi_cfg->each_inbox(sub { shift->subscribe_unlock($ident, $obj) }); my $ii = PublicInbox::InboxIdle->new($pi_cfg); ok($ii, 'InboxIdle created'); SKIP: { skip('inotify or kqueue missing', 1) unless $ii->{sock}; ok(fileno($ii->{sock}) >= 0, 'fileno() gave valid FD'); } my $im = $ibx->importer(0); ok($im->add(eml_load('t/utf8.eml')), "$V added"); $im->done; $ii->event_step; is(scalar @{$obj->{called}}, 1, 'called on unlock'); $pi_cfg->each_inbox(sub { shift->unsubscribe_unlock($ident) }); ok($im->add(eml_load('t/data/0001.patch')), "$V added #2"); $im->done; $ii->event_step; is(scalar @{$obj->{called}}, 1, 'not called when unsubbed'); $ii->close; } done_testing; package InboxIdleTestObj; use strict; sub new { bless {}, shift } sub on_inbox_unlock { my ($self, $ibx) = @_; push @{$self->{called}}, $ibx; } public-inbox-1.9.0/t/index-git-times.t000066400000000000000000000066631430031475700175720ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Config; use PublicInbox::Admin; use PublicInbox::Import; use File::Path qw(remove_tree); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::Over'; my ($tmpdir, $for_destroy) = tmpdir(); local $ENV{PI_CONFIG} = "$tmpdir/cfg"; my $v1dir = "$tmpdir/v1"; my $addr = 'x@example.com'; my $default_branch = PublicInbox::Import::default_branch; run_script(['-init', '--indexlevel=medium', 'v1', $v1dir, 'http://example.com/x', $addr]) or die "init failed"; { my $data = <<"EOF"; blob mark :1 data 133 From: timeless To: x Subject: can I haz the time? Message-ID: <19700101000000-1234\@example.com> plz reset $default_branch commit $default_branch mark :2 author timeless 749520000 +0100 committer x 1285977600 -0100 data 20 can I haz the time? M 100644 :1 53/256f6177504c2878d3a302ef5090dacf5e752c EOF pipe(my($r, $w)) or die; length($data) <= 512 or die "data too large to fit in POSIX pipe"; print $w $data or die; close $w or die; my $cmd = ['git', "--git-dir=$v1dir", 'fast-import', '--quiet']; xsys_e($cmd, undef, { 0 => $r }); } run_script(['-index', '--skip-docdata', $v1dir]) or die 'v1 index failed'; my $smsg; { my $cfg = PublicInbox::Config->new; my $ibx = $cfg->lookup($addr); my $lvl = PublicInbox::Admin::detect_indexlevel($ibx); is($lvl, 'medium', 'indexlevel detected'); is($ibx->{-skip_docdata}, 1, '--skip-docdata flag set on -index'); $smsg = $ibx->over->get_art(1); is($smsg->{ds}, 749520000, 'datestamp from git author time'); is($smsg->{ts}, 1285977600, 'timestamp from git committer time'); my $mset = $ibx->search->mset("m:$smsg->{mid}"); is($mset->size, 1, 'got one result for m:'); my $res = $ibx->search->mset_to_smsg($ibx, $mset); is($res->[0]->{ds}, $smsg->{ds}, 'Xapian stored datestamp'); $mset = $ibx->search->mset('d:19931002..19931002'); $res = $ibx->search->mset_to_smsg($ibx, $mset); is(scalar @$res, 1, 'got one result for d:'); is($res->[0]->{ds}, $smsg->{ds}, 'Xapian search on datestamp'); } SKIP: { require_git(2.6, 1) or skip('git 2.6+ required for v2', 10); my $v2dir = "$tmpdir/v2"; run_script(['-convert', $v1dir, $v2dir]) or die 'v2 conversion failed'; my $check_v2 = sub { my $ibx = PublicInbox::Inbox->new({inboxdir => $v2dir, address => $addr}); my $lvl = PublicInbox::Admin::detect_indexlevel($ibx); is($lvl, 'medium', 'indexlevel detected after convert'); is($ibx->{-skip_docdata}, 1, '--skip-docdata preserved after convert'); my $v2smsg = $ibx->over->get_art(1); is($v2smsg->{ds}, $smsg->{ds}, 'v2 datestamp from git author time'); is($v2smsg->{ts}, $smsg->{ts}, 'v2 timestamp from git committer time'); my $mset = $ibx->search->mset("m:$smsg->{mid}"); my $res = $ibx->search->mset_to_smsg($ibx, $mset); is($res->[0]->{ds}, $smsg->{ds}, 'Xapian stored datestamp'); $mset = $ibx->search->mset('d:19931002..19931002'); $res = $ibx->search->mset_to_smsg($ibx, $mset); is(scalar @$res, 1, 'got one result for d:'); is($res->[0]->{ds}, $smsg->{ds}, 'Xapian search on datestamp'); }; $check_v2->(); remove_tree($v2dir); # test non-parallelized conversion run_script(['-convert', '-j0', $v1dir, $v2dir]) or die 'v2 conversion failed'; $check_v2->(); } done_testing; public-inbox-1.9.0/t/indexlevels-mirror-v1.t000066400000000000000000000003111430031475700207210ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ local $ENV{PI_TEST_VERSION} = 1; require './t/indexlevels-mirror.t'; public-inbox-1.9.0/t/indexlevels-mirror.t000066400000000000000000000133631430031475700204100ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Eml; use PublicInbox::Inbox; require PublicInbox::Admin; my $PI_TEST_VERSION = $ENV{PI_TEST_VERSION} || 2; require_git('2.6') if $PI_TEST_VERSION == 2; require_mods(qw(DBD::SQLite)); my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: test@example.com Subject: this is a subject Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF my $import_index_incremental = sub { my ($v, $level, $mime) = @_; my $err = ''; my $this = "pi-$v-$level-indexlevels"; my ($tmpdir, $for_destroy) = tmpdir(); my $ibx = create_inbox "testbox$v", indexlevel => $level, version => $v, tmpdir => "$tmpdir/v$v", sub { $mime->header_set('Message-ID', ''); $_[0]->add($mime) or BAIL_OUT; }; my $im = $ibx->importer(0); local $ENV{PI_CONFIG} = "$tmpdir/config"; # index master (required for v1) my @cmd = (qw(-index -j0 --dangerous), $ibx->{inboxdir}, "-L$level"); push @cmd, '-c' if have_xapian_compact; ok(run_script(\@cmd, undef, { 2 => \$err }), 'index master'); my $ro_master = PublicInbox::Inbox->new({ inboxdir => $ibx->{inboxdir}, indexlevel => $level }); my $msgs = $ro_master->recent; is(scalar(@$msgs), 1, 'only one message in master, so far'); is($msgs->[0]->{mid}, 'm@1', 'first message in master indexed'); # clone @cmd = (qw(git clone --mirror -q)); my $mirror = "$tmpdir/mirror-$v"; if ($v == 1) { push @cmd, $ibx->{inboxdir}, $mirror; } else { push @cmd, "$ibx->{inboxdir}/git/0.git", "$mirror/git/0.git"; } my $fetch_dir = $cmd[-1]; is(xsys(@cmd), 0, "v$v clone OK"); # inbox init local $ENV{PI_CONFIG} = "$tmpdir/.picfg"; @cmd = ('-init', '-L', $level, 'mirror', $mirror, '//example.com/test', 'test@example.com'); push @cmd, '-V2' if $v == 2; ok(run_script(\@cmd), "v$v init OK"); # index mirror ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror OK"); # read-only access my $ro_mirror = PublicInbox::Inbox->new({ inboxdir => $mirror, indexlevel => $level, }); $msgs = $ro_mirror->recent; is(scalar(@$msgs), 1, 'only one message, so far'); is($msgs->[0]->{mid}, 'm@1', 'read first message'); # update master $mime->header_set('Message-ID', ''); ok($im->add($mime), '2nd message added'); $im->done; # mirror updates is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK"); $msgs = $ro_mirror->recent; is(scalar(@$msgs), 2, '2nd message seen in mirror'); is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs], ['m@1','m@2'], 'got both messages in mirror'); # incremental index master (required for v1) ok(run_script([qw(-index -j0), $ibx->{inboxdir}, "-L$level"]), 'index master OK'); $msgs = $ro_master->recent; is(scalar(@$msgs), 2, '2nd message seen in master'); is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs], ['m@1','m@2'], 'got both messages in master'); my @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; is_deeply(\@rw_nums, [1, 2], 'master has expected NNTP articles'); my @ro_nums = map { $_->{num} } @{$ro_mirror->over->query_ts(0, 0)}; is_deeply(\@ro_nums, [1, 2], 'mirror has expected NNTP articles'); # remove message from master ok($im->remove($mime), '2nd message removed'); $im->done; @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; is_deeply(\@rw_nums, [1], 'unindex NNTP article'.$v.$level); if ($level ne 'basic') { ok(run_script(['-xcpdb', '-q', $mirror]), "v$v xcpdb OK"); is(PublicInbox::Admin::detect_indexlevel($ro_mirror), $level, 'indexlevel detectable by Admin after xcpdb v' .$v.$level); delete $ro_mirror->{$_} for (qw(over search)); my $mset = $ro_mirror->search->mset('m:m@2'); is($mset->size, 1, "v$v found m\@2 via Xapian on $level"); } # sync the mirror is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK"); $msgs = $ro_mirror->recent; is(scalar(@$msgs), 1, '2nd message gone from mirror'); is_deeply([map { $_->{mid} } @$msgs], ['m@1'], 'message unavailable in mirror'); if ($v == 2 && $level eq 'basic') { is_deeply([glob("$ibx->{inboxdir}/xap*/?/")], [], 'no Xapian shard directories for v2 basic'); } if ($level ne 'basic') { my $mset = $ro_mirror->search->reopen->mset('m:m@2'); is($mset->size, 0, "v$v m\@2 gone from Xapian in mirror on $level"); } # add another message to master and have the mirror # sync and reindex it my @expect = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; foreach my $i (3..5) { $mime->header_set('Message-ID', ""); ok($im->add($mime), "#$i message added"); push @expect, $i; } $im->done; is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); ok(run_script([qw(-index -j0 --reindex), $mirror]), "v$v index --reindex mirror OK"); @ro_nums = map { $_->{num} } @{$ro_mirror->over->query_ts(0, 0)}; @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; is_deeply(\@rw_nums, \@expect, "v$v master has expected NNTP articles"); is_deeply(\@ro_nums, \@expect, "v$v mirror matches master articles"); is(PublicInbox::Admin::detect_indexlevel($ro_mirror), $level, 'indexlevel detectable by Admin '.$v.$level); SKIP: { skip 'xapian-compact missing', 1 if !have_xapian_compact; my $cmd = [ qw(-compact), $mirror ]; ok(run_script($cmd, undef, { 2 => \$err}), "compact $level") or diag $err; } }; # we can probably cull some other tests $import_index_incremental->($PI_TEST_VERSION, 'basic', $mime); SKIP: { require_mods(qw(Search::Xapian), 2); foreach my $l (qw(medium full)) { $import_index_incremental->($PI_TEST_VERSION, $l, $mime); } } done_testing(); public-inbox-1.9.0/t/init.t000066400000000000000000000211761430031475700155220ustar00rootroot00000000000000# Copyright (C) 2014-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Config; use PublicInbox::TestCommon; use PublicInbox::Admin; my ($tmpdir, $for_destroy) = tmpdir(); sub quiet_fail { my ($cmd, $msg) = @_; my $err = ''; ok(!run_script($cmd, undef, { 2 => \$err, 1 => \$err }), $msg); } { local $ENV{PI_DIR} = "$tmpdir/.public-inbox/"; my $cfgfile = "$ENV{PI_DIR}/config"; my $cmd = [ '-init', 'blist', "$tmpdir/blist", qw(http://example.com/blist blist@example.com) ]; ok(run_script($cmd), 'public-inbox-init OK'); is(read_indexlevel('blist'), '', 'indexlevel unset by default'); ok(-e $cfgfile, "config exists, now"); ok(run_script($cmd), 'public-inbox-init OK (idempotent)'); chmod 0666, $cfgfile or die "chmod failed: $!"; $cmd = [ '-init', 'clist', "$tmpdir/clist", qw(http://example.com/clist clist@example.com)]; ok(run_script($cmd), 'public-inbox-init clist OK'); is((stat($cfgfile))[2] & 07777, 0666, "permissions preserved"); $cmd = [ '-init', 'clist', '-V2', "$tmpdir/clist", qw(http://example.com/clist clist@example.com) ]; quiet_fail($cmd, 'attempting to init V2 from V1 fails'); ok(!-e "$cfgfile.lock", 'no lock leftover after init'); open my $lock, '+>', "$cfgfile.lock" or die; $cmd = [ '-init', 'lock', "$tmpdir/lock", qw(http://example.com/lock lock@example.com) ]; ok(-e "$cfgfile.lock", 'lock exists'); # this calls exit(): my $err = ''; ok(!run_script($cmd, undef, {2 => \$err}), 'lock init failed'); is($? >> 8, 255, 'got expected exit code on lock failure'); ok(unlink("$cfgfile.lock"), '-init did not unlink lock on failure'); my @init_args = ('i', "$tmpdir/i", qw(http://example.com/i i@example.com)); $cmd = [ qw(-init -c .bogus=val), @init_args ]; quiet_fail($cmd, 'invalid -c KEY=VALUE fails'); $cmd = [ qw(-init -c .bogus=val), @init_args ]; quiet_fail($cmd, '-c KEY-only fails'); $cmd = [ qw(-init -c address=clist@example.com), @init_args ]; quiet_fail($cmd, '-c address=CONFLICTING-VALUE fails'); $cmd = [ qw(-init -c no=problem -c no=problemo), @init_args ]; ok(run_script($cmd), '-c KEY=VALUE runs'); my $env = { GIT_CONFIG => "$ENV{PI_DIR}/config" }; chomp(my @v = xqx([qw(git config --get-all publicinbox.i.no)], $env)); is_deeply(\@v, [ qw(problem problemo) ]) or xbail(\@v); ok(run_script($cmd), '-c KEY=VALUE runs idempotently'); chomp(my @v2 = xqx([qw(git config --get-all publicinbox.i.no)], $env)); is_deeply(\@v, \@v2, 'nothing repeated') or xbail(\@v2); ok(run_script([@$cmd, '-c', 'no=more']), '-c KEY=VALUE addendum'); chomp(@v = xqx([qw(git config --get-all publicinbox.i.no)], $env)); is_deeply(\@v, [ qw(problem problemo more) ]) or xbail(\@v); ok(run_script([@$cmd, '-c', 'no=problem']), '-c KEY=VALUE repeated'); chomp(@v = xqx([qw(git config --get-all publicinbox.i.no)], $env)); is_deeply(\@v, [ qw(problem problemo more) ]) or xbail(\@v); ok(run_script([@$cmd, '-c', 'address=j@example.com']), '-c KEY=VALUE address'); chomp(@v = xqx([qw(git config --get-all publicinbox.i.address)], $env)); is_deeply(\@v, [ qw(i@example.com j@example.com) ], 'extra address added via -c KEY=VALUE'); } { my $env = { PI_DIR => "$tmpdir/.public-inbox/" }; my $rdr = { 2 => \(my $err = '') }; my $cmd = [ '-init', 'alist', "$tmpdir/a\nlist", qw(http://example.com/alist alist@example.com) ]; ok(!run_script($cmd, $env, $rdr), 'public-inbox-init rejects LF in inboxdir'); like($err, qr/`\\n' not allowed in `/s, 'reported \\n'); is_deeply([glob("$tmpdir/.public-inbox/pi-init-*")], [], 'no junk files left behind'); # "git init" does this, too $cmd = [ '-init', 'deep-non-existent', "$tmpdir/a/b/c/d", qw(http://example.com/abcd abcd@example.com) ]; $err = ''; my $umask = umask(022) // xbail "umask: $!"; ok(run_script($cmd, $env, $rdr), 'initializes non-existent hierarchy'); umask($umask) // xbail "umask: $!"; ok(-d "$tmpdir/a/b/c/d", 'directory created'); my $desc = "$tmpdir/a/b/c/d/description"; is(PublicInbox::Inbox::try_cat($desc), "public inbox for abcd\@example.com\n", 'description set'); my $mode = (stat($desc))[2]; is(sprintf('0%03o', $mode & 0777), '0644', 'description respects umask'); open my $fh, '>', "$tmpdir/d" or BAIL_OUT "open: $!"; close $fh; $cmd = [ '-init', 'd-f-conflict', "$tmpdir/d/f/conflict", qw(http://example.com/conflict onflict@example.com) ]; ok(!run_script($cmd, $env, $rdr), 'fails on D/F conflict'); } SKIP: { require_mods(qw(DBD::SQLite Search::Xapian), 2); require_git(2.6, 1) or skip "git 2.6+ required", 2; use_ok 'PublicInbox::Msgmap'; local $ENV{PI_DIR} = "$tmpdir/.public-inbox/"; local $ENV{PI_EMERGENCY} = "$tmpdir/.public-inbox/emergency"; my $cfgfile = "$ENV{PI_DIR}/config"; my $cmd = [ '-init', '-V2', 'v2list', "$tmpdir/v2list", qw(http://example.com/v2list v2list@example.com) ]; ok(run_script($cmd), 'public-inbox-init -V2 OK'); ok(-d "$tmpdir/v2list", 'v2list directory exists'); ok(-f "$tmpdir/v2list/msgmap.sqlite3", 'msgmap exists'); ok(-d "$tmpdir/v2list/all.git", 'catch-all.git directory exists'); $cmd = [ '-init', 'v2list', "$tmpdir/v2list", qw(http://example.com/v2list v2list@example.com) ]; ok(run_script($cmd), 'public-inbox-init is idempotent'); ok(! -d "$tmpdir/public-inbox" && !-d "$tmpdir/objects", 'idempotent invocation w/o -V2 does not make inbox v1'); is(read_indexlevel('v2list'), '', 'indexlevel unset by default'); $cmd = [ '-init', 'v2list', "-V1", "$tmpdir/v2list", qw(http://example.com/v2list v2list@example.com) ]; quiet_fail($cmd, 'initializing V2 as V1 fails'); foreach my $lvl (qw(medium basic)) { my $dir = "$tmpdir/v2$lvl"; $cmd = [ '-init', "v2$lvl", '-V2', '-L', $lvl, $dir, "http://example.com/v2$lvl", "v2$lvl\@example.com" ]; ok(run_script($cmd), "-init -L $lvl"); is(read_indexlevel("v2$lvl"), $lvl, "indexlevel set to '$lvl'"); my $ibx = PublicInbox::Inbox->new({ inboxdir => $dir }); is(PublicInbox::Admin::detect_indexlevel($ibx), $lvl, 'detected expected level w/o config'); ok(!$ibx->{-skip_docdata}, 'docdata written by default'); } for my $v (1, 2) { my $name = "v$v-skip-docdata"; my $dir = "$tmpdir/$name"; $cmd = [ '-init', $name, "-V$v", '--skip-docdata', $dir, "http://example.com/$name", "$name\@example.com" ]; ok(run_script($cmd), "-init -V$v --skip-docdata"); my $ibx = PublicInbox::Inbox->new({ inboxdir => $dir }); is(PublicInbox::Admin::detect_indexlevel($ibx), 'full', "detected default indexlevel -V$v"); ok($ibx->{-skip_docdata}, "docdata skip set -V$v"); ok($ibx->search->has_threadid, 'has_threadid flag set on new inbox'); } # loop for idempotency for (1..2) { $cmd = [ '-init', '-V2', '-S1', 'skip1', "$tmpdir/skip1", qw(http://example.com/skip1 skip1@example.com) ]; ok(run_script($cmd), "--skip-epoch 1"); my $gits = [ glob("$tmpdir/skip1/git/*.git") ]; is_deeply($gits, ["$tmpdir/skip1/git/1.git"], 'skip OK'); } $cmd = [ '-init', '-V2', '--skip-epoch=2', 'skip2', "$tmpdir/skip2", qw(http://example.com/skip2 skip2@example.com) ]; ok(run_script($cmd), "--skip-epoch 2"); my $gits = [ glob("$tmpdir/skip2/git/*.git") ]; is_deeply($gits, ["$tmpdir/skip2/git/2.git"], 'skipping 2 works, too'); xsys(qw(git config), "--file=$ENV{PI_DIR}/config", 'publicinboxmda.spamcheck', 'none') == 0 or BAIL_OUT "git config $?"; my $addr = 'skip3@example.com'; $cmd = [ qw(-init -V2 -Lbasic --skip-artnum=12 skip3), "$tmpdir/skip3", qw(http://example.com/skip3), $addr ]; ok(run_script($cmd), '--skip-artnum -V2'); my $env = { ORIGINAL_RECIPIENT => $addr }; my $mid = 'skip-artnum@example.com'; my $msg = "Message-ID: <$mid>\n\n"; my $rdr = { 0 => \$msg, 2 => \(my $err = '') }; ok(run_script([qw(-mda --no-precheck)], $env, $rdr), 'deliver V1'); diag "err=$err" if $err; my $mm = PublicInbox::Msgmap->new_file("$tmpdir/skip3/msgmap.sqlite3"); my $n = $mm->num_for($mid); is($n, 13, 'V2 NNTP article numbers skipped via --skip-artnum'); $addr = 'skip4@example.com'; $env = { ORIGINAL_RECIPIENT => $addr }; $cmd = [ qw(-init -V1 --skip-artnum 12 -Lmedium skip4), "$tmpdir/skip4", qw(http://example.com/skip4), $addr ]; ok(run_script($cmd), '--skip-artnum -V1'); $err = ''; ok(run_script([qw(-mda --no-precheck)], $env, $rdr), 'deliver V1'); diag "err=$err" if $err; $mm = PublicInbox::Msgmap->new_file( "$tmpdir/skip4/public-inbox/msgmap.sqlite3"); $n = $mm->num_for($mid); is($n, 13, 'V1 NNTP article numbers skipped via --skip-artnum'); } done_testing(); sub read_indexlevel { my ($inbox) = @_; my $cmd = [ qw(git config), "publicinbox.$inbox.indexlevel" ]; my $env = { GIT_CONFIG => "$ENV{PI_DIR}/config" }; chomp(my $lvl = xqx($cmd, $env)); $lvl; } public-inbox-1.9.0/t/ipc.t000066400000000000000000000135211430031475700153250ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::TestCommon; use Fcntl qw(SEEK_SET); use Digest::SHA qw(sha1_hex); require_mods(qw(Storable||Sereal)); require_ok 'PublicInbox::IPC'; my ($tmpdir, $for_destroy) = tmpdir(); state $once = eval <<''; package PublicInbox::IPC; use strict; use Digest::SHA qw(sha1_hex); sub test_array { qw(test array) } sub test_scalar { 'scalar' } sub test_scalarref { \'scalarref' } sub test_undef { undef } sub test_die { shift; die @_; 'unreachable' } sub test_pid { $$ } sub test_write_each_fd { my ($self, @args) = @_; for my $fd (0..2) { print { $self->{$fd} } "i=$fd $$ ", @args, "\n"; $self->{$fd}->flush; } } sub test_sha { my ($self, $buf) = @_; print { $self->{1} } sha1_hex($buf), "\n"; $self->{1}->flush; } sub test_append_pid { my ($self, $file) = @_; open my $fh, '>>', $file or die "open: $!"; $fh->autoflush(1); print $fh "$$\n" or die "print: $!"; } 1; my $ipc = bless {}, 'PublicInbox::IPC'; my @t = qw(array scalar scalarref undef); my $test = sub { my $x = shift; for my $type (@t) { my $m = "test_$type"; my @ret = $ipc->ipc_do($m); my @exp = $ipc->$m; is_deeply(\@ret, \@exp, "wantarray $m $x"); $ipc->ipc_do($m); my $ret = $ipc->ipc_do($m); my $exp = $ipc->$m; is_deeply($ret, $exp, "!wantarray $m $x"); } my $ret = eval { $ipc->test_die('phail') }; my $exp = $@; $ret = eval { $ipc->ipc_do('test_die', 'phail') }; my $err = $@; my %lines; for ($err, $exp) { s/ line (\d+).*//s and $lines{$1}++; } is(scalar keys %lines, 1, 'line numbers match'); is((values %lines)[0], 2, '2 hits on same line number'); is($err, $exp, "$x die matches"); is($ret, undef, "$x die did not return"); eval { $ipc->test_die(['arrayref']) }; $exp = $@; $ret = eval { $ipc->ipc_do('test_die', ['arrayref']) }; $err = $@; is_deeply($err, $exp, 'die with unblessed ref'); is(ref($err), 'ARRAY', 'got an array ref'); $exp = bless ['blessed'], 'PublicInbox::WTF'; $ret = eval { $ipc->ipc_do('test_die', $exp) }; $err = $@; is_deeply($err, $exp, 'die with blessed ref'); is(ref($err), 'PublicInbox::WTF', 'got blessed ref'); }; $test->('local'); { my $pid = $ipc->ipc_worker_spawn('test worker'); ok($pid > 0 && kill(0, $pid), 'worker spawned and running'); defined($pid) or BAIL_OUT 'no spawn, no test'; is($ipc->ipc_do('test_pid'), $pid, 'worker pid returned'); $test->('worker'); $ipc->ipc_lock_init("$tmpdir/lock"); is($ipc->ipc_do('test_pid'), $pid, 'worker pid returned'); $ipc->ipc_worker_stop; ok(!kill(0, $pid) && $!{ESRCH}, 'worker stopped'); } $ipc->ipc_worker_stop; # idempotent # work queues pipe(my ($ra, $wa)) or BAIL_OUT $!; pipe(my ($rb, $wb)) or BAIL_OUT $!; pipe(my ($rc, $wc)) or BAIL_OUT $!; open my $warn, '+>', undef or BAIL_OUT; $warn->autoflush(0); local $SIG{__WARN__} = sub { print $warn "PID:$$ ", @_ }; my @ppids; open my $agpl, '<', 'COPYING' or BAIL_OUT "AGPL-3 missing: $!"; my $big = do { local $/; <$agpl> } // BAIL_OUT "read: $!"; close $agpl or BAIL_OUT "close: $!"; for my $t ('local', 'worker', 'worker again') { $ipc->wq_io_do('test_write_each_fd', [ $wa, $wb, $wc ], 'hello world'); my $i = 0; for my $fh ($ra, $rb, $rc) { my $buf = readline($fh); is(chop($buf), "\n", "trailing CR ($t)"); like($buf, qr/\Ai=$i \d+ hello world\z/, "got expected ($t)"); $i++; } $ipc->wq_io_do('test_die', [ $wa, $wb, $wc ]); $ipc->wq_io_do('test_sha', [ $wa, $wb ], 'hello world'); is(readline($rb), sha1_hex('hello world')."\n", "SHA small ($t)"); { my $bigger = $big x 10; # to hit EMSGSIZE $ipc->wq_io_do('test_sha', [ $wa, $wb ], $bigger); my $exp = sha1_hex($bigger)."\n"; is(readline($rb), $exp, "SHA big for EMSGSIZE ($t)"); # to hit the WQWorker recv_and_run length substr($bigger, my $MY_MAX_ARG_STRLEN = 4096 * 33, -1) = ''; $ipc->wq_io_do('test_sha', [ $wa, $wb ], $bigger); $exp = sha1_hex($bigger)."\n"; is(readline($rb), $exp, "SHA WQWorker limit ($t)"); } my $ppid = $ipc->wq_workers_start('wq', 1); push(@ppids, $ppid); } # wq_io_do works across fork (siblings can feed) SKIP: { skip 'Socket::MsgHdr or Inline::C missing', 3 if !$ppids[0]; is_deeply(\@ppids, [$$, undef, undef], 'parent pid returned in wq_workers_start'); my $pid = fork // BAIL_OUT $!; if ($pid == 0) { use POSIX qw(_exit); $ipc->wq_io_do('test_write_each_fd', [ $wa, $wb, $wc ], $$); _exit(0); } else { my $i = 0; my ($wpid, @rest) = keys %{$ipc->{-wq_workers}}; is(scalar(@rest), 0, 'only one worker'); for my $fh ($ra, $rb, $rc) { my $buf = readline($fh); is(chop($buf), "\n", "trailing CR #$i"); like($buf, qr/^i=$i $wpid $pid\z/, 'got expected from sibling'); $i++; } is(waitpid($pid, 0), $pid, 'waitpid complete'); is($?, 0, 'child wq producer exited'); } my @ary = $ipc->wq_do('test_array'); is_deeply(\@ary, [ qw(test array) ], 'wq_do wantarray'); is(my $s = $ipc->wq_do('test_scalar'), 'scalar', 'defined wantarray'); my $exp = bless ['blessed'], 'PublicInbox::WTF'; my $ret = eval { $ipc->wq_do('test_die', $exp) }; is_deeply($@, $exp, 'die with blessed ref'); } $ipc->wq_close; SKIP: { skip 'Socket::MsgHdr or Inline::C missing', 11 if !$ppids[0]; seek($warn, 0, SEEK_SET) or BAIL_OUT; my @warn = <$warn>; is(scalar(@warn), 3, 'warned 3 times'); like($warn[0], qr/ wq_io_do: /, '1st warned from wq_do'); like($warn[1], qr/ wq_worker: /, '2nd warned from wq_worker'); is($warn[2], $warn[1], 'worker did not die'); $SIG{__WARN__} = 'DEFAULT'; is($ipc->wq_workers_start('wq', 2), $$, 'workers started again'); $ipc->wq_broadcast('test_append_pid', "$tmpdir/append_pid"); $ipc->wq_close; open my $fh, '<', "$tmpdir/append_pid" or BAIL_OUT "open: $!"; chomp(my @pids = <$fh>); my %pids = map { $_ => 1 } grep(/\A[0-9]+\z/, @pids); is(scalar keys %pids, 2, 'broadcast hit both PIDs'); } done_testing; public-inbox-1.9.0/t/iso-2202-jp.eml000066400000000000000000000004071430031475700166470ustar00rootroot00000000000000Message-Id: <199707281508.AAA24167@hoyogw.example> Date: Tue, 29 Jul 97 00:08:29 +0900 From: matz@example.com Subject: [ruby-dev:4] To: ruby-dev@example Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-2022-JP |$B$1$$$8$e!w:#$O%U%j!<(B(^^;;;$B$G$9(B. public-inbox-1.9.0/t/kqnotify.t000066400000000000000000000026141430031475700164170ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # # Ensure KQNotify can pick up rename(2) and link(2) operations # used by Maildir writing tools use strict; use Test::More; use PublicInbox::TestCommon; plan skip_all => 'KQNotify is only for *BSD systems' if $^O !~ /bsd/; require_mods('IO::KQueue'); use_ok 'PublicInbox::KQNotify'; my ($tmpdir, $for_destroy) = tmpdir(); mkdir "$tmpdir/new" or BAIL_OUT "mkdir: $!"; open my $fh, '>', "$tmpdir/tst" or BAIL_OUT "open: $!"; close $fh or BAIL_OUT "close: $!"; my $kqn = PublicInbox::KQNotify->new; my $mask = PublicInbox::KQNotify::MOVED_TO_OR_CREATE(); my $w = $kqn->watch("$tmpdir/new", $mask); rename("$tmpdir/tst", "$tmpdir/new/tst") or BAIL_OUT "rename: $!"; my $hit = [ map { $_->fullname } $kqn->read ]; is_deeply($hit, ["$tmpdir/new/tst"], 'rename(2) detected (via NOTE_EXTEND)'); open $fh, '>', "$tmpdir/tst" or BAIL_OUT "open: $!"; close $fh or BAIL_OUT "close: $!"; link("$tmpdir/tst", "$tmpdir/new/link") or BAIL_OUT "link: $!"; $hit = [ grep m!/link$!, map { $_->fullname } $kqn->read ]; is_deeply($hit, ["$tmpdir/new/link"], 'link(2) detected (via NOTE_WRITE)'); $w->cancel; link("$tmpdir/new/tst", "$tmpdir/new/link2") or BAIL_OUT "link: $!"; $hit = [ map { $_->fullname } $kqn->read ]; is_deeply($hit, [], 'link(2) not detected after cancel'); done_testing; public-inbox-1.9.0/t/lei-auto-watch.t000066400000000000000000000034111430031475700173720ustar00rootroot00000000000000#!perl -w # Copyright all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use File::Basename qw(basename); plan skip_all => "TEST_FLAKY not enabled for $0" if !$ENV{TEST_FLAKY}; my $have_fast_inotify = eval { require Linux::Inotify2 } || eval { require IO::KQueue }; $have_fast_inotify or diag("$0 IO::KQueue or Linux::Inotify2 missing, test will be slow"); test_lei(sub { my ($ro_home, $cfg_path) = setup_public_inboxes; my $x = "$ENV{HOME}/x"; my $y = "$ENV{HOME}/y"; lei_ok qw(add-external), "$ro_home/t1"; lei_ok qw(q mid:testmessage@example.com -o), $x; lei_ok qw(q mid:testmessage@example.com -o), $y; my @x = glob("$x/cur/*"); my @y = glob("$y/cur/*"); scalar(@x) == 1 or xbail 'expected 1 file', \@x; scalar(@y) == 1 or xbail 'expected 1 file', \@y; my $oid = '9bf1002c49eb075df47247b74d69bcd555e23422'; lei_ok qw(inspect), "blob:$oid"; my $ins = json_utf8->decode($lei_out); my $exp = { "maildir:$x" => [ map { basename($_) } @x ], "maildir:$y" => [ map { basename($_) } @y ] }; is_deeply($ins->{'mail-sync'}, $exp, 'inspect as expected'); lei_ok qw(add-watch), $x; my $dst = $x[0] . 'S'; rename($x[0], $dst) or xbail "rename($x[0], $dst): $!"; my $ys = "$y[0]S"; for (0..50) { last if -f $ys; tick; # wait for inotify or kevent } my @y2 = glob("$y/*/*"); is_deeply(\@y2, [ $ys ], "`seen' kw propagated to `y' dir"); lei_ok qw(note-event done); lei_ok qw(inspect), "blob:$oid"; $ins = json_utf8->decode($lei_out); $exp = { "maildir:$x" => [ map { basename($_) } glob("$x/*/*") ], "maildir:$y" => [ map { basename($_) } glob("$y/*/*") ] }; is_deeply($ins->{'mail-sync'}, $exp, 'mail_sync matches FS') or diag explain($ins); }); done_testing; public-inbox-1.9.0/t/lei-convert.t000066400000000000000000000112511430031475700167770ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::MboxReader; use PublicInbox::MdirReader; use PublicInbox::NetReader; use PublicInbox::Eml; use IO::Uncompress::Gunzip; require_mods(qw(lei -imapd -nntpd Mail::IMAPClient Net::NNTP)); my ($tmpdir, $for_destroy) = tmpdir; my $sock = tcp_server; my $cmd = [ '-imapd', '-W0', "--stdout=$tmpdir/i1", "--stderr=$tmpdir/i2" ]; my ($ro_home, $cfg_path) = setup_public_inboxes; my $env = { PI_CONFIG => $cfg_path }; my $tdi = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT("-imapd: $?"); my $imap_host_port = tcp_host_port($sock); $sock = tcp_server; $cmd = [ '-nntpd', '-W0', "--stdout=$tmpdir/n1", "--stderr=$tmpdir/n2" ]; my $tdn = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT("-nntpd: $?"); my $nntp_host_port = tcp_host_port($sock); undef $sock; test_lei({ tmpdir => $tmpdir }, sub { my $d = $ENV{HOME}; lei_ok('convert', '-o', "mboxrd:$d/foo.mboxrd", "imap://$imap_host_port/t.v2.0"); ok(-f "$d/foo.mboxrd", 'mboxrd created from imap://'); lei_ok('convert', '-o', "mboxrd:$d/nntp.mboxrd", "nntp://$nntp_host_port/t.v2"); ok(-f "$d/nntp.mboxrd", 'mboxrd created from nntp://'); my (@mboxrd, @mboxcl2); open my $fh, '<', "$d/foo.mboxrd" or BAIL_OUT $!; PublicInbox::MboxReader->mboxrd($fh, sub { push @mboxrd, shift }); ok(scalar(@mboxrd) > 1, 'got multiple messages'); open $fh, '<', "$d/nntp.mboxrd" or BAIL_OUT $!; my $i = 0; PublicInbox::MboxReader->mboxrd($fh, sub { my ($eml) = @_; is($eml->body, $mboxrd[$i]->body, "body matches #$i"); $i++; }); lei_ok('convert', '-o', "mboxcl2:$d/cl2", "mboxrd:$d/foo.mboxrd"); ok(-s "$d/cl2", 'mboxcl2 non-empty') or diag $lei_err; open $fh, '<', "$d/cl2" or BAIL_OUT $!; PublicInbox::MboxReader->mboxcl2($fh, sub { my $eml = shift; $eml->header_set($_) for (qw(Content-Length Lines)); push @mboxcl2, $eml; }); is_deeply(\@mboxcl2, \@mboxrd, 'mboxrd and mboxcl2 have same mail'); lei_ok('convert', '-o', "$d/md", "mboxrd:$d/foo.mboxrd"); ok(-d "$d/md", 'Maildir created'); my @md; PublicInbox::MdirReader->new->maildir_each_eml("$d/md", sub { push @md, $_[2]; }); is(scalar(@md), scalar(@mboxrd), 'got expected emails in Maildir') or diag $lei_err; @md = sort { ${$a->{bdy}} cmp ${$b->{bdy}} } @md; @mboxrd = sort { ${$a->{bdy}} cmp ${$b->{bdy}} } @mboxrd; my @rd_nostatus = map { my $eml = PublicInbox::Eml->new(\($_->as_string)); $eml->header_set('Status'); $eml; } @mboxrd; is_deeply(\@md, \@rd_nostatus, 'Maildir output matches mboxrd'); my @bar; lei_ok('convert', '-o', "mboxrd:$d/bar.mboxrd", "$d/md"); open $fh, '<', "$d/bar.mboxrd" or BAIL_OUT $!; PublicInbox::MboxReader->mboxrd($fh, sub { push @bar, shift }); @bar = sort { ${$a->{bdy}} cmp ${$b->{bdy}} } @bar; is_deeply(\@mboxrd, \@bar, 'mboxrd round-tripped through Maildir w/ flags'); open my $in, '<', "$d/foo.mboxrd" or BAIL_OUT; my $rdr = { 0 => $in, 1 => \(my $out), 2 => \$lei_err }; lei_ok([qw(convert --stdin -F mboxrd -o mboxrd:/dev/stdout)], undef, $rdr); open $fh, '<', "$d/foo.mboxrd" or BAIL_OUT; my $exp = do { local $/; <$fh> }; is($out, $exp, 'stdin => stdout'); lei_ok qw(convert -F eml -o mboxcl2:/dev/fd/1 t/plack-qp.eml); open $fh, '<', \$lei_out or BAIL_OUT; @bar = (); PublicInbox::MboxReader->mboxcl2($fh, sub { my $eml = shift; for my $h (qw(Content-Length Lines)) { ok(defined($eml->header_raw($h)), "$h defined for mboxcl2"); $eml->header_set($h); } push @bar, $eml; }); my $qp_eml = eml_load('t/plack-qp.eml'); $qp_eml->header_set('Status', 'O'); is_deeply(\@bar, [ $qp_eml ], 'eml => mboxcl2'); lei_ok qw(convert t/plack-qp.eml -o), "mboxrd:$d/qp.gz"; open $fh, '<', "$d/qp.gz" or xbail $!; ok(-s $fh, 'not empty'); $fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1); @bar = (); PublicInbox::MboxReader->mboxrd($fh, sub { push @bar, shift }); is_deeply(\@bar, [ $qp_eml ], 'wrote gzipped mboxrd'); lei_ok qw(convert -o mboxrd:/dev/stdout), "mboxrd:$d/qp.gz"; open $fh, '<', \$lei_out or xbail; @bar = (); PublicInbox::MboxReader->mboxrd($fh, sub { push @bar, shift }); is_deeply(\@bar, [ $qp_eml ], 'readed gzipped mboxrd'); # Status => Maildir flag => Status round trip $lei_out =~ s/^Status: O/Status: RO/sm or xbail "`seen' Status"; $rdr = { 0 => \($in = $lei_out), %$lei_opt }; lei_ok([qw(convert -F mboxrd -o), "$d/md2"], undef, $rdr); @md = glob("$d/md2/*/*"); is(scalar(@md), 1, 'one message'); like($md[0], qr/:2,S\z/, "`seen' flag set in Maildir"); lei_ok(qw(convert -o mboxrd:/dev/stdout), "$d/md2"); like($lei_out, qr/^Status: RO/sm, "`seen' flag preserved"); }); done_testing; public-inbox-1.9.0/t/lei-daemon.t000066400000000000000000000051621430031475700165660ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use Socket qw(AF_UNIX SOCK_SEQPACKET MSG_EOR pack_sockaddr_un); test_lei({ daemon_only => 1 }, sub { my $send_cmd = PublicInbox::Spawn->can('send_cmd4') // do { require PublicInbox::CmdIPC4; PublicInbox::CmdIPC4->can('send_cmd4'); } // do { require PublicInbox::Syscall; PublicInbox::Syscall->can('send_cmd4'); }; $send_cmd or BAIL_OUT 'started testing lei-daemon w/o send_cmd4!'; my $sock = "$ENV{XDG_RUNTIME_DIR}/lei/5.seq.sock"; my $err_log = "$ENV{XDG_RUNTIME_DIR}/lei/errors.log"; lei_ok('daemon-pid'); ignore_inline_c_missing($lei_err); is($lei_err, '', 'no error from daemon-pid'); like($lei_out, qr/\A[0-9]+\n\z/s, 'pid returned') or BAIL_OUT; chomp(my $pid = $lei_out); ok(kill(0, $pid), 'pid is valid'); ok(-S $sock, 'sock created'); is(-s $err_log, 0, 'nothing in errors.log'); lei_ok('daemon-pid'); chomp(my $pid_again = $lei_out); is($pid, $pid_again, 'daemon-pid idempotent'); SKIP: { skip 'only testing open files on Linux', 1 if $^O ne 'linux'; my $d = "/proc/$pid/fd"; skip "no $d on Linux" unless -d $d; my @before = sort(glob("$d/*")); my $addr = pack_sockaddr_un($sock); open my $null, '<', '/dev/null' or BAIL_OUT "/dev/null: $!"; my @fds = map { fileno($null) } (0..2); for (0..10) { socket(my $c, AF_UNIX, SOCK_SEQPACKET, 0) or BAIL_OUT "socket: $!"; connect($c, $addr) or BAIL_OUT "connect: $!"; $send_cmd->($c, \@fds, 'hi', MSG_EOR); } lei_ok('daemon-pid'); chomp($pid = $lei_out); is($pid, $pid_again, 'pid unchanged after failed reqs'); my @after = sort(glob("$d/*")); is_deeply(\@before, \@after, 'open files unchanged') or diag explain([\@before, \@after]);; } lei_ok(qw(daemon-kill)); is($lei_out, '', 'no output from daemon-kill'); is($lei_err, '', 'no error from daemon-kill'); for (0..100) { kill(0, $pid) or last; tick(); } ok(-S $sock, 'sock still exists'); ok(!kill(0, $pid), 'pid gone after stop'); lei_ok(qw(daemon-pid)); chomp(my $new_pid = $lei_out); ok(kill(0, $new_pid), 'new pid is running'); ok(-S $sock, 'sock still exists'); for my $sig (qw(-0 -CHLD)) { lei_ok('daemon-kill', $sig, \"handles $sig"); } is($lei_out.$lei_err, '', 'no output on innocuous signals'); lei_ok('daemon-pid'); chomp $lei_out; is($lei_out, $new_pid, 'PID unchanged after -0/-CHLD'); unlink $sock or BAIL_OUT "unlink($sock) $!"; for (0..100) { kill('CHLD', $new_pid) or last; tick(); } ok(!kill(0, $new_pid), 'daemon exits after unlink'); }); done_testing; public-inbox-1.9.0/t/lei-export-kw.t000066400000000000000000000031431430031475700172600ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use File::Copy qw(cp); use File::Path qw(make_path); require_mods(qw(lei)); # see lei-import-imap.t for IMAP tests my ($tmpdir, $for_destroy) = tmpdir; my $expect = eml_load('t/data/0001.patch'); my $do_export_kw = 1; my $wait_for = sub { my ($f) = @_; lei_ok qw(export-kw --all=local) if $do_export_kw; my $x = $f; $x =~ s!\Q$tmpdir\E/!\$TMPDIR/!; for (0..10) { last if -f $f; diag "tick #$_ $x"; tick(0.1); } ok(-f $f, "$x exists") or xbail; }; test_lei({ tmpdir => $tmpdir }, sub { my $home = $ENV{HOME}; my $md = "$home/md"; my $f; make_path("$md/new", "$md/cur", "$md/tmp"); cp('t/data/0001.patch', "$md/new/y") or xbail "cp $md $!"; cp('t/data/message_embed.eml', "$md/cur/x:2,S") or xbail "cp $md $!"; lei_ok qw(index), $md; lei_ok qw(tag t/data/0001.patch +kw:seen); $wait_for->($f = "$md/cur/y:2,S"); ok(!-e "$md/new/y", 'original gone') or diag explain([glob("$md/*/*")]); is_deeply(eml_load($f), $expect, "`seen' kw exported"); lei_ok qw(tag t/data/0001.patch +kw:answered); $wait_for->($f = "$md/cur/y:2,RS"); ok(!-e "$md/cur/y:2,S", 'seen-only file gone') or diag explain([glob("$md/*/*")]); is_deeply(eml_load($f), $expect, "`R' added"); lei_ok qw(tag t/data/0001.patch -kw:answered -kw:seen); $wait_for->($f = "$md/cur/y:2,"); ok(!-e "$md/cur/y:2,RS", 'seen+answered file gone') or diag explain([glob("$md/*/*")]); is_deeply(eml_load($f), $expect, 'no keywords left'); }); done_testing; public-inbox-1.9.0/t/lei-externals.t000066400000000000000000000252241430031475700173310ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use Fcntl qw(SEEK_SET); require_git 2.6; require_mods(qw(json DBD::SQLite Search::Xapian)); use POSIX qw(WTERMSIG WIFSIGNALED SIGPIPE); my @onions = map { "http://$_.onion/meta/" } qw( 4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd ie5yzdi7fg72h7s4sdcztq5evakq23rdt33mfyfcddc5u3ndnw24ogqd 7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd); my $test_external_remote = sub { my ($url, $k) = @_; SKIP: { skip "$k unset", 1 if !$url; require_cmd 'curl', 1 or skip 'curl missing', 1; if ($url =~ m!\.onion/!) { require_cmd 'torsocks', 1 or skip 'no torsocks', 1; } my $mid = '20140421094015.GA8962@dcvr.yhbt.net'; my @cmd = ('q', '--only', $url, '-q', "m:$mid"); lei_ok(@cmd, \"query $url"); is($lei_err, '', "no errors on $url"); my $res = json_utf8->decode($lei_out); is($res->[0]->{'m'}, $mid, "got expected mid from $url") or skip 'further remote tests', 1; lei_ok(@cmd, 'd:..20101002', \'no results, no error'); is($lei_err, '', 'no output on 404, matching local FS behavior'); is($lei_out, "[null]\n", 'got null results'); my ($pid_before, $pid_after); if (-d $ENV{XDG_RUNTIME_DIR} && -w _) { lei_ok 'daemon-pid'; chomp($pid_before = $lei_out); ok($pid_before, 'daemon is live'); } for my $out ([], [qw(-f mboxcl2)]) { pipe(my ($r, $w)) or BAIL_OUT $!; open my $err, '+>', undef or BAIL_OUT $!; my $opt = { run_mode => 0, 1 => $w, 2 => $err }; my $cmd = [qw(lei q -qt), @$out, 'z:1..']; my $tp = start_script($cmd, undef, $opt); close $w; sysread($r, my $buf, 1); close $r; # trigger SIGPIPE $tp->join; ok(WIFSIGNALED($?), "signaled @$out"); is(WTERMSIG($?), SIGPIPE, "got SIGPIPE @$out"); seek($err, 0, 0); my @err = <$err>; is_deeply(\@err, [], "no errors @$out"); } if (-d $ENV{XDG_RUNTIME_DIR} && -w _) { lei_ok 'daemon-pid'; chomp(my $pid_after = $lei_out); is($pid_after, $pid_before, 'pid unchanged') or skip 'daemon died', 1; skip 'not killing persistent lei-daemon', 2 if $ENV{TEST_LEI_DAEMON_PERSIST_DIR}; lei_ok 'daemon-kill'; my $alive = 1; for (1..100) { $alive = kill(0, $pid_after) or last; tick(); } ok(!$alive, 'daemon-kill worked'); } } # /SKIP }; # /sub my ($ro_home, $cfg_path) = setup_public_inboxes; test_lei(sub { my $home = $ENV{HOME}; my $config_file = "$home/.config/lei/config"; my $store_dir = "$home/.local/share/lei"; lei_ok 'ls-external', \'ls-external on fresh install'; ignore_inline_c_missing($lei_err); is($lei_out.$lei_err, '', 'ls-external no output, yet'); ok(!-e $config_file && !-e $store_dir, 'nothing created by ls-external'); ok(!lei('add-external', "$home/nonexistent"), "fails on non-existent dir"); like($lei_err, qr/not a directory/, 'noted non-existence'); mkdir "$home/new\nline" or BAIL_OUT "mkdir: $!"; ok(!lei('add-external', "$home/new\nline"), "fails on newline"); like($lei_err, qr/`\\n' not allowed/, 'newline noted in error'); lei_ok('ls-external', \'ls-external works after add failure'); is($lei_out.$lei_err, '', 'ls-external still has no output'); my $cfg = PublicInbox::Config->new($cfg_path); $cfg->each_inbox(sub { my ($ibx) = @_; lei_ok(qw(add-external -q), $ibx->{inboxdir}, \'added external'); is($lei_out.$lei_err, '', 'no output'); }); ok(-s $config_file, 'add-external created config'); my $lcfg = PublicInbox::Config->new($config_file); $cfg->each_inbox(sub { my ($ibx) = @_; is($lcfg->{"external.$ibx->{inboxdir}.boost"}, 0, "configured boost on $ibx->{name}"); }); lei_ok 'ls-external'; like($lei_out, qr/boost=0\n/s, 'ls-external has output'); lei_ok qw(add-external -q https://EXAMPLE.com/ibx), \'add remote'; is($lei_err, '', 'no warnings after add-external'); { lei_ok qw(ls-external --remote); my $r_only = +{ map { $_ => 1 } split(/^/m, $lei_out) }; lei_ok qw(ls-external --local); my $l_only = +{ map { $_ => 1 } split(/^/m, $lei_out) }; lei_ok 'ls-external'; is_deeply([grep { $l_only->{$_} } keys %$r_only], [], 'no locals in --remote'); is_deeply([grep { $r_only->{$_} } keys %$l_only], [], 'no remotes in --local'); my $all = +{ map { $_ => 1 } split(/^/m, $lei_out) }; is_deeply($all, { %$r_only, %$l_only }, 'default output combines remote + local'); lei_ok qw(ls-external --remote --local); my $both = +{ map { $_ => 1 } split(/^/m, $lei_out) }; is_deeply($all, $both, '--remote --local == no args'); } lei_ok qw(_complete lei forget-external), \'complete for externals'; my %comp = map { $_ => 1 } split(/\s+/, $lei_out); ok($comp{'https://example.com/ibx/'}, 'forget external completion'); my @dirs; $cfg->each_inbox(sub { my ($ibx) = @_; push @dirs, $ibx->{inboxdir}; ok($comp{$ibx->{inboxdir}}, "local $ibx->{name} completion"); }); for my $u (qw(h http https https: https:/ https:// https://e https://example https://example. https://example.co https://example.com https://example.com/ https://example.com/i https://example.com/ibx)) { lei_ok(qw(_complete lei forget-external), $u, \"partial completion for URL $u"); is($lei_out, "https://example.com/ibx/\n", "completed partial URL $u"); for my $qo (qw(-I --include --exclude --only)) { lei_ok(qw(_complete lei q), $qo, $u, \"partial completion for URL q $qo $u"); is($lei_out, "https://example.com/ibx/\n", "completed partial URL $u on q $qo"); } } lei_ok(qw(_complete lei add-external), 'https://', \'add-external hostname completion'); is($lei_out, "https://example.com/\n", 'completed up to hostname'); lei_ok('ls-external'); like($lei_out, qr!https://example\.com/ibx/!s, 'added canonical URL'); is($lei_err, '', 'no warnings on ls-external'); lei_ok(qw(forget-external -q https://EXAMPLE.com/ibx)); lei_ok('ls-external'); unlike($lei_out, qr!https://example\.com/ibx/!s, 'removed canonical URL'); # do some queries ok(!lei(qw(q s:prefix -o maildir:/dev/null)), 'bad maildir'); like($lei_err, qr!/dev/null exists and is not a directory!, 'error shown'); is($? >> 8, 1, 'errored out with exit 1'); ok(!lei(qw(q s:prefix -o), "mboxcl2:$home"), 'bad mbox'); like($lei_err, qr!\Q$home\E exists and is not a writable file!, 'error shown'); is($? >> 8, 1, 'errored out with exit 1'); ok(!lei(qw(q s:prefix -o Mbox2:/dev/stdout)), 'bad format'); like($lei_err, qr/bad mbox format: mbox2/, 'error shown'); is($? >> 8, 1, 'errored out with exit 1'); # note, on a Bourne shell users should be able to use either: # s:"use boolean prefix" # "s:use boolean prefix" # or use single quotes, it should not matter. Users only need # to know shell quoting rules, not Xapian quoting rules. # No double-quoting should be imposed on users on the CLI lei_ok('q', 's:use boolean prefix'); like($lei_out, qr/search: use boolean prefix/, 'phrase search got result'); my $res = json_utf8->decode($lei_out); is(scalar(@$res), 2, 'only 2 element array (1 result)'); is($res->[1], undef, 'final element is undef'); # XXX should this be? is(ref($res->[0]), 'HASH', 'first element is hashref'); lei_ok('q', '--pretty', 's:use boolean prefix'); my $pretty = json_utf8->decode($lei_out); is_deeply($res, $pretty, '--pretty is identical after decode'); { open my $fh, '+>', undef or BAIL_OUT $!; $fh->autoflush(1); print $fh 's:use d:..5.days.from.now' or BAIL_OUT $!; seek($fh, 0, SEEK_SET) or BAIL_OUT $!; lei_ok([qw(q -q --stdin)], undef, { %$lei_opt, 0 => $fh }, \'--stdin on regular file works'); like($lei_out, qr/use boolean/, '--stdin on regular file'); } { pipe(my ($r, $w)) or BAIL_OUT $!; print $w 's:use' or BAIL_OUT $!; close $w or BAIL_OUT $!; lei_ok([qw(q -q --stdin)], undef, { %$lei_opt, 0 => $r }, \'--stdin on pipe file works'); like($lei_out, qr/use boolean prefix/, '--stdin on pipe'); } ok(!lei(qw(q -q --stdin s:use)), "--stdin and argv don't mix"); like($lei_err, qr/no query allowed.*--stdin/, '--stdin conflict error message'); for my $fmt (qw(ldjson ndjson jsonl)) { lei_ok('q', '-f', $fmt, 's:use boolean prefix'); is($lei_out, json_utf8->encode($pretty->[0])."\n", "-f $fmt"); } require IO::Uncompress::Gunzip; for my $sfx ('', '.gz') { my $f = "$home/mbox$sfx"; lei_ok('q', '-o', "mboxcl2:$f", 's:use boolean prefix'); my $cat = $sfx eq '' ? sub { open my $mb, '<', $f or fail "no mbox: $!"; <$mb> } : sub { my $z = IO::Uncompress::Gunzip->new($f, MultiStream=>1); <$z>; }; my @s = grep(/^Subject:/, $cat->()); is(scalar(@s), 1, "1 result in mbox$sfx"); lei_ok('q', '-a', '-o', "mboxcl2:$f", 's:see attachment'); is(grep(!/^#/, $lei_err), 0, 'no errors from augment') or diag $lei_err; @s = grep(/^Subject:/, my @wtf = $cat->()); is(scalar(@s), 2, "2 results in mbox$sfx"); lei_ok('q', '-a', '-o', "mboxcl2:$f", 's:nonexistent'); is(grep(!/^#/, $lei_err), 0, "no errors on no results ($sfx)") or diag $lei_err; my @s2 = grep(/^Subject:/, $cat->()); is_deeply(\@s2, \@s, "same 2 old results w/ --augment and bad search $sfx"); lei_ok('q', '-o', "mboxcl2:$f", 's:nonexistent'); my @res = $cat->(); is_deeply(\@res, [], "clobber w/o --augment $sfx"); } ok(!lei('q', '-o', "$home/mbox", 's:nope'), 'fails if mbox format unspecified'); like($lei_err, qr/unable to determine mbox/, 'mbox-related message'); ok(!lei(qw(q --no-local s:see)), '--no-local'); is($? >> 8, 1, 'proper exit code'); like($lei_err, qr/no local or remote.+? to search/, 'no inbox'); for my $no (['--no-local'], ['--no-external'], [qw(--no-local --no-external)]) { lei_ok(qw(q mid:testmessage@example.com), @$no, '-I', $dirs[0], \"-I and @$no combine"); $res = json_utf8->decode($lei_out); is($res->[0]->{'m'}, 'testmessage@example.com', "-I \$DIR got results regardless of @$no"); } { skip 'TEST_LEI_DAEMON_PERSIST_DIR in use', 1 if $ENV{TEST_LEI_DAEMON_PERSIST_DIR}; my @q = qw(q -o mboxcl2:rel.mboxcl2 bye); lei_ok('-C', $home, @q); is(unlink("$home/rel.mboxcl2"), 1, '-C works before q'); # we are more flexible than git, here: lei_ok(@q, '-C', $home); is(unlink("$home/rel.mboxcl2"), 1, '-C works after q'); mkdir "$home/deep" or BAIL_OUT $!; lei_ok('-C', $home, @q, '-C', 'deep'); is(unlink("$home/deep/rel.mboxcl2"), 1, 'multiple -C works'); lei_ok('-C', '', '-C', $home, @q, '-C', 'deep', '-C', ''); is(unlink("$home/deep/rel.mboxcl2"), 1, "-C '' accepted"); ok(!-f "$home/rel.mboxcl2", 'wrong path not created'); } my %e = ( TEST_LEI_EXTERNAL_HTTPS => 'https://public-inbox.org/meta/', TEST_LEI_EXTERNAL_ONION => $onions[int(rand(scalar(@onions)))], ); for my $k (keys %e) { my $url = $ENV{$k} // ''; $url = $e{$k} if $url eq '1'; $test_external_remote->($url, $k); } }); # test_lei done_testing; public-inbox-1.9.0/t/lei-import-http.t000066400000000000000000000034601430031475700176110ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_mods(qw(lei -httpd)); require_cmd('curl'); my ($ro_home, $cfg_path) = setup_public_inboxes; my ($tmpdir, $for_destroy) = tmpdir; my $sock = tcp_server; my $cmd = [ '-httpd', '-W0', "--stdout=$tmpdir/1", "--stderr=$tmpdir/2" ]; my $env = { PI_CONFIG => $cfg_path }; my $td = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT("-httpd $?"); my $host_port = tcp_host_port($sock); undef $sock; test_lei({ tmpdir => $tmpdir }, sub { my $url = "http://$host_port/t2"; for my $p (qw(bogus@x/t.mbox.gz bogus@x/raw ?q=noresultever)) { ok(!lei('import', "$url/$p"), "/$p fails properly"); like($lei_err, qr/curl.*404/, 'got curl 404'); } for my $p (qw(/ /T/ /t/ /t.atom)) { ok(!lei('import', "$url/m\@example$p"), "/$p fails"); like($lei_err, qr/did you mean/, "gave hint for $p"); } lei_ok 'import', "$url/testmessage\@example.com/raw"; lei_ok 'q', 'm:testmessage@example.com'; my $res = json_utf8->decode($lei_out); is($res->[0]->{'m'}, 'testmessage@example.com', 'imported raw') or diag explain($res); lei_ok 'import', "$url/qp\@example.com/t.mbox.gz"; lei_ok 'q', 'm:qp@example.com'; $res = json_utf8->decode($lei_out); is($res->[0]->{'m'}, 'qp@example.com', 'imported t.mbox.gz') or diag explain($res); lei_ok 'import', "$url/?q=s:boolean"; lei_ok 'q', 'm:20180720072141.GA15957@example'; $res = json_utf8->decode($lei_out); is($res->[0]->{'m'}, '20180720072141.GA15957@example', 'imported search result') or diag explain($res); ok(!lei(qw(import --mail-sync), "$url/x\@example.com/raw"), '--mail-sync fails on HTTP'); like($lei_err, qr/--mail-sync/, 'error message notes --mail-sync'); }); done_testing; public-inbox-1.9.0/t/lei-import-imap.t000066400000000000000000000107151430031475700175610ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_mods(qw(lei -imapd Mail::IMAPClient)); my ($ro_home, $cfg_path) = setup_public_inboxes; my ($tmpdir, $for_destroy) = tmpdir; my $sock = tcp_server; my $cmd = [ '-imapd', '-W0', "--stdout=$tmpdir/1", "--stderr=$tmpdir/2" ]; my $env = { PI_CONFIG => $cfg_path }; my $td = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT("-imapd: $?"); my $host_port = tcp_host_port($sock); undef $sock; test_lei({ tmpdir => $tmpdir }, sub { my $url = "imap://$host_port/t.v2.0"; my $url_orig = $url; lei_ok(qw(ls-mail-source), "imap://$host_port/"); like($lei_out, qr/^t\.v2\.0$/ms, 'shows mailbox'); lei_ok(qw(ls-mail-source), $url); is($lei_out, "t.v2.0\n", 'shows only mailbox with filter'); lei_ok(qw(ls-mail-source -l), "imap://$host_port/"); is(ref(json_utf8->decode($lei_out)), 'ARRAY', 'ls-mail-source JSON'); lei_ok(qw(q z:1..)); my $out = json_utf8->decode($lei_out); is_deeply($out, [ undef ], 'nothing imported, yet'); lei_ok('inspect', $url); is_deeply(json_utf8->decode($lei_out), {}, 'no inspect stats, yet'); lei_ok('import', $url); lei_ok('inspect', $url); my $res = json_utf8->decode($lei_out); is(scalar keys %$res, 1, 'got one key in inspect URL'); my $re = qr!\Aimap://;AUTH=ANONYMOUS\@\Q$host_port\E /t\.v2\.0;UIDVALIDITY=\d+!x; like((keys %$res)[0], qr/$re\z/, 'got expanded key'); lei_ok 'ls-mail-sync'; like($lei_out, qr!$re\n\z!, 'ls-mail-sync'); chomp(my $u = $lei_out); lei_ok('import', $u, \'UIDVALIDITY match in URL'); $url = $u; $u =~ s/;UIDVALIDITY=(\d+)\s*/;UIDVALIDITY=9$1/s; ok(!lei('import', $u), 'UIDVALIDITY mismatch in URL rejected'); like($lei_err, qr/UIDVALIDITY mismatch/, 'mismatch noted'); lei_ok('inspect', $url); my $inspect = json_utf8->decode($lei_out); my @k = keys %$inspect; is(scalar(@k), 1, 'one URL resolved'); is($k[0], $url, 'inspect URL matches'); my $stats = $inspect->{$k[0]}; is_deeply([ sort keys %$stats ], [ qw(uid.count uid.max uid.min) ], 'keys match'); ok($stats->{'uid.min'} < $stats->{'uid.max'}, 'min < max'); ok($stats->{'uid.count'} > 0, 'count > 0'); lei_ok('lcat', $url); is(scalar(grep(/^# blob:/, split(/\n/ms, $lei_out))), $stats->{'uid.count'}, 'lcat on URL dumps folder'); lei_ok qw(lcat -f json), $url; $out = json_utf8->decode($lei_out); is(scalar(@$out) - 1, $stats->{'uid.count'}, 'lcat JSON dumps folder'); lei_ok(qw(q z:1..)); $out = json_utf8->decode($lei_out); ok(scalar(@$out) > 1, 'got imported messages'); is(pop @$out, undef, 'trailing JSON null element was null'); my %r; for (@$out) { $r{ref($_)}++ } is_deeply(\%r, { 'HASH' => scalar(@$out) }, 'all hashes'); lei_ok([qw(tag +kw:seen), $url], undef, undef); my $f = "$ENV{HOME}/.local/share/lei/store/mail_sync.sqlite3"; ok(-s $f, 'mail_sync tracked for redundant imports'); lei_ok('inspect', "blob:$out->[5]->{blob}"); my $x = json_utf8->decode($lei_out); is(ref($x->{'lei/store'}), 'ARRAY', 'lei/store in inspect'); is(ref($x->{'mail-sync'}), 'HASH', 'sync in inspect'); is(ref($x->{'mail-sync'}->{$k[0]}), 'ARRAY', 'UID arrays in inspect') or diag explain($x); my $psgi_attach = 'cfa3622cbeffc9bd6b0fc66c4d60d420ba74f60d'; lei_ok('blob', $psgi_attach); like($lei_out, qr!^Content-Type: multipart/mixed;!sm, 'got full blob'); lei_ok('blob', "$psgi_attach:2"); is($lei_out, "b64\xde\xad\xbe\xef\n", 'got attachment'); lei_ok 'forget-mail-sync', $url; lei_ok 'ls-mail-sync'; unlike($lei_out, qr!\Q$host_port\E!, 'sync info gone after forget'); my $uid_url = "$url/;UID=".$stats->{'uid.max'}; lei_ok 'import', $uid_url; lei_ok 'ls-mail-sync'; is($lei_out, "$url\n", 'ls-mail-sync added URL w/o UID'); lei_ok 'inspect', $uid_url; $lei_out =~ /([a-f0-9]{40,})/ or xbail 'inspect missed blob with UID URL'; my $blob = $1; lei_ok 'lcat', $uid_url; like $lei_out, qr/^Subject: /sm, 'lcat shows mail text with UID URL'; like $lei_out, qr/\bblob:$blob\b/, 'lcat showed blob'; my $orig = $lei_out; lei_ok 'lcat', "blob:$blob"; is($lei_out, $orig, 'lcat understands blob:...'); lei_ok qw(lcat -f json), $uid_url; $out = json_utf8->decode($lei_out); is(scalar(@$out), 2, 'got JSON') or diag explain($out); lei_ok qw(lcat), $url_orig; is($lei_out, $orig, 'lcat w/o UID works'); ok(!lei(qw(export-kw), $url_orig), 'export-kw fails on read-only IMAP'); like($lei_err, qr/does not support/, 'error noted in failure'); }); done_testing; public-inbox-1.9.0/t/lei-import-maildir.t000066400000000000000000000063051430031475700202540ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use Cwd qw(abs_path); test_lei(sub { my $md = "$ENV{HOME}/md"; for ($md, "$md/new", "$md/cur", "$md/tmp") { mkdir($_) or BAIL_OUT("mkdir $_: $!"); } symlink(abs_path('t/data/0001.patch'), "$md/cur/x:2,S") or BAIL_OUT "symlink $md $!"; lei_ok(qw(import), "$md/", \'import Maildir'); my $imp_err = $lei_err; my %i; lei_ok('inspect', $md); $i{no_type} = $lei_out; lei_ok('inspect', "$md/"); $i{no_type_tslash} = $lei_out; lei_ok('inspect', "maildir:$md"), $i{with_type} = $lei_out; lei_ok('inspect', "maildir:$md/"), $i{with_type_tslash} = $lei_out; lei_ok('inspect', "MAILDIR:$md"), $i{ALLCAPS} = $lei_out; lei_ok(['inspect', $md], undef, { -C => $ENV{HOME}, %$lei_opt }); $i{rel_no_type} = $lei_out; lei_ok(['inspect', "maildir:$md"], undef, { -C => $ENV{HOME}, %$lei_opt }); $i{rel_with_type} = $lei_out; my %v = map { $_ => 1 } values %i; is(scalar(keys %v), 1, 'inspect handles relative and absolute paths'); my $inspect = json_utf8->decode([ keys %v ]->[0]); is_deeply($inspect, {"maildir:$md" => { 'name.count' => 1 }}, 'inspect maildir: path had expected output') or xbail($inspect); lei_ok(qw(q s:boolean)); my $res = json_utf8->decode($lei_out); like($res->[0]->{'s'}, qr/use boolean/, 'got expected result') or diag explain($imp_err, $res); is_deeply($res->[0]->{kw}, ['seen'], 'keyword set'); is($res->[1], undef, 'only got one result'); lei_ok('inspect', "blob:$res->[0]->{blob}"); $inspect = json_utf8->decode($lei_out); is(ref(delete $inspect->{"lei/store"}), 'ARRAY', 'lei/store IDs'); is_deeply($inspect, { 'mail-sync' => { "maildir:$md" => [ 'x:2,S' ] } }, 'maildir sync info as expected'); lei_ok qw(ls-mail-sync); is($lei_out, "maildir:$md\n", 'ls-mail-sync as expected'); lei_ok(qw(import), $md, \'import Maildir again'); $imp_err = $lei_err; lei_ok(qw(q -d none s:boolean), \'lei q w/o dedupe'); my $r2 = json_utf8->decode($lei_out); is_deeply($r2, $res, 'idempotent import') or diag explain($imp_err, $res); rename("$md/cur/x:2,S", "$md/cur/x:2,RS") or BAIL_OUT "rename: $!"; lei_ok('import', "maildir:$md", \'import Maildir after +answered'); lei_ok(qw(q -d none s:boolean), \'lei q after +answered'); $res = json_utf8->decode($lei_out); like($res->[0]->{'s'}, qr/use boolean/, 'got expected result'); is_deeply($res->[0]->{kw}, ['answered', 'seen'], 'keywords set'); is($res->[1], undef, 'only got one result'); symlink(abs_path('t/utf8.eml'), "$md/cur/u:2,ST") or BAIL_OUT "symlink $md $!"; lei_ok('import', "maildir:$md", \'import Maildir w/ trashed message'); $imp_err = $lei_err; lei_ok(qw(q -d none m:testmessage@example.com)); $res = json_utf8->decode($lei_out); is_deeply($res, [ undef ], 'trashed message not imported') or diag explain($imp_err, $res); lei_ok qw(rm t/data/0001.patch); lei_ok(qw(q s:boolean)); is($lei_out, "[null]\n", 'removed message gone from results'); my $g0 = "$ENV{HOME}/.local/share/lei/store/local/0.git"; my $x = xqx(['git', "--git-dir=$g0", qw(cat-file blob HEAD:d)]); is($?, 0, "git cat-file shows file is `d'"); }); done_testing; public-inbox-1.9.0/t/lei-import-nntp.t000066400000000000000000000104561430031475700176140ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_git 2.6; require_mods(qw(json DBD::SQLite Search::Xapian Net::NNTP)); my ($ro_home, $cfg_path) = setup_public_inboxes; my ($tmpdir, $for_destroy) = tmpdir; my $sock = tcp_server; my $cmd = [ '-nntpd', '-W0', "--stdout=$tmpdir/1", "--stderr=$tmpdir/2" ]; my $env = { PI_CONFIG => $cfg_path }; my $td = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT("-nntpd $?"); my $host_port = tcp_host_port($sock); undef $sock; test_lei({ tmpdir => $tmpdir }, sub { lei_ok(qw(q z:1..)); my $out = json_utf8->decode($lei_out); is_deeply($out, [ undef ], 'nothing imported, yet'); my $url = "nntp://$host_port/t.v2"; lei_ok(qw(ls-mail-source), "nntp://$host_port/"); like($lei_out, qr/^t\.v2$/ms, 'shows newsgroup'); lei_ok(qw(ls-mail-source), $url); is($lei_out, "t.v2\n", 'shows only newsgroup with filter'); lei_ok(qw(ls-mail-source -l), "nntp://$host_port/"); is(ref(json_utf8->decode($lei_out)), 'ARRAY', 'ls-mail-source JSON'); lei_ok('import', $url); lei_ok "lcat", "nntp://$host_port/testmessage\@example.com"; my $local = $lei_out; lei_ok "lcat", "nntp://example.com/testmessage\@example.com"; my $remote = $lei_out; is($local, $remote, 'Message-ID used even from unknown host'); lei_ok(qw(q z:1..)); $out = json_utf8->decode($lei_out); ok(scalar(@$out) > 1, 'got imported messages'); is(pop @$out, undef, 'trailing JSON null element was null'); my %r; for (@$out) { $r{ref($_)}++ } is_deeply(\%r, { 'HASH' => scalar(@$out) }, 'all hashes'); my $f = "$ENV{HOME}/.local/share/lei/store/mail_sync.sqlite3"; ok(-s $f, 'mail_sync exists tracked for redundant imports'); lei_ok 'ls-mail-sync'; like($lei_out, qr!\A\Q$url\E\n\z!, 'ls-mail-sync output as-expected'); ok(!lei(qw(import), "$url/12-1"), 'backwards range rejected'); # new home local $ENV{HOME} = "$tmpdir/h2"; lei_ok(qw(ls-mail-source -l), $url); my $ls = json_utf8->decode($lei_out); my ($high, $low) = @{$ls->[0]}{qw(high low)}; ok($high > $low, 'high > low'); my $end = $high - 1; lei_ok qw(import), "$url/$high"; lei_ok('inspect', $url); is_xdeeply(json_utf8->decode($lei_out), { $url => { 'article.count' => 1, 'article.min' => $high, 'article.max' => $high, } }, 'inspect output for URL after single message') or diag $lei_out; lei_ok('inspect', "$url/$high"); my $x = json_utf8->decode($lei_out); like($x->{$url}->{$high}, qr/\A[a-f0-9]{40,}\z/, 'inspect shows blob'); lei_ok qw(lcat -f json), "$url/$high"; my $lcat = json_utf8->decode($lei_out); is($lcat->[1], undef, 'only one result for lcat'); is($lcat->[0]->{blob}, $x->{$url}->{$high}, 'lcat showed correct blob'); lei_ok 'ls-mail-sync'; is($lei_out, "$url\n", 'article number not stored as folder'); lei_ok qw(q z:0..); my $one = json_utf8->decode($lei_out); pop @$one; # trailing null is(scalar(@$one), 1, 'only 1 result'); local $ENV{HOME} = "$tmpdir/h3"; lei_ok qw(import), "$url/$low-$end"; lei_ok('inspect', $url); is_xdeeply(json_utf8->decode($lei_out), { $url => { 'article.count' => $end - $low + 1, 'article.min' => $low, 'article.max' => $end, } }, 'inspect output for URL after range') or diag $lei_out; lei_ok('inspect', "$url/$low-$end"); $x = json_utf8->decode($lei_out); is_deeply([ ($low..$end) ], [ sort { $a <=> $b } keys %{$x->{$url}} ], 'inspect range shows range'); is(scalar(grep(/\A[a-f0-9]{40,}\z/, values %{$x->{$url}})), $end - $low + 1, 'all values are git blobs'); lei_ok qw(lcat -f json), "$url/$low"; $lcat = json_utf8->decode($lei_out); is($lcat->[1], undef, 'only one result for lcat'); is($lcat->[0]->{blob}, $x->{$url}->{$low}, 'lcat showed correct blob'); lei_ok qw(lcat -f json), "$url/$low-$end"; $lcat = json_utf8->decode($lei_out); pop @$lcat; for ($low..$end) { my $tip = shift @$lcat; is($x->{$url}->{$_}, $tip->{blob}, "blob matches art #$_"); } lei_ok 'ls-mail-sync'; is($lei_out, "$url\n", 'article range not stored as folder'); lei_ok qw(q z:0..); my $start = json_utf8->decode($lei_out); pop @$start; # trailing null is(scalar(@$start), scalar(map { $_ } ($low..$end)), 'range worked as expected'); my %seen; for (@$start, @$one) { is($seen{$_->{blob}}++, 0, "blob $_->{blob} seen once"); } }); done_testing; public-inbox-1.9.0/t/lei-import.t000066400000000000000000000104261430031475700166340ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; test_lei(sub { ok(!lei(qw(import -F bogus), 't/plack-qp.eml'), 'fails with bogus format'); like($lei_err, qr/\bis `eml', not --in-format/, 'gave error message'); lei_ok(qw(q s:boolean), \'search miss before import'); unlike($lei_out, qr/boolean/i, 'no results, yet'); open my $fh, '<', 't/data/0001.patch' or BAIL_OUT $!; lei_ok([qw(import -F eml -)], undef, { %$lei_opt, 0 => $fh }, \'import single file from stdin') or diag $lei_err; close $fh; lei_ok(qw(q s:boolean), \'search hit after import'); lei_ok(qw(q s:boolean -f mboxrd), \'blob accessible after import'); { my $expect = [ eml_load('t/data/0001.patch') ]; require PublicInbox::MboxReader; my @cmp; open my $fh, '<', \$lei_out or BAIL_OUT "open :scalar: $!"; PublicInbox::MboxReader->mboxrd($fh, sub { my ($eml) = @_; $eml->header_set('Status'); push @cmp, $eml; }); is_deeply(\@cmp, $expect, 'got expected message in mboxrd'); } lei_ok(qw(import -F eml), 't/data/message_embed.eml', \'import single file by path'); lei_ok(qw(q m:testmessage@example.com)); is($lei_out, "[null]\n", 'no results, yet'); my $oid = '9bf1002c49eb075df47247b74d69bcd555e23422'; my $eml = eml_load('t/utf8.eml'); my $in = 'From x@y Fri Oct 2 00:00:00 1993'."\n".$eml->as_string; lei_ok([qw(import -F eml -)], undef, { %$lei_opt, 0 => \$in }); lei_ok(qw(q m:testmessage@example.com)); is(json_utf8->decode($lei_out)->[0]->{'blob'}, $oid, 'got expected OID w/o From'); my $eml_str = <<''; From: a@b Message-ID: Status: RO my $opt = { %$lei_opt, 0 => \$eml_str }; lei_ok([qw(import -F eml -)], undef, $opt, \'import single file with keywords from stdin'); lei_ok(qw(q m:x@y)); my $res = json_utf8->decode($lei_out); is($res->[1], undef, 'only one result'); is($res->[0]->{'m'}, 'x@y', 'got expected message'); is($res->[0]->{kw}, undef, 'Status ignored for eml'); lei_ok(qw(q -f mboxrd m:x@y)); unlike($lei_out, qr/^Status:/, 'no Status: in imported message'); lei_ok('blob', $res->[0]->{blob}); is($lei_out, "From: a\@b\nMessage-ID: \n", 'got blob back'); $eml->header_set('Message-ID', ''); $eml->header_set('Status', 'RO'); $in = 'From v@y Fri Oct 2 00:00:00 1993'."\n".$eml->as_string; lei_ok([qw(import --no-kw -F mboxrd -)], undef, { %$lei_opt, 0 => \$in }, \'import single file with --no-kw from stdin'); lei(qw(q m:v@y)); $res = json_utf8->decode($lei_out); is($res->[1], undef, 'only one result'); is($res->[0]->{'m'}, 'v@y', 'got expected message'); is($res->[0]->{kw}, undef, 'no keywords set'); $eml->header_set('Message-ID', ''); $in = 'From k@y Fri Oct 2 00:00:00 1993'."\n".$eml->as_string; lei_ok([qw(import -F mboxrd /dev/fd/0)], undef, { %$lei_opt, 0 => \$in }, \'import single file with --kw (default) from stdin'); lei(qw(q m:k@y)); $res = json_utf8->decode($lei_out); is($res->[1], undef, 'only one result'); is($res->[0]->{'m'}, 'k@y', 'got expected message'); is_deeply($res->[0]->{kw}, ['seen'], "`seen' keywords set"); # no From, Sender, or Message-ID $eml_str = <<'EOM'; Subject: draft message with no sender References: Resent-Message-ID: No use for a name EOM lei_ok([qw(import -F eml -)], undef, { %$lei_opt, 0 => \$eml_str }); lei_ok(['q', 's:draft message with no sender']); my $draft_a = json_utf8->decode($lei_out); ok(!exists $draft_a->[0]->{'m'}, 'no fake mid stored or exposed'); lei_ok([qw(tag -F eml - +kw:draft)], undef, { %$lei_opt, 0 => \$eml_str }); lei_ok(['q', 's:draft message with no sender']); my $draft_b = json_utf8->decode($lei_out); my $kw = delete $draft_b->[0]->{kw}; is_deeply($kw, ['draft'], 'draft kw set'); is_deeply($draft_a, $draft_b, 'fake Message-ID lookup') or diag explain($draft_a, $draft_b); lei_ok('blob', '--mail', $draft_b->[0]->{blob}); is($lei_out, $eml_str, 'draft retrieved by blob'); $eml_str = "Message-ID: \nSubject: label-this\n\n"; lei_ok([qw(import -F eml - +kw:seen +L:inbox)], undef, { %$lei_opt, 0 => \$eml_str }); lei_ok(qw(q m:inbox@example.com)); $res = json_utf8->decode($lei_out); is_deeply($res->[0]->{kw}, ['seen'], 'keyword set'); is_deeply($res->[0]->{L}, ['inbox'], 'label set'); # see t/lei_to_mail.t for "import -F mbox*" }); done_testing; public-inbox-1.9.0/t/lei-index.t000066400000000000000000000077641430031475700164440ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use File::Spec; require_mods(qw(lei)); my ($ro_home, $cfg_path) = setup_public_inboxes; my ($tmpdir, $for_destroy) = tmpdir; my $env = { PI_CONFIG => $cfg_path }; my $srv = {}; SKIP: { require_mods(qw(-nntpd Net::NNTP), 1); my $rdr = { 3 => tcp_server }; $srv->{nntpd} = start_script( [qw(-nntpd -W0), "--stdout=$tmpdir/n1", "--stderr=$tmpdir/n2"], $env, $rdr) or xbail "nntpd: $?"; $srv->{nntp_host_port} = tcp_host_port($rdr->{3}); } SKIP: { require_mods(qw(-imapd Mail::IMAPClient), 1); my $rdr = { 3 => tcp_server }; $srv->{imapd} = start_script( [qw(-imapd -W0), "--stdout=$tmpdir/i1", "--stderr=$tmpdir/i2"], $env, $rdr) or xbail("-imapd $?"); $srv->{imap_host_port} = tcp_host_port($rdr->{3}); } for ('', qw(cur new)) { mkdir "$tmpdir/md/$_" or xbail "mkdir: $!"; mkdir "$tmpdir/md1/$_" or xbail "mkdir: $!"; } symlink(File::Spec->rel2abs('t/plack-qp.eml'), "$tmpdir/md/cur/x:2,"); my $expect = do { open my $fh, '<', 't/plack-qp.eml' or xbail $!; local $/; <$fh>; }; # mbsync and offlineimap both put ":2," in "new/" files: symlink(File::Spec->rel2abs('t/utf8.eml'), "$tmpdir/md/new/u:2,") or xbail "symlink $!"; symlink(File::Spec->rel2abs('t/mda-mime.eml'), "$tmpdir/md1/cur/x:2,S") or xbail "symlink $!"; test_lei({ tmpdir => $tmpdir }, sub { my $store_path = "$ENV{HOME}/.local/share/lei/store/"; lei_ok('index', "$tmpdir/md"); lei_ok(qw(q mid:qp@example.com)); my $res_a = json_utf8->decode($lei_out); my $blob = $res_a->[0]->{'blob'}; like($blob, qr/\A[0-9a-f]{40,}\z/, 'got blob from qp@example'); lei_ok(qw(-C / blob), $blob); is($lei_out, $expect, 'got expected blob via Maildir'); lei_ok(qw(q mid:qp@example.com -f text)); like($lei_out, qr/^hi = bye/sm, 'lei2mail fallback'); lei_ok(qw(q mid:testmessage@example.com -f text)); lei_ok(qw(-C / blob --mail 9bf1002c49eb075df47247b74d69bcd555e23422)); my $all_obj = ['git', "--git-dir=$store_path/ALL.git", qw(cat-file --batch-check --batch-all-objects)]; is_deeply([xqx($all_obj)], [], 'no git objects'); lei_ok('import', 't/plack-qp.eml'); ok(grep(/\A$blob blob /, my @objs = xqx($all_obj)), 'imported blob'); lei_ok(qw(q m:qp@example.com --dedupe=none)); my $res_b = json_utf8->decode($lei_out); is_deeply($res_b, $res_a, 'no extra DB entries'); # ensure tag works on index-only messages: lei_ok(qw(tag +kw:seen t/utf8.eml)); lei_ok(qw(q mid:testmessage@example.com)); is_deeply(json_utf8->decode($lei_out)->[0]->{kw}, ['seen'], 'seen kw can be set on index-only message'); lei_ok(qw(q z:0.. -o), "$tmpdir/all-results") for (1..2); is_deeply([xqx($all_obj)], \@objs, 'no new objects after 2x q to trigger implicit import'); lei_ok 'index', "$tmpdir/md1/cur/x:2,S"; lei_ok qw(q m:multipart-html-sucks@11); is_deeply(json_utf8->decode($lei_out)->[0]->{'kw'}, ['seen'], 'keyword set'); lei_ok 'reindex'; lei_ok qw(q m:multipart-html-sucks@11); is_deeply(json_utf8->decode($lei_out)->[0]->{'kw'}, ['seen'], 'keyword still set after reindex'); $srv->{nntpd} and lei_ok('index', "nntp://$srv->{nntp_host_port}/t.v2"); $srv->{imapd} and lei_ok('index', "imap://$srv->{imap_host_port}/t.v2.0"); is_deeply([xqx($all_obj)], \@objs, 'no new objects from NNTP+IMAP'); lei_ok qw(q m:multipart-html-sucks@11); $res_a = json_utf8->decode($lei_out)->[0]; is_deeply($res_a->{'kw'}, ['seen'], 'keywords still set after NNTP + IMAP import'); # ensure import works after lms->local_blob fallback in lei/store lei_ok('import', 't/mda-mime.eml'); lei_ok qw(q m:multipart-html-sucks@11); $res_b = json_utf8->decode($lei_out)->[0]; my $t = xqx(['git', "--git-dir=$store_path/ALL.git", qw(cat-file -t), $res_b->{blob}]); is($t, "blob\n", 'got blob'); lei_ok('reindex'); lei_ok qw(q m:multipart-html-sucks@11); $res_a = json_utf8->decode($lei_out)->[0]; is_deeply($res_a->{'kw'}, ['seen'], 'keywords still set after reindex'); }); done_testing; public-inbox-1.9.0/t/lei-inspect.t000066400000000000000000000007321430031475700167660ustar00rootroot00000000000000#!perl -w # Copyright all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; test_lei(sub { my ($ro_home, $cfg_path) = setup_public_inboxes; lei_ok qw(inspect --dir), "$ro_home/t1", 'mid:testmessage@example.com'; my $ent = json_utf8->decode($lei_out); is(ref($ent->{smsg}), 'ARRAY', 'smsg array'); is(ref($ent->{xdoc}), 'ARRAY', 'xdoc array'); }); done_testing; public-inbox-1.9.0/t/lei-lcat.t000066400000000000000000000016631430031475700162500ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_mods(qw(lei)); test_lei(sub { my $in = "\nMessage-id: \n"; lei_ok([qw(lcat --stdin)], undef, { 0 => \$in, %$lei_opt }); unlike($lei_out, qr/\S/, 'nothing, yet'); lei_ok('import', 't/plack-qp.eml'); lei_ok([qw(lcat --stdin)], undef, { 0 => \$in, %$lei_opt }); like($lei_out, qr/qp\@example\.com/, 'got a result'); # test Link:, -f reply, and implicit --stdin: my $prev = $lei_out; $in = "\nLink: https://example.com/foo/qp\@example.com/\n"; lei_ok([qw(lcat -f reply)], undef, { 0 => \$in, %$lei_opt}); my $exp = <<'EOM'; To: qp@example.com Subject: Re: QP In-Reply-To: On some unknown date, qp wrote: > hi = bye EOM like($lei_out, qr/\AFrom [^\n]+\n\Q$exp\E/sm, '-f reply works'); }); done_testing; public-inbox-1.9.0/t/lei-mirror.psgi000066400000000000000000000003361430031475700173320ustar00rootroot00000000000000use Plack::Builder; use PublicInbox::WWW; my $www = PublicInbox::WWW->new; $www->preload; builder { enable 'Head'; mount '/pfx' => builder { sub { $www->call(@_) } }; mount '/' => builder { sub { $www->call(@_) } }; }; public-inbox-1.9.0/t/lei-mirror.t000066400000000000000000000203031430031475700166270ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Inbox; require_mods(qw(-httpd lei DBD::SQLite)); require_cmd('curl'); require PublicInbox::Msgmap; my $sock = tcp_server(); my ($tmpdir, $for_destroy) = tmpdir(); my $http = 'http://'.tcp_host_port($sock); my ($ro_home, $cfg_path) = setup_public_inboxes; my $cmd = [ qw(-httpd -W0 ./t/lei-mirror.psgi), "--stdout=$tmpdir/out", "--stderr=$tmpdir/err" ]; my $td = start_script($cmd, { PI_CONFIG => $cfg_path }, { 3 => $sock }); my %created; test_lei({ tmpdir => $tmpdir }, sub { my $home = $ENV{HOME}; my $t1 = "$home/t1-mirror"; my $mm_orig = "$ro_home/t1/public-inbox/msgmap.sqlite3"; $created{v1} = PublicInbox::Msgmap->new_file($mm_orig)->created_at; lei_ok('add-external', $t1, '--mirror', "$http/t1/", \'--mirror v1'); my $mm_dup = "$t1/public-inbox/msgmap.sqlite3"; ok(-f $mm_dup, 't1-mirror indexed'); is(PublicInbox::Inbox::try_cat("$t1/description"), "mirror of $http/t1/\n", 'description set'); ok(-f "$t1/Makefile", 'convenience Makefile added (v1)'); ok(-f "$t1/inbox.config.example", 'inbox.config.example downloaded'); is((stat(_))[9], $created{v1}, 'inbox.config.example mtime is ->created_at'); is((stat(_))[2] & 0222, 0, 'inbox.config.example not writable'); my $tb = PublicInbox::Msgmap->new_file($mm_dup)->created_at; is($tb, $created{v1}, 'created_at matched in mirror'); lei_ok('ls-external'); like($lei_out, qr!\Q$t1\E!, 't1 added to ls-externals'); my $t2 = "$home/t2-mirror"; $mm_orig = "$ro_home/t2/msgmap.sqlite3"; $created{v2} = PublicInbox::Msgmap->new_file($mm_orig)->created_at; lei_ok('add-external', $t2, '--mirror', "$http/t2/", \'--mirror v2'); $mm_dup = "$t2/msgmap.sqlite3"; ok(-f $mm_dup, 't2-mirror indexed'); ok(-f "$t2/description", 't2 description'); ok(-f "$t2/Makefile", 'convenience Makefile added (v2)'); is(PublicInbox::Inbox::try_cat("$t2/description"), "mirror of $http/t2/\n", 'description set'); $tb = PublicInbox::Msgmap->new_file($mm_dup)->created_at; is($tb, $created{v2}, 'created_at matched in v2 mirror'); lei_ok('ls-external'); like($lei_out, qr!\Q$t2\E!, 't2 added to ls-externals'); ok(!lei('add-external', $t2, '--mirror', "$http/t2/"), '--mirror fails if reused') or diag "$lei_err.$lei_out = $?"; like($lei_err, qr/\Q$t2\E' already exists/, 'destination in error'); ok(!lei('add-external', "$home/t2\nnewline", '--mirror', "$http/t2/"), '--mirror fails on newline'); like($lei_err, qr/`\\n' not allowed/, 'newline noted in error'); lei_ok('ls-external'); like($lei_out, qr!\Q$t2\E!, 'still in ls-externals'); unlike($lei_out, qr!\Qnewline\E!, 'newline entry not added'); ok(!lei('add-external', "$t2-fail", '-Lmedium'), '--mirror v2'); like($lei_err, qr/not a directory/, 'non-directory noted'); ok(!-d "$t2-fail", 'destination not created on failure'); lei_ok('ls-external'); unlike($lei_out, qr!\Q$t2-fail\E!, 'not added to ls-external'); lei_ok('add-external', "$t1-pfx", '--mirror', "$http/pfx/t1/", \'--mirror v1 w/ PSGI prefix'); ok(!-e "$t1-pfx/mirror.done", 'no leftover mirror.done'); my $d = "$home/404"; ok(!lei(qw(add-external --mirror), "$http/404", $d), 'mirror 404'); unlike($lei_err, qr!unlink.*?404/mirror\.done!, 'no unlink failure message'); ok(!-d $d, "`404' dir not created"); lei_ok('ls-external'); unlike($lei_out, qr!\Q$d\E!s, 'not added to ls-external'); $d = "$home/bad-epoch"; ok(!lei(qw(add-external -q --epoch=0.. --mirror), "$http/t1/", $d), 'v1 fails on --epoch'); ok(!-d $d, 'destination not created on unacceptable --epoch'); ok(!lei(qw(add-external -q --epoch=1 --mirror), "$http/t2/", $d), 'v2 fails on bad epoch range'); ok(!-d $d, 'destination not created on bad epoch'); my %phail = ( HTTPS => 'https://public-inbox.org/' . 'phail', ONION => 'http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/' . 'phail,' ); for my $t (qw(HTTPS ONION)) { SKIP: { my $k = "TEST_LEI_EXTERNAL_$t"; $ENV{$k} or skip "$k unset", 1; my $url = $phail{$t}; my $dir = "phail-$t"; ok(!lei(qw(add-external -Lmedium --mirror), $url, $dir), '--mirror non-existent v2'); is($? >> 8, 22, 'curl 404'); ok(!-d $dir, 'directory not created'); unlike($lei_err, qr/# mirrored/, 'no success message'); like($lei_err, qr/curl.*404/, "curl 404 shown for $k"); } # SKIP } # for }); SKIP: { undef $sock; my $d = "$tmpdir/d"; mkdir $d or xbail "mkdir $d $!"; my $opt = { -C => $d, 2 => \(my $err) }; ok(!run_script([qw(-clone -q), "$http/404"], undef, $opt), '404 fails'); ok(!-d "$d/404", 'destination not created'); ok(run_script([qw(-clone -q -C), $d, "$http/t2"], undef, $opt), '-clone succeeds on v2'); ok(-f "$d/t2/git/0.git/config", 'epoch cloned'); # writeBitmaps is the default for bare repos in git 2.22+, # so we may stop setting it ourselves. 0 and is(xqx(['git', "--git-dir=$d/t2/git/0.git", 'config', qw(--bool repack.writeBitmaps)]), "true\n", 'write bitmaps set (via include.path=all.git/config'); is(xqx(['git', "--git-dir=$d/t2/git/0.git", 'config', qw(include.path)]), "../../all.git/config\n", 'include.path set'); ok(-s "$d/t2/all.git/objects/info/alternates", 'all.git alternates created'); ok(-f "$d/t2/manifest.js.gz", 'manifest saved'); ok(!-e "$d/t2/mirror.done", 'no leftover mirror.done'); ok(!run_script([qw(-fetch --exit-code -C), "$d/t2"], undef, $opt), '-fetch succeeds w/ manifest.js.gz'); is($? >> 8, 127, '--exit-code gave 127'); unlike($err, qr/git --git-dir=\S+ fetch/, 'no fetch done w/ manifest'); unlink("$d/t2/manifest.js.gz") or xbail "unlink $!"; ok(!run_script([qw(-fetch --exit-code -C), "$d/t2"], undef, $opt), '-fetch succeeds w/o manifest.js.gz'); is($? >> 8, 127, '--exit-code gave 127'); like($err, qr/git --git-dir=\S+ fetch/, 'fetch forced w/o manifest'); ok(run_script([qw(-clone -q -C), $d, "$http/t1"], undef, $opt), 'cloning v1 works'); ok(-d "$d/t1", 'v1 cloned'); ok(!-e "$d/t1/mirror.done", 'no leftover file'); ok(-f "$d/t1/manifest.js.gz", 'manifest saved'); ok(!run_script([qw(-fetch --exit-code -C), "$d/t1"], undef, $opt), 'fetching v1 works'); is($? >> 8, 127, '--exit-code gave 127'); unlike($err, qr/git --git-dir=\S+ fetch/, 'no fetch done w/ manifest'); unlink("$d/t1/manifest.js.gz") or xbail "unlink $!"; my $before = [ glob("$d/t1/*") ]; ok(!run_script([qw(-fetch --exit-code -C), "$d/t1"], undef, $opt), 'fetching v1 works w/o manifest.js.gz'); is($? >> 8, 127, '--exit-code gave 127'); unlink("$d/t1/FETCH_HEAD"); # git internal like($err, qr/git --git-dir=\S+ fetch/, 'no fetch done w/ manifest'); ok(unlink("$d/t1/manifest.js.gz"), 'manifest created'); my $after = [ glob("$d/t1/*") ]; is_deeply($before, $after, 'no new files created'); local $ENV{HOME} = $tmpdir; ok(run_script([qw(-index -Lbasic), "$d/t1"]), 'index v1'); ok(run_script([qw(-index -Lbasic), "$d/t2"]), 'index v2'); SKIP: { join('', sort(keys %created)) eq 'v1v2' or skip "lei didn't run", 2; my $f = "$d/t1/public-inbox/msgmap.sqlite3"; my $ca = PublicInbox::Msgmap->new_file($f)->created_at; is($ca, $created{v1}, 'clone + index v1 synced ->created_at'); $f = "$d/t2/msgmap.sqlite3"; $ca = PublicInbox::Msgmap->new_file($f)->created_at; is($ca, $created{v2}, 'clone + index v2 synced ->created_at'); } test_lei(sub { lei_ok qw(inspect num:1 --dir), "$d/t1"; ok(ref(json_utf8->decode($lei_out)), 'inspect num: on v1'); lei_ok qw(inspect num:1 --dir), "$d/t2"; ok(ref(json_utf8->decode($lei_out)), 'inspect num: on v2'); }); } ok($td->kill, 'killed -httpd'); $td->join; { require_ok 'PublicInbox::LeiMirror'; my $mrr = { src => 'https://example.com/src/', dst => $tmpdir }; my $exp = "mirror of https://example.com/src/\n"; my $f = "$tmpdir/description"; PublicInbox::LeiMirror::set_description($mrr); is(PublicInbox::Inbox::try_cat($f), $exp, 'description set on ENOENT'); my $fh; (open($fh, '>', $f) and close($fh)) or xbail $!; PublicInbox::LeiMirror::set_description($mrr); is(PublicInbox::Inbox::try_cat($f), $exp, 'description set on empty'); (open($fh, '>', $f) and print $fh "x\n" and close($fh)) or xbail $!; is(PublicInbox::Inbox::try_cat($f), "x\n", 'description preserved if non-default'); } done_testing; public-inbox-1.9.0/t/lei-p2q.t000066400000000000000000000030441430031475700160220ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_git 2.6; require_mods(qw(json DBD::SQLite Search::Xapian)); test_lei(sub { ok(!lei(qw(p2q this-better-cause-format-patch-to-fail)), 'p2q fails on bogus arg') or diag $lei_err; like($lei_err, qr/format-patch.*failed/, 'notes format-patch failure'); lei_ok(qw(p2q -w dfpost t/data/0001.patch)); is($lei_out, "dfpost:6e006fd73b1d\n", 'pathname') or diag $lei_err; open my $fh, '+<', 't/data/0001.patch' or xbail "open: $!"; lei_ok([qw(p2q -w dfpost -)], undef, { %$lei_opt, 0 => $fh }); is($lei_out, "dfpost:6e006fd73b1d\n", '--stdin') or diag $lei_err; sysseek($fh, 0, 0) or xbail "lseek: $!"; lei_ok([qw(p2q -w dfpost)], undef, { %$lei_opt, 0 => $fh }); is($lei_out, "dfpost:6e006fd73b1d\n", 'implicit --stdin'); lei_ok(qw(p2q --uri t/data/0001.patch -w), 'dfpost,dfn'); is($lei_out, "dfpost%3A6e006fd73b1d+". "dfn%3Alib%2FPublicInbox%2FSearch.pm\n", '--uri -w dfpost,dfn'); lei_ok(qw(p2q t/data/0001.patch), '--want=dfpost,OR,dfn'); is($lei_out, "dfpost:6e006fd73b1d OR dfn:lib/PublicInbox/Search.pm\n", '--want=OR'); lei_ok(qw(p2q t/data/0001.patch --want=dfpost9)); is($lei_out, "dfpost:6e006fd73b1d OR " . "dfpost:6e006fd73b1 OR " . "dfpost:6e006fd73b OR " . "dfpost:6e006fd73\n", '3-byte chop'); lei_ok(qw(p2q t/data/message_embed.eml --want=dfb)); like($lei_out, qr/\bdfb:\S+/, 'got dfb off /dev/null file'); }); done_testing; public-inbox-1.9.0/t/lei-q-kw.t000066400000000000000000000214401430031475700161770ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use POSIX qw(mkfifo); use Fcntl qw(SEEK_SET O_RDONLY O_NONBLOCK); use IO::Uncompress::Gunzip qw(gunzip); use IO::Compress::Gzip qw(gzip); use PublicInbox::MboxReader; use PublicInbox::LeiToMail; use PublicInbox::Spawn qw(popen_rd); my $exp = { '' => eml_load('t/plack-qp.eml'), '' => eml_load('t/utf8.eml'), }; $exp->{''}->header_set('Status', 'RO'); test_lei(sub { lei_ok(qw(import -F eml t/plack-qp.eml)); my $o = "$ENV{HOME}/dst"; lei_ok(qw(q -o), "maildir:$o", qw(m:qp@example.com)); my @fn = glob("$o/cur/*:2,"); scalar(@fn) == 1 or xbail $lei_err, 'wrote multiple or zero files:', \@fn; rename($fn[0], "$fn[0]S") or BAIL_OUT "rename $!"; lei_ok(qw(q -o), "maildir:$o", qw(m:bogus-noresults@example.com)); ok(!glob("$o/cur/*"), 'last result cleared after augment-import'); lei_ok(qw(q -o), "maildir:$o", qw(m:qp@example.com)); @fn = glob("$o/cur/*:2,S"); is(scalar(@fn), 1, "`seen' flag set on Maildir file") or diag "$o contents: ", explain([glob("$o/*/*")]); # ensure --no-import-before works my $n = $fn[0]; $n =~ s/,S\z/,RS/; rename($fn[0], $n) or BAIL_OUT "rename $!"; lei_ok(qw(q --no-import-before -o), "maildir:$o", qw(m:bogus-noresults@example.com)); ok(!glob("$o/cur/*"), '--no-import-before cleared destination'); lei_ok(qw(q -o), "maildir:$o", qw(m:qp@example.com)); @fn = glob("$o/cur/*:2,S"); is(scalar(@fn), 1, "`seen' flag (but not `replied') set on Maildir file"); SKIP: { $o = "$ENV{HOME}/fifo"; mkfifo($o, 0600) or skip("mkfifo not supported: $!", 1); # cat(1) since lei() may not execve for FD_CLOEXEC to work my $cat = popen_rd(['cat', $o]); ok(!lei(qw(q --import-before bogus -o), "mboxrd:$o"), '--import-before fails on non-seekable output'); like($lei_err, qr/not seekable/, 'unseekable noted in error'); is(do { local $/; <$cat> }, '', 'no output on FIFO'); close $cat; $cat = popen_rd(['cat', $o]); lei_ok(qw(q m:qp@example.com -o), "mboxrd:$o"); my $buf = do { local $/; <$cat> }; open my $fh, '<', \$buf or BAIL_OUT $!; PublicInbox::MboxReader->mboxrd($fh, sub { my ($eml) = @_; $eml->header_set('Status', 'RO'); is_deeply($eml, $exp->{''}, 'FIFO output works as expected'); }); }; lei_ok qw(import -F eml t/utf8.eml), \'for augment test'; my $read_file = sub { if ($_[0] =~ /\.gz\z/) { gunzip($_[0] => \(my $buf = ''), MultiStream => 1) or BAIL_OUT 'gunzip'; $buf; } else { open my $fh, '+<', $_[0] or BAIL_OUT $!; do { local $/; <$fh> }; } }; my $write_file = sub { if ($_[0] =~ /\.gz\z/) { gzip(\($_[1]), $_[0]) or BAIL_OUT 'gzip'; } else { open my $fh, '>', $_[0] or BAIL_OUT $!; print $fh $_[1] or BAIL_OUT $!; close $fh or BAIL_OUT; } }; for my $sfx ('', '.gz') { $o = "$ENV{HOME}/dst.mboxrd$sfx"; lei_ok(qw(q -o), "mboxrd:$o", qw(m:qp@example.com)); my $buf = $read_file->($o); $buf =~ s/^Status: [^\n]*\n//sm or BAIL_OUT "no status in $buf"; $write_file->($o, $buf); lei_ok(qw(q -o), "mboxrd:$o", qw(rereadandimportkwchange)); $buf = $read_file->($o); is($buf, '', 'emptied'); lei_ok(qw(q -o), "mboxrd:$o", qw(m:qp@example.com)); $buf = $read_file->($o); $buf =~ s/\nStatus: O\n\n/\nStatus: RO\n\n/s or BAIL_OUT "no Status in $buf"; $write_file->($o, $buf); lei_ok(qw(q -a -o), "mboxrd:$o", qw(m:testmessage@example.com)); $buf = $read_file->($o); open my $fh, '<', \$buf or BAIL_OUT "PerlIO::scalar $!"; my %res; PublicInbox::MboxReader->mboxrd($fh, sub { my ($eml) = @_; my $mid = $eml->header_raw('Message-ID'); if ($mid eq '') { is_deeply([$eml->header('Status')], [], "no status $sfx"); $eml->header_set('Status'); } elsif ($mid eq '') { is($eml->header('Status'), 'RO', 'status preserved'); } else { fail("unknown mid $mid"); } $res{$mid} = $eml; }); is_deeply(\%res, $exp, '--augment worked') or diag $lei_err; lei_ok(qw(q -o), "mboxrd:/dev/stdout", qw(m:qp@example.com)) or diag $lei_err; like($lei_out, qr/^Status: RO\n/sm, 'Status set by previous augment'); } # /mbox + mbox.gz tests my ($ro_home, $cfg_path) = setup_public_inboxes; # import keywords-only for external messages: $o = "$ENV{HOME}/kwdir"; my $m = 'alpine.DEB.2.20.1608131214070.4924@example'; my @inc = ('-I', "$ro_home/t1"); lei_ok(qw(q -o), $o, "m:$m", @inc); # emulate MUA marking a Maildir message as read: @fn = glob("$o/cur/*"); scalar(@fn) == 1 or xbail $lei_err, 'wrote multiple or zero files:', \@fn; rename($fn[0], "$fn[0]S") or BAIL_OUT "rename $!"; lei_ok(qw(q -o), $o, 'bogus', \'clobber output dir to import keywords'); @fn = glob("$o/cur/*"); is_deeply(\@fn, [], 'output dir actually clobbered'); lei_ok('q', "m:$m", @inc); my $res = json_utf8->decode($lei_out); is_deeply($res->[0]->{kw}, ['seen'], 'seen flag set for external message') or diag explain($res); lei_ok('q', "m:$m", '--no-external'); is_deeply($res = json_utf8->decode($lei_out), [ undef ], 'external message not imported') or diag explain($res); $o = "$ENV{HOME}/kwmboxrd"; lei_ok(qw(q -o), "mboxrd:$o", "m:$m", @inc); # emulate MUA marking mboxrd message as unread open my $fh, '<', $o or BAIL_OUT; my $s = do { local $/; <$fh> }; $s =~ s/^Status: RO\n/Status: O\nX-Status: AF\n/sm or fail "failed to clear R flag in $s"; open $fh, '>', $o or BAIL_OUT; print $fh $s or BAIL_OUT; close $fh or BAIL_OUT; lei_ok(qw(q -o), "mboxrd:$o", 'm:bogus', @inc, \'clobber mbox to import keywords'); lei_ok(qw(q -o), "mboxrd:$o", "m:$m", @inc); open $fh, '<', $o or BAIL_OUT; $s = do { local $/; <$fh> }; like($s, qr/^Status: O\nX-Status: AF\n/ms, 'seen keyword gone in mbox, answered + flagged set'); lei_ok(qw(q --pretty), "m:$m", @inc); like($lei_out, qr/^ "kw": \["answered", "flagged"\],\n/sm, '--pretty JSON output shows kw: on one line'); # ensure import on previously external-only message works lei_ok('q', "m:$m"); is_deeply(json_utf8->decode($lei_out), [ undef ], 'to-be-imported message non-existent'); lei_ok(qw(import -F eml t/x-unknown-alpine.eml)); is($lei_err, '', 'no errors importing previous external-only message'); lei_ok('q', "m:$m"); $res = json_utf8->decode($lei_out); is($res->[1], undef, 'got one result'); is_deeply($res->[0]->{kw}, [ qw(answered flagged) ], 'kw preserved on exact'); # ensure fuzzy match import works, too $m = 'multipart@example.com'; $o = "$ENV{HOME}/fuzz"; lei_ok('q', '-o', $o, "m:$m", @inc); @fn = glob("$o/cur/*"); scalar(@fn) == 1 or xbail $lei_err, "wrote multiple or zero files", \@fn; rename($fn[0], "$fn[0]S") or BAIL_OUT "rename $!"; lei_ok('q', '-o', $o, "m:$m"); is_deeply([glob("$o/cur/*")], [], 'clobbered output results'); my $eml = eml_load('t/plack-2-txt-bodies.eml'); $eml->header_set('List-Id', ''); my $in = $eml->as_string; lei_ok([qw(import -F eml --stdin)], undef, { 0 => \$in, %$lei_opt }); is($lei_err, '', 'no errors from import'); lei_ok(qw(q -f mboxrd), "m:$m"); open $fh, '<', \$lei_out or BAIL_OUT $!; my @res; PublicInbox::MboxReader->mboxrd($fh, sub { push @res, shift }); is($res[0]->header('Status'), 'RO', 'seen kw set'); $res[0]->header_set('Status'); is_deeply(\@res, [ $eml ], 'imported message matches w/ List-Id'); $eml->header_set('List-Id', ''); $in = $eml->as_string; lei_ok([qw(import -F eml --stdin)], undef, { 0 => \$in, %$lei_opt }); is($lei_err, '', 'no errors from 2nd import'); lei_ok(qw(q -f mboxrd), "m:$m", 'l:another.example.com'); my @another; open $fh, '<', \$lei_out or BAIL_OUT $!; PublicInbox::MboxReader->mboxrd($fh, sub { push @another, shift }); is($another[0]->header('Status'), 'RO', 'seen kw set'); # forwarded { local $ENV{DBG} = 1; $o = "$ENV{HOME}/forwarded"; lei_ok(qw(q -o), $o, "m:$m"); my @p = glob("$o/cur/*"); scalar(@p) == 1 or xbail('multiple when 1 expected', \@p); my $passed = $p[0]; $passed =~ s/,S\z/,PS/ or xbail "failed to replace $passed"; rename($p[0], $passed) or xbail "rename $!"; lei_ok(qw(q -o), $o, 'm:bogus', \'clobber maildir'); is_deeply([glob("$o/cur/*")], [], 'old results clobbered'); lei_ok(qw(q -o), $o, "m:$m"); @p = glob("$o/cur/*"); scalar(@p) == 1 or xbail('multiple when 1 expected', \@p); like($p[0], qr/,PS/, 'passed (Forwarded) flag kept'); lei_ok(qw(q -o), "mboxrd:$o.mboxrd", "m:$m"); open $fh, '<', "$o.mboxrd" or xbail $!; my @res; PublicInbox::MboxReader->mboxrd($fh, sub { push @res, shift }); scalar(@res) == 1 or xbail('multiple when 1 expected', \@res); is($res[0]->header('Status'), 'RO', 'seen kw set'); is($res[0]->header('X-Status'), undef, 'no X-Status'); lei_ok(qw(q -o), "mboxrd:$o.mboxrd", 'bogus-for-import-before'); lei_ok(qw(q -o), $o, "m:$m"); @p = glob("$o/cur/*"); scalar(@p) == 1 or xbail('multiple when 1 expected', \@p); like($p[0], qr/,PS/, 'passed (Forwarded) flag still kept'); } }); # test_lei done_testing; public-inbox-1.9.0/t/lei-q-remote-import.t000066400000000000000000000076301430031475700203660ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_mods(qw(lei -httpd)); require_cmd 'curl'; use PublicInbox::MboxReader; my ($ro_home, $cfg_path) = setup_public_inboxes; my $sock = tcp_server; my ($tmpdir, $for_destroy) = tmpdir; my $cmd = [ '-httpd', '-W0', "--stdout=$tmpdir/1", "--stderr=$tmpdir/2" ]; my $env = { PI_CONFIG => $cfg_path }; my $td = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT("-httpd: $?"); my $host_port = tcp_host_port($sock); my $url = "http://$host_port/t2/"; my $exp1 = [ eml_load('t/plack-qp.eml') ]; my $exp2 = [ eml_load('t/iso-2202-jp.eml') ]; my $slurp_emls = sub { open my $fh, '<', $_[0] or BAIL_OUT "open: $!"; my @eml; PublicInbox::MboxReader->mboxrd($fh, sub { my $eml = shift; $eml->header_set('Status'); push @eml, $eml; }); \@eml; }; test_lei({ tmpdir => $tmpdir }, sub { my $o = "$ENV{HOME}/o.mboxrd"; my @cmd = ('q', '-o', "mboxrd:$o", 'm:qp@example.com'); lei_ok(@cmd); ok(-f $o && !-s _, 'output exists but is empty'); unlink $o or BAIL_OUT $!; lei_ok(@cmd, '-I', $url); is_deeply($slurp_emls->($o), $exp1, 'got results after remote search'); unlink $o or BAIL_OUT $!; lei_ok(@cmd); ok(-f $o && -s _, 'output exists after import but is not empty') or diag $lei_err; is_deeply($slurp_emls->($o), $exp1, 'got results w/o remote search'); unlink $o or BAIL_OUT $!; $cmd[-1] = 'm:199707281508.AAA24167@hoyogw.example'; lei_ok(@cmd, '-I', $url, '--no-import-remote'); is_deeply($slurp_emls->($o), $exp2, 'got another after remote search'); unlink $o or BAIL_OUT $!; lei_ok(@cmd); ok(-f $o && !-s _, '--no-import-remote did not memoize'); open my $fh, '>', "$o.lock"; $cmd[-1] = 'm:qp@example.com'; unlink $o or xbail("unlink $o $! cwd=".Cwd::getcwd()); lei_ok(@cmd, '--lock=none'); ok(-f $o && -s _, '--lock=none respected') or diag $lei_err; unlink $o or xbail("unlink $o $! cwd=".Cwd::getcwd()); ok(!lei(@cmd, '--lock=dotlock,timeout=0.000001'), 'dotlock fails'); like($lei_err, qr/dotlock timeout/, 'timeout noted'); ok(-f $o && !-s _, 'nothing output on lock failure'); unlink "$o.lock" or BAIL_OUT $!; lei_ok(@cmd, '--lock=dotlock,timeout=0.000001', \'succeeds after lock removal'); my $ibx = create_inbox 'local-external', indexlevel => 'medium', sub { my ($im) = @_; $im->add(eml_load('t/utf8.eml')) or BAIL_OUT '->add'; }; lei_ok(qw(add-external -q), $ibx->{inboxdir}); lei_ok(qw(q -q -o), "mboxrd:$o", '--only', $url, 'm:testmessage@example.com'); is($lei_err, '', 'no warnings or errors'); ok(-s $o, 'got result from remote external'); my $exp = eml_load('t/utf8.eml'); is_deeply($slurp_emls->($o), [$exp], 'got expected result'); lei_ok(qw(q --no-external -o), "mboxrd:/dev/stdout", 'm:testmessage@example.com'); is($lei_out, '', 'message not imported when in local external'); open $fh, '>', $o or BAIL_OUT; print $fh <<'EOF' or BAIL_OUT; From a@z Mon Sep 17 00:00:00 2001 From: nobody@localhost Date: Sat, 13 Mar 2021 18:23:01 +0600 Message-ID: Status: OR whatever EOF close $fh or BAIL_OUT; lei_ok(qw(q -o), "mboxrd:$o", 'm:testmessage@example.com'); is_deeply($slurp_emls->($o), [$exp], 'got expected result after clobber') or diag $lei_err; lei_ok(qw(q -o mboxrd:/dev/stdout m:never-before-seen@example.com)); like($lei_out, qr/seen\@example\.com>\nStatus: RO\n\nwhatever/sm, '--import-before imported totally unseen message'); lei_ok(qw(q --save z:0.. -o), "$ENV{HOME}/md", '--only', $url); my @f = glob("$ENV{HOME}/md/*/*"); lei_ok('up', "$ENV{HOME}/md"); is_deeply(\@f, [ glob("$ENV{HOME}/md/*/*") ], 'lei up remote dedupe works on maildir'); my $edit_env = { VISUAL => 'cat', EDITOR => 'cat' }; lei_ok([qw(edit-search), "$ENV{HOME}/md"], $edit_env); like($lei_out, qr/^\Q[external "$url"]\E\n\s*lastresult = \d+/sm, 'lastresult set'); }); done_testing; public-inbox-1.9.0/t/lei-q-save.t000066400000000000000000000254601430031475700165220ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Smsg; use List::Util qw(sum); use File::Path qw(remove_tree); my $doc1 = eml_load('t/plack-qp.eml'); $doc1->header_set('Date', PublicInbox::Smsg::date({ds => time - (86400 * 5)})); my $doc2 = eml_load('t/utf8.eml'); $doc2->header_set('Date', PublicInbox::Smsg::date({ds => time - (86400 * 4)})); my $doc3 = eml_load('t/msg_iter-order.eml'); $doc3->header_set('Date', PublicInbox::Smsg::date({ds => time - (86400 * 4)})); my $pre_existing = <<'EOF'; From x Mon Sep 17 00:00:00 2001 Message-ID: Subject: pre-existing Date: Sat, 02 Oct 2010 00:00:00 +0000 blah EOF test_lei(sub { my $home = $ENV{HOME}; my $in = $doc1->as_string; lei_ok [qw(import -q -F eml -)], undef, { 0 => \$in, %$lei_opt }; lei_ok qw(q -q z:0.. d:last.week..), '-o', "MAILDIR:$home/md/"; my %before = map { $_ => 1 } glob("$home/md/cur/*"); my $f = (keys %before)[0] or xbail({before => \%before}); is_deeply(eml_load($f), $doc1, 'doc1 matches'); lei_ok qw(ls-mail-sync); is($lei_out, "maildir:$home/md\n", 'canonicalized mail sync name'); my @s = glob("$home/.local/share/lei/saved-searches/md-*"); is(scalar(@s), 1, 'got one saved search'); my $cfg = PublicInbox::Config->new("$s[0]/lei.saved-search"); is($cfg->{'lei.q.output'}, "maildir:$home/md", 'canonicalized output'); is_deeply($cfg->{'lei.q'}, ['z:0..', 'd:last.week..'], 'store relative time, not parsed (absolute) timestamp'); # ensure "lei up" works, since it compliments "lei q --save" $in = $doc2->as_string; lei_ok [qw(import -q -F eml -)], undef, { 0 => \$in, %$lei_opt }; lei_ok qw(up -q md -C), $home; lei_ok qw(up -q . -C), "$home/md"; lei_ok qw(up -q), "/$home/md"; my %after = map { $_ => 1 } glob("$home/md/{new,cur}/*"); is(delete $after{(keys(%before))[0]}, 1, 'original message kept'); is(scalar(keys %after), 1, 'one new message added'); $f = (keys %after)[0] or xbail({after => \%after}); is_deeply(eml_load($f), $doc2, 'doc2 matches'); # check stdin lei_ok [qw(q - -o), "mboxcl2:mbcl2" ], undef, { -C => $home, %$lei_opt, 0 => \'d:last.week..'}; @s = glob("$home/.local/share/lei/saved-searches/mbcl2-*"); $cfg = PublicInbox::Config->new("$s[0]/lei.saved-search"); is_deeply $cfg->{'lei.q'}, 'd:last.week..', 'q --stdin stores relative time'; my $size = -s "$home/mbcl2"; ok(defined($size) && $size > 0, 'results written'); lei_ok([qw(up mbcl2)], undef, { -C => $home, %$lei_opt }); is(-s "$home/mbcl2", $size, 'size unchanged on noop up'); $in = $doc3->as_string; lei_ok [qw(import -q -F eml -)], undef, { 0 => \$in, %$lei_opt }; lei_ok([qw(up mbcl2)], undef, { -C => $home, %$lei_opt }); ok(-s "$home/mbcl2" > $size, 'size increased after up'); ok(!lei(qw(up -q), $home), 'up fails on unknown dir'); like($lei_err, qr/--no-save was used/, 'error noted --no-save'); lei_ok(qw(q --no-save d:last.week.. -q -o), "$home/no-save"); ok(!lei(qw(up -q), "$home/no-save"), 'up fails on --no-save'); like($lei_err, qr/--no-save was used/, 'error noted --no-save'); lei_ok qw(ls-search); my @d = split(/\n/, $lei_out); lei_ok qw(ls-search -z); my @z = split(/\0/, $lei_out); is_deeply(\@d, \@z, '-z output matches non-z'); is_deeply(\@d, [ "$home/mbcl2", "$home/md" ], 'ls-search output alphabetically sorted'); lei_ok qw(ls-search -l); my $json = PublicInbox::Config->json->decode($lei_out); ok($json && $json->[0]->{output}, 'JSON has output'); lei_ok qw(_complete lei up); like($lei_out, qr!^\Q$home/mbcl2\E$!sm, 'complete got mbcl2 output'); like($lei_out, qr!^\Q$home/md\E$!sm, 'complete got maildir output'); unlink("$home/mbcl2") or xbail "unlink $!"; lei_ok qw(_complete lei up); like($lei_out, qr!^\Q$home/mbcl2\E$!sm, 'mbcl2 output shown despite unlink'); lei_ok([qw(up mbcl2)], undef, { -C => $home, %$lei_opt }); ok(-f "$home/mbcl2" && -s _ == 0, 'up recreates on missing output'); # no --augment open my $mb, '>', "$home/mbrd" or xbail "open $!"; print $mb $pre_existing; close $mb or xbail "close: $!"; lei_ok(qw(q -o mboxrd:mbrd m:qp@example.com -C), $home); open $mb, '<', "$home/mbrd" or xbail "open $!"; is_deeply([grep(/pre-existing/, <$mb>)], [], 'pre-existing messsage gone w/o augment'); close $mb; lei_ok(qw(q m:import-before@example.com)); is(json_utf8->decode($lei_out)->[0]->{'s'}, 'pre-existing', '--save imported before clobbering'); # --augment open $mb, '>', "$home/mbrd-aug" or xbail "open $!"; print $mb $pre_existing; close $mb or xbail "close: $!"; lei_ok(qw(q -a -o mboxrd:mbrd-aug m:qp@example.com -C), $home); open $mb, '<', "$home/mbrd-aug" or xbail "open $!"; $mb = do { local $/; <$mb> }; like($mb, qr/pre-existing/, 'pre-existing message preserved w/ -a'); like($mb, qr//, 'new result written w/ -a'); lei_ok(qw(up --all=local)); ok(!lei(qw(forget-search), "$home/bogus"), 'bogus forget'); like($lei_err, qr/--save was not used/, 'error noted --save'); lei_ok qw(_complete lei forget-search); like($lei_out, qr/mbrd-aug/, 'forget-search completion'); lei_ok(qw(forget-search -v), "$home/mbrd-aug"); is($lei_out, '', 'no output'); like($lei_err, qr/\bmbrd-aug\b/, '-v (verbose) reported unlinks'); lei_ok qw(_complete lei forget-search); unlike($lei_out, qr/mbrd-aug/, 'forget-search completion cleared after forget'); ok(!lei('up', "$home/mbrd-aug"), 'lei up fails after forget'); like($lei_err, qr/--no-save was used/, 'error noted --no-save'); # dedupe=mid my $o = "$home/dd-mid"; $in = $doc2->as_string . "\n-------\nappended list sig\n"; lei_ok [qw(import -q -F eml -)], undef, { 0 => \$in, %$lei_opt }; lei_ok(qw(q --dedupe=mid m:testmessage@example.com -o), $o); my @m = glob("$o/cur/*"); is(scalar(@m), 1, '--dedupe=mid w/ --save'); $in = $doc2->as_string . "\n-------\nanother list sig\n"; lei_ok [qw(import -q -F eml -)], undef, { 0 => \$in, %$lei_opt }; lei_ok 'up', $o; is_deeply([glob("$o/cur/*")], \@m, 'lei up dedupe=mid works'); for my $dd (qw(content)) { $o = "$home/dd-$dd"; lei_ok(qw(q m:testmessage@example.com -o), $o, "--dedupe=$dd"); @m = glob("$o/cur/*"); is(scalar(@m), 3, 'all 3 matches with dedupe='.$dd); } # dedupe=oid $o = "$home/dd-oid"; my $ibx = create_inbox 'ibx', indexlevel => 'medium', tmpdir => "$home/v1", sub {}; lei_ok(qw(q --dedupe=oid m:qp@example.com -o), $o, '-I', $ibx->{inboxdir}); @m = glob("$o/cur/*"); is(scalar(@m), 1, 'got first result'); my $im = $ibx->importer(0); my $diff = "X-Insignificant-Header: x\n".$doc1->as_string; $im->add(PublicInbox::Eml->new($diff)); $im->done; lei_ok('up', $o); @m = glob("$o/{new,cur}/*"); is(scalar(@m), 2, 'got 2nd result due to different OID'); SKIP: { symlink($o, "$home/ln -s") or skip "symlinks not supported in $home?: $!", 1; lei_ok('up', "$home/ln -s"); lei_ok('forget-search', "$home/ln -s"); }; my $v2 = "$home/v2"; # v2: as an output destination my (@before, @after); require PublicInbox::MboxReader; lei_ok(qw(q z:0.. -o), "v2:$v2"); like($lei_err, qr/^# ([1-9][0-9]*) written to \Q$v2\E/sm, 'non-zero write output to stderr'); lei_ok(qw(q z:0.. -o), "mboxrd:$home/before", '--only', $v2, '-j1,1'); open my $fh, '<', "$home/before"; PublicInbox::MboxReader->mboxrd($fh, sub { push @before, $_[0] }); isnt(scalar(@before), 0, 'initial v2 written'); my $orig = sum(map { -f $_ ? -s _ : () } ( glob("$v2/git/0.git/objects/*/*"))); lei_ok(qw(import t/data/0001.patch)); lei_ok 'up', $v2; lei_ok(qw(q z:0.. -o), "mboxrd:$home/after", '--only', $v2, '-j1,1'); open $fh, '<', "$home/after"; PublicInbox::MboxReader->mboxrd($fh, sub { push @after, $_[0] }); my $last = shift @after; $last->header_set('Status'); is_deeply($last, eml_load('t/data/0001.patch'), 'lei up worked on v2'); is_deeply(\@before, \@after, 'got same results'); my $v2s = "$home/v2s"; lei_ok(qw(q --shared z:0.. -o), "v2:$v2s"); my $shared = sum(map { -f $_ ? -s _ : () } ( glob("$v2s/git/0.git/objects/*/*"))); ok($shared < $orig, 'fewer bytes stored with --shared') or diag "shared=$shared orig=$orig"; lei_ok([qw(edit-search), $v2s], { VISUAL => 'cat', EDITOR => 'cat' }); like($lei_out, qr/^\[lei/sm, 'edit-search can cat'); lei_ok('-C', "$home/v2s", qw(q -q -o ../s m:testmessage@example.com)); lei_ok qw(ls-search); unlike $lei_out, qr{/\.\./s$}sm, 'relative path not in ls-search'; like $lei_out, qr{^\Q$home\E/s$}sm, 'absolute path appears in ls-search'; lei_ok qw(up ../s -C), "$home/v2s", \'relative lei up'; lei_ok qw(up), "$home/s", \'absolute lei up'; # mess up a config file my @lss = glob("$home/" . '.local/share/lei/saved-searches/*/lei.saved-search'); my $out = xqx([qw(git config -f), $lss[0], 'lei.q.output']); xsys($^X, qw(-i -p -e), "s/\\[/\\0/", $lss[0]) and xbail "-ipe $lss[0]: $?"; lei_ok qw(ls-search); like($lei_err, qr/bad config line.*?\Q$lss[0]\E/, 'git config parse error shown w/ lei ls-search'); lei_ok qw(up --all), \'up works with bad config'; like($lei_err, qr/bad config line.*?\Q$lss[0]\E/, 'git config parse error shown w/ lei up'); xsys($^X, qw(-i -p -e), "s/\\0/\\[/", $lss[0]) and xbail "-ipe $lss[0]: $?"; lei_ok qw(ls-search); is($lei_err, '', 'no errors w/ fixed config'); like($lei_out, qr!\Q$home/after\E!, "`after' in ls-search"); remove_tree("$home/after"); lei_ok qw(forget-search --prune); lei_ok qw(ls-search); unlike($lei_out, qr!\Q$home/after\E!, "`after' pruned"); my $d = "$home/d"; lei_ok [qw(import -q -F eml)], undef, {0 => \"Subject: do not call\n\n"}; lei_ok qw(q -o), $d, 's:do not call'; my @orig = glob("$d/*/*"); is(scalar(@orig), 1, 'got one message via argv'); lei_ok [qw(import -q -Feml)], undef, {0 => \"Subject: do not ever call\n\n"}; lei_ok 'up', $d; is_deeply([glob("$d/*/*")], \@orig, 'nothing written'); lei_ok [qw(import -q -Feml)], undef, {0 => \"Subject: do not call, ever\n\n"}; lei_ok 'up', $d; @after = glob("$d/*/*"); is(scalar(@after), 2, '2 total, messages, now'); is_deeply([glob("$d/cur/*")], \@orig, 'cur untouched'); my @new = glob("$d/new/*"); is(scalar(@new), 1, "new message written to `new'"); is(eml_load($new[0])->header('Subject'), 'do not call, ever', 'up retrieved correct message'); $d = "$home/d-stdin"; lei_ok [ qw(q -q -o), $d ], undef, { 0 => \'s:"do not ever call"' }; @orig = glob("$d/*/*"); is(scalar(@orig), 1, 'got one message via stdin'); lei_ok [qw(import -q -Feml)], undef, {0 => \"Subject: do not fall or ever call\n\n"}; lei_ok [qw(import -q -Feml)], undef, {0 => \"Subject: do not ever call, again\n\n"}; lei_ok 'up', $d; @new = glob("$d/new/*"); is(scalar(@new), 1, "new message written to `new'") or do { for (@new) { diag "$_ ".eml_load($_)->header('Subject') } }; is_deeply([glob("$d/cur/*")], \@orig, 'cur untouched'); is(eml_load($new[0])->header('Subject'), 'do not ever call, again', 'up retrieved correct message'); }); done_testing; public-inbox-1.9.0/t/lei-q-thread.t000066400000000000000000000043071430031475700170300ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_git 2.6; require_mods(qw(json DBD::SQLite Search::Xapian)); use PublicInbox::LeiToMail; my ($ro_home, $cfg_path) = setup_public_inboxes; test_lei(sub { my $eml = eml_load('t/utf8.eml'); my $buf = PublicInbox::LeiToMail::eml2mboxrd($eml, { kw => ['seen'] }); lei_ok([qw(import -F mboxrd -)], undef, { 0 => $buf, %$lei_opt }); lei_ok qw(q -t m:testmessage@example.com); my $res = json_utf8->decode($lei_out); is_deeply($res->[0]->{kw}, [ 'seen' ], 'q -t sets keywords') or diag explain($res); $eml = eml_load('t/utf8.eml'); $eml->header_set('References', $eml->header('Message-ID')); $eml->header_set('Message-ID', ''); $buf = PublicInbox::LeiToMail::eml2mboxrd($eml, { kw => ['draft'] }); lei_ok([qw(import -F mboxrd -)], undef, { 0 => $buf, %$lei_opt }); lei_ok([qw(q - -t)], undef, { 0 => \'m:testmessage@example.com', %$lei_opt }); $res = json_utf8->decode($lei_out); is(scalar(@$res), 3, 'got 2 results'); pop @$res; my %m = map { $_->{'m'} => $_ } @$res; is_deeply($m{'testmessage@example.com'}->{kw}, ['seen'], 'flag set in direct hit') or diag explain($res); is_deeply($m{'a-reply@miss'}->{kw}, ['draft'], 'flag set in thread hit') or diag explain($res); lei_ok qw(q -t -t m:testmessage@example.com); $res = json_utf8->decode($lei_out); is(scalar(@$res), 3, 'got 2 results with -t -t'); pop @$res; %m = map { $_->{'m'} => $_ } @$res; is_deeply($m{'testmessage@example.com'}->{kw}, ['flagged', 'seen'], 'flagged set in direct hit') or diag explain($res); is_deeply($m{'a-reply@miss'}->{kw}, ['draft'], 'set in thread hit') or diag explain($res); lei_ok qw(q -tt m:testmessage@example.com --only), "$ro_home/t2"; $res = json_utf8->decode($lei_out); is_deeply($res->[0]->{kw}, [ qw(flagged seen) ], 'flagged set on external with -tt') or diag explain($res); lei_ok qw(q -t m:testmessage@example.com --only), "$ro_home/t2"; $res = json_utf8->decode($lei_out); is_deeply($res->[0]->{kw}, [ 'seen' ], 'flagged not set on external with 1 -t') or diag explain($res); }); done_testing; public-inbox-1.9.0/t/lei-refresh-mail-sync.t000066400000000000000000000114661430031475700206570ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_mods(qw(lei)); use File::Path qw(remove_tree); require Socket; my $stop_daemon = sub { # needed since we don't have inotify lei_ok qw(daemon-pid); chomp(my $pid = $lei_out); $pid > 0 or xbail "bad pid: $pid"; kill('TERM', $pid) or xbail "kill: $!"; for (0..10) { tick; kill(0, $pid) or last; } kill(0, $pid) and xbail "daemon still running (PID:$pid)"; }; test_lei({ daemon_only => 1 }, sub { my $d = "$ENV{HOME}/d"; my ($ro_home, $cfg_path) = setup_public_inboxes; lei_ok qw(daemon-pid); lei_ok qw(add-external), "$ro_home/t2"; lei_ok qw(q mid:testmessage@example.com -o), "Maildir:$d"; my (@o) = glob("$d/*/*"); scalar(@o) == 1 or xbail('multiple results', \@o); my ($bn0) = ($o[0] =~ m!/([^/]+)\z!); my $oid = '9bf1002c49eb075df47247b74d69bcd555e23422'; lei_ok 'inspect', "blob:$oid"; my $before = json_utf8->decode($lei_out); my $exp0 = { 'mail-sync' => { "maildir:$d" => [ $bn0 ] } }; is_deeply($before, $exp0, 'inspect shows expected'); $stop_daemon->(); my $dst = $o[0]; $dst =~ s/:2,.*\z// and $dst =~ s!/cur/!/new/! and rename($o[0], $dst) or xbail "rename($o[0] => $dst): $!"; lei_ok 'inspect', "blob:$oid"; is_deeply(json_utf8->decode($lei_out), $before, 'inspect unchanged immediately after restart'); lei_ok 'refresh-mail-sync', '--all'; lei_ok 'inspect', "blob:$oid"; my ($bn1) = ($dst =~ m!/([^/]+)\z!); my $exp1 = { 'mail-sync' => { "maildir:$d" => [ $bn1 ] } }; is_deeply(json_utf8->decode($lei_out), $exp1, 'refresh-mail-sync updated location'); $stop_daemon->(); rename($dst, "$d/unwatched") or xbail "rename $dst out-of-the-way $!"; lei_ok 'refresh-mail-sync', $d; lei_ok 'inspect', "blob:$oid"; is($lei_out, '{}', 'no known locations after "removal"'); lei_ok 'refresh-mail-sync', "Maildir:$d"; $stop_daemon->(); rename("$d/unwatched", $dst) or xbail "rename $dst back"; lei_ok 'refresh-mail-sync', "Maildir:$d"; lei_ok 'inspect', "blob:$oid"; is_deeply(json_utf8->decode($lei_out), $exp1, 'replaced file noted again'); $stop_daemon->(); remove_tree($d); lei_ok 'refresh-mail-sync', '--all'; lei_ok 'inspect', "blob:$oid"; is($lei_out, '{}', 'no known locations after "removal"'); lei_ok 'ls-mail-sync'; is($lei_out, '', 'no sync left when folder is gone'); SKIP: { require_mods(qw(-imapd -nntpd Mail::IMAPClient Net::NNTP), 1); require File::Copy; # stdlib my $home = $ENV{HOME}; my $srv; my $cfg_path2 = "$home/cfg2"; File::Copy::cp($cfg_path, $cfg_path2); my $env = { PI_CONFIG => $cfg_path2 }; my $sock_cls; for my $x (qw(imapd)) { my $s = tcp_server; $sock_cls //= ref($s); my $cmd = [ "-$x", '-W0', "--stdout=$home/$x.out", "--stderr=$home/$x.err" ]; my $td = start_script($cmd, $env, { 3 => $s }) or xbail("-$x"); my $addr = tcp_host_port($s); $srv->{$x} = { addr => $addr, td => $td, cmd => $cmd, s => $s }; } my $url = "imap://$srv->{imapd}->{addr}/t.v1.0"; lei_ok 'import', $url, '+L:v1'; lei_ok 'inspect', "blob:$oid"; $before = json_utf8->decode($lei_out); my @f = grep(m!\Aimap://;AUTH=ANONYMOUS\@\Q$srv->{imapd}->{addr}\E!, keys %{$before->{'mail-sync'}}); is(scalar(@f), 1, 'got IMAP folder') or xbail(\@f); xsys([qw(git config), '-f', $cfg_path2, qw(--unset publicinbox.t1.newsgroup)]) and xbail "git config $?"; $stop_daemon->(); # drop IMAP IDLE $srv->{imapd}->{td}->kill('HUP'); tick; # wait for HUP lei_ok 'refresh-mail-sync', $url; lei_ok 'inspect', "blob:$oid"; my $after = json_utf8->decode($lei_out); ok(!$after->{'mail-sync'}, 'no sync info for non-existent mailbox'); lei_ok 'ls-mail-sync'; unlike $lei_out, qr!^\Q$f[0]\E!, 'IMAP folder gone from mail_sync'; # simulate server downtime $url = "imap://$srv->{imapd}->{addr}/t.v2.0"; lei_ok 'import', $url, '+L:v2'; lei_ok 'inspect', "blob:$oid"; $before = $lei_out; delete $srv->{imapd}->{td}; # kill + join daemon my $pid = fork // xbail "fork"; if ($pid == 0) { # dummy server to kill new connections $SIG{TERM} = sub { POSIX::_exit(0) }; $srv->{imapd}->{s}->blocking(1); while (1) { my $caddr = accept(my $c, $srv->{imapd}->{s}) // next; shutdown($c, 2); } POSIX::_exit(0); } my $ar = PublicInbox::AutoReap->new($pid); ok(!(lei 'refresh-mail-sync', $url), 'URL fails on dead -imapd'); ok(!(lei 'refresh-mail-sync', '--all'), '--all fails on dead -imapd'); $ar->kill for qw(avoid sig wake miss-no signalfd or EVFILT_SIG); $ar->join('TERM'); my $cmd = $srv->{imapd}->{cmd}; my $s = $srv->{imapd}->{s}; $s->blocking(0); $srv->{imapd}->{td} = start_script($cmd, $env, { 3 => $s }) or xbail "@$cmd"; lei_ok 'refresh-mail-sync', '--all'; lei_ok 'inspect', "blob:$oid"; is($lei_out, $before, 'no changes when server was down'); }; # imapd+nntpd stuff }); done_testing; public-inbox-1.9.0/t/lei-reindex.t000066400000000000000000000005731430031475700167620ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use v5.12; use PublicInbox::TestCommon; require_mods(qw(lei)); my ($tmpdir, $for_destroy) = tmpdir; test_lei(sub { ok(!lei('reindex'), 'reindex fails w/o store'); like $lei_err, qr/nothing indexed/, "`nothing indexed' noted"; }); done_testing; public-inbox-1.9.0/t/lei-sigpipe.t000066400000000000000000000036021430031475700167600ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use POSIX qw(WTERMSIG WIFSIGNALED SIGPIPE); use PublicInbox::OnDestroy; # undo systemd (and similar) ignoring SIGPIPE, since lei expects to be run # from an interactive terminal: # https://public-inbox.org/meta/20220227080422.gyqowrxomzu6gyin@sourcephile.fr/ my $oldSIGPIPE = $SIG{PIPE}; $SIG{PIPE} = 'DEFAULT'; my $cleanup = PublicInbox::OnDestroy->new($$, sub { $SIG{PIPE} = $oldSIGPIPE; }); test_lei(sub { my $f = "$ENV{HOME}/big.eml"; my $imported; for my $out ([], [qw(-f mboxcl2)], [qw(-f text)]) { pipe(my ($r, $w)) or BAIL_OUT $!; my $size = 65536; if ($^O eq 'linux' && fcntl($w, 1031, 4096)) { $size = 4096; } unless (-f $f) { open my $fh, '>', $f or xbail "open $f: $!"; print $fh <<'EOM' or xbail; From: big@example.com Message-ID: EOM print $fh 'Subject:'; print $fh (' '.('x' x 72)."\n") x (($size / 73) + 1); print $fh "\nbody\n"; close $fh or xbail "close: $!"; } lei_ok(qw(import), $f) if $imported++ == 0; open my $errfh, '+>>', "$ENV{HOME}/stderr.log" or xbail $!; my $opt = { run_mode => 0, 2 => $errfh, 1 => $w }; my $cmd = [qw(lei q -q -t), @$out, 'z:1..']; my $tp = start_script($cmd, undef, $opt); close $w; vec(my $rvec = '', fileno($r), 1) = 1; if (!select($rvec, undef, undef, 30)) { seek($errfh, 0, 0) or xbail $!; my $s = do { local $/; <$errfh> }; xbail "lei q had no output after 30s, stderr=$s"; } is(sysread($r, my $buf, 1), 1, 'read one byte'); close $r; # trigger SIGPIPE $tp->join; ok(WIFSIGNALED($?), "signaled @$out"); is(WTERMSIG($?), SIGPIPE, "got SIGPIPE @$out"); seek($errfh, 0, 0) or xbail $!; my $s = do { local $/; <$errfh> }; is($s, '', "quiet after sigpipe @$out"); } }); done_testing; public-inbox-1.9.0/t/lei-tag.t000066400000000000000000000075711430031475700161040ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_git 2.6; require_mods(qw(json DBD::SQLite Search::Xapian)); my ($ro_home, $cfg_path) = setup_public_inboxes; my $check_kw = sub { my ($exp, %opt) = @_; my $args = $opt{args} // []; my $mid = $opt{mid} // 'testmessage@example.com'; lei_ok('q', "m:$mid", @$args); my $res = json_utf8->decode($lei_out); is($res->[1], undef, 'only got one result'); my $msg = $opt{msg} ? " $opt{msg}" : ''; ($exp ? is_deeply($res->[0]->{kw}, $exp, "got @$exp$msg") : is($res->[0]->{kw}, undef, "got undef$msg")) or diag explain($res); if (exists $opt{L}) { $exp = $opt{L}; ($exp ? is_deeply($res->[0]->{L}, $exp, "got @$exp$msg") : is($res->[0]->{L}, undef, "got undef$msg")) or diag explain($res); } }; test_lei(sub { lei_ok(qw(ls-label)); is($lei_out, '', 'no labels, yet'); lei_ok(qw(import t/utf8.eml)); lei_ok(qw(tag t/utf8.eml +kw:flagged +L:urgent)); $check_kw->(['flagged'], L => ['urgent']); lei_ok(qw(ls-label)); is($lei_out, "urgent\n", 'label found'); ok(!lei(qw(tag -F eml t/utf8.eml +kw:seeen)), 'bad kw rejected'); like($lei_err, qr/`seeen' is not one of/, 'got helpful error'); ok(!lei(qw(tag -F eml t/utf8.eml +k:seen)), 'bad prefix rejected'); like($lei_err, qr/Unable to handle.*\Q+k:seen\E/, 'bad prefix noted'); ok(!lei(qw(tag -F eml t/utf8.eml)), 'no keywords'); like($lei_err, qr/no keywords or labels specified/, 'lack of kw/L noted'); my $mb = "$ENV{HOME}/mb"; my $md = "$ENV{HOME}/md"; lei_ok(qw(q m:testmessage@example.com -o), "mboxrd:$mb"); ok(-s $mb, 'wrote mbox result'); lei_ok(qw(q m:testmessage@example.com -o), $md); my @fn = glob("$md/cur/*"); scalar(@fn) == 1 or xbail $lei_err, 'no mail', \@fn; rename($fn[0], "$fn[0]S") or BAIL_OUT "rename $!"; $check_kw->(['flagged'], msg => 'after bad request'); lei_ok(qw(tag -F eml t/utf8.eml -kw:flagged)); $check_kw->(undef, msg => 'keyword cleared'); lei_ok(qw(tag -F mboxrd +kw:seen), $mb); $check_kw->(['seen'], msg => 'mbox Status ignored'); lei_ok(qw(tag -kw:seen +kw:answered), $md); $check_kw->(['answered'], msg => 'Maildir Status ignored'); open my $in, '<', 't/utf8.eml' or BAIL_OUT $!; lei_ok([qw(tag -F eml - +kw:seen +L:nope)], undef, { %$lei_opt, 0 => $in }); $check_kw->(['answered', 'seen'], msg => 'stdin works'); lei_ok(qw(q L:urgent)); my $res = json_utf8->decode($lei_out); is($res->[0]->{'m'}, 'testmessage@example.com', 'L: query works'); lei_ok(qw(q kw:seen)); my $r2 = json_utf8->decode($lei_out); is_deeply($r2, $res, 'kw: query works, too') or diag explain([$r2, $res]); lei_ok(qw(_complete lei tag)); my %c = map { $_ => 1 } split(/\s+/, $lei_out); ok($c{'+L:urgent'} && $c{'-L:urgent'} && $c{'+L:nope'} && $c{'-L:nope'}, 'completed with labels'); my $mid = 'qp@example.com'; lei_ok qw(q -f mboxrd --only), "$ro_home/t2", "mid:$mid"; $in = $lei_out; lei_ok [qw(tag -F mboxrd --stdin +kw:seen +L:qp)], undef, { %$lei_opt, 0 => \$in }; $check_kw->(['seen'], L => ['qp'], mid => $mid, args => [ '--only', "$ro_home/t2" ], msg => 'external-only message'); lei_ok(qw(ls-label)); is($lei_out, "nope\nqp\nurgent\n", 'ls-label shows qp'); lei_ok qw(tag -F eml t/utf8.eml +L:inbox +L:x); lei_ok qw(q m:testmessage@example.com); $check_kw->([qw(answered seen)], L => [qw(inbox nope urgent x)]); lei_ok(qw(ls-label)); is($lei_out, "inbox\nnope\nqp\nurgent\nx\n", 'ls-label shows qp'); lei_ok qw(q L:inbox); is(json_utf8->decode($lei_out)->[0]->{blob}, $r2->[0]->{blob}, 'label search works'); ok(!lei(qw(tag -F eml t/utf8.eml +L:ALLCAPS)), '+L:ALLCAPS fails'); lei_ok(qw(ls-label)); is($lei_out, "inbox\nnope\nqp\nurgent\nx\n", 'ls-label unchanged'); if (0) { # TODO label+kw search w/ externals lei_ok(qw(q L:qp), "mid:$mid", '--only', "$ro_home/t2"); } }); done_testing; public-inbox-1.9.0/t/lei-up.t000066400000000000000000000032411430031475700157430ustar00rootroot00000000000000#!perl -w # Copyright all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use IO::Uncompress::Gunzip qw(gunzip $GunzipError); test_lei(sub { my ($ro_home, $cfg_path) = setup_public_inboxes; my $s = eml_load('t/plack-qp.eml')->as_string; lei_ok [qw(import -q -F eml -)], undef, { 0 => \$s, %$lei_opt }; lei_ok qw(q z:0.. -f mboxcl2 -o), "$ENV{HOME}/a.mbox.gz"; lei_ok qw(q z:0.. -f mboxcl2 -o), "$ENV{HOME}/b.mbox.gz"; lei_ok qw(q z:0.. -f mboxcl2 -o), "$ENV{HOME}/a"; lei_ok qw(q z:0.. -f mboxcl2 -o), "$ENV{HOME}/b"; lei_ok qw(ls-search); $s = eml_load('t/utf8.eml')->as_string; lei_ok [qw(import -q -F eml -)], undef, { 0 => \$s, %$lei_opt }; lei_ok qw(up --all=local); open my $fh, "$ENV{HOME}/a.mbox.gz" or xbail "open: $!"; my $gz = do { local $/; <$fh> }; my $uc; gunzip(\$gz => \$uc, MultiStream => 1) or xbail "gunzip $GunzipError"; open $fh, "$ENV{HOME}/a" or xbail "open: $!"; my $exp = do { local $/; <$fh> }; is($uc, $exp, 'compressed and uncompressed match (a.gz)'); like($exp, qr/testmessage\@example.com/, '2nd message added'); open $fh, "$ENV{HOME}/b.mbox.gz" or xbail "open: $!"; $gz = do { local $/; <$fh> }; undef $uc; gunzip(\$gz => \$uc, MultiStream => 1) or xbail "gunzip $GunzipError"; is($uc, $exp, 'compressed and uncompressed match (b.gz)'); open $fh, "$ENV{HOME}/b" or xbail "open: $!"; $uc = do { local $/; <$fh> }; is($uc, $exp, 'uncompressed both match'); lei_ok [ qw(up -q), "$ENV{HOME}/b", "--mua=touch $ENV{HOME}/c" ], undef, { run_mode => 0 }; ok(-f "$ENV{HOME}/c", '--mua works with single output'); }); done_testing; public-inbox-1.9.0/t/lei-watch.t000066400000000000000000000065151430031475700164340ustar00rootroot00000000000000#!perl -w # Copyright all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use File::Path qw(make_path remove_tree); plan skip_all => "TEST_FLAKY not enabled for $0" if !$ENV{TEST_FLAKY}; require_mods('lei'); my $have_fast_inotify = eval { require Linux::Inotify2 } || eval { require IO::KQueue }; $have_fast_inotify or diag("$0 IO::KQueue or Linux::Inotify2 missing, test will be slow"); my ($ro_home, $cfg_path) = setup_public_inboxes; test_lei(sub { my $md = "$ENV{HOME}/md"; my $cfg_f = "$ENV{HOME}/.config/lei/config"; my $md2 = $md.'2'; lei_ok 'ls-watch'; is($lei_out, '', 'nothing in ls-watch, yet'); my ($ino_fdinfo, $ino_contents); SKIP: { $have_fast_inotify && $^O eq 'linux' or skip 'Linux/inotify-only internals check', 1; lei_ok 'daemon-pid'; chomp(my $pid = $lei_out); skip 'missing /proc/$PID/fd', 1 if !-d "/proc/$pid/fd"; my @ino = grep { (readlink($_) // '') =~ /\binotify\b/ } glob("/proc/$pid/fd/*"); is(scalar(@ino), 1, 'only one inotify FD'); my $ino_fd = (split('/', $ino[0]))[-1]; $ino_fdinfo = "/proc/$pid/fdinfo/$ino_fd"; open my $fh, '<', $ino_fdinfo or xbail "open $ino_fdinfo: $!"; $ino_contents = [ <$fh> ]; } if (0) { # TODO my $url = 'imaps://example.com/foo.bar.0'; lei_ok([qw(add-watch --state=pause), $url], undef, {}); lei_ok 'ls-watch'; is($lei_out, "$url\n", 'ls-watch shows added watch'); ok(!lei(qw(add-watch --state=pause), 'bogus'.$url), 'bogus URL rejected'); } # first, make sure tag-ro works make_path("$md/new", "$md/cur", "$md/tmp"); lei_ok qw(add-watch --state=tag-ro), $md; lei_ok 'ls-watch'; like($lei_out, qr/^\Qmaildir:$md\E$/sm, 'maildir shown'); lei_ok qw(q mid:testmessage@example.com -o), $md, '-I', "$ro_home/t1"; my @f = glob("$md/cur/*:2,"); is(scalar(@f), 1, 'got populated maildir with one result'); rename($f[0], "$f[0]S") or xbail "rename $!"; # set (S)een tick($have_fast_inotify ? 0.2 : 2.2); # always needed for 1 CPU systems lei_ok qw(note-event done); # flushes immediately (instead of 5s) lei_ok qw(q mid:testmessage@example.com -o), $md2, '-I', "$ro_home/t1"; my @f2 = glob("$md2/*/*"); is(scalar(@f2), 1, 'got one result'); like($f2[0], qr/S\z/, 'seen set from rename') or diag explain(\@f2); my $e2 = eml_load($f2[0]); my $e1 = eml_load("$f[0]S"); is_deeply($e2, $e1, 'results match'); SKIP: { $ino_fdinfo or skip 'Linux/inotify-only watch check', 1; open my $fh, '<', $ino_fdinfo or xbail "open $ino_fdinfo: $!"; my $cmp = [ <$fh> ]; ok(scalar(@$cmp) > scalar(@$ino_contents), 'inotify has Maildir watches'); } lei_ok 'rm-watch', $md; lei_ok 'ls-watch', \'refresh watches'; is($lei_out, '', 'no watches left'); lei_ok 'add-watch', $md2; remove_tree($md2); lei_ok 'rm-watch', "maildir:".$md2, \'with maildir: prefix'; lei_ok 'ls-watch', \'refresh watches'; is($lei_out, '', 'no watches left'); lei_ok 'add-watch', $md; remove_tree($md); lei_ok 'rm-watch', $md, \'absolute path w/ missing dir'; lei_ok 'ls-watch', \'refresh watches'; is($lei_out, '', 'no watches left'); SKIP: { $ino_fdinfo or skip 'Linux/inotify-only removal removal', 1; open my $fh, '<', $ino_fdinfo or xbail "open $ino_fdinfo: $!"; my $cmp = [ <$fh> ]; is_xdeeply($cmp, $ino_contents, 'inotify Maildir watches gone'); }; }); done_testing; public-inbox-1.9.0/t/lei.t000066400000000000000000000143141430031475700153240ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use File::Path qw(rmtree); # this only tests the basic help/config/init/completion bits of lei; # actual functionality is tested in other t/lei-*.t tests my $home; my $home_trash = []; my $cleanup = sub { rmtree([@$home_trash, @_]) }; my $test_help = sub { ok(!lei([]), 'no args fails'); is($? >> 8, 1, '$? is 1'); is($lei_out, '', 'nothing in stdout'); like($lei_err, qr/^usage:/sm, 'usage in stderr'); for my $arg (['-h'], ['--help'], ['help'], [qw(daemon-pid --help)]) { lei_ok($arg); like($lei_out, qr/^usage:/sm, "usage in stdout (@$arg)"); is($lei_err, '', "nothing in stderr (@$arg)"); } for my $arg ([''], ['--halp'], ['halp'], [qw(daemon-pid --halp)]) { ok(!lei($arg), "lei @$arg"); is($? >> 8, 1, '$? set correctly'); isnt($lei_err, '', 'something in stderr'); is($lei_out, '', 'nothing in stdout'); } lei_ok(qw(init -h)); like($lei_out, qr! \Q$home\E/\.local/share/lei/store\b!, 'actual path shown in init -h'); lei_ok(qw(init -h), { XDG_DATA_HOME => '/XDH' }, \'init with XDG_DATA_HOME'); like($lei_out, qr! /XDH/lei/store\b!, 'XDG_DATA_HOME in init -h'); is($lei_err, '', 'no errors from init -h'); lei_ok(qw(config -h)); like($lei_out, qr! \Q$home\E/\.config/lei/config\b!, 'actual path shown in config -h'); lei_ok(qw(config -h), { XDG_CONFIG_HOME => '/XDC' }, \'config with XDG_CONFIG_HOME'); like($lei_out, qr! /XDC/lei/config\b!, 'XDG_CONFIG_HOME in config -h'); is($lei_err, '', 'no errors from config -h'); }; my $ok_err_info = sub { my ($msg) = @_; is(grep(!/^#/, split(/^/, $lei_err)), 0, $msg) or diag "$msg: err=$lei_err"; }; my $test_init = sub { $cleanup->(); lei_ok('init', \'init w/o args'); $ok_err_info->('after init w/o args'); lei_ok('init', \'idempotent init w/o args'); $ok_err_info->('after idempotent init w/o args'); ok(!lei('init', "$home/x"), 'init conflict'); is(grep(/^E:/, split(/^/, $lei_err)), 1, 'got error on conflict'); ok(!-e "$home/x", 'nothing created on conflict'); $cleanup->(); lei_ok('init', "$home/x", \'init conflict resolved'); $ok_err_info->('init w/ arg'); lei_ok('init', "$home/x", \'init idempotent w/ path'); $ok_err_info->('init idempotent w/ arg'); ok(-d "$home/x", 'created dir'); $cleanup->("$home/x"); ok(!lei('init', "$home/x", "$home/2"), 'too many args fails'); like($lei_err, qr/too many/, 'noted excessive'); ok(!-e "$home/x", 'x not created on excessive'); for my $d (@$home_trash) { my $base = (split(m!/!, $d))[-1]; ok(!-d $d, "$base not created"); } is($lei_out, '', 'nothing in stdout on init failure'); }; my $test_config = sub { $cleanup->(); lei_ok(qw(config a.b c), \'config set var'); is($lei_out.$lei_err, '', 'no output on var set'); lei_ok(qw(config -l), \'config -l'); is($lei_err, '', 'no errors on listing'); is($lei_out, "a.b=c\n", 'got expected output'); ok(!lei(qw(config -f), "$home/.config/f", qw(x.y z)), 'config set var with -f fails'); like($lei_err, qr/not supported/, 'not supported noted'); ok(!-f "$home/config/f", 'no file created'); lei_ok(qw(-c imap.debug config --bool imap.debug)); is($lei_out, "true\n", "-c sets w/o value"); lei_ok(qw(-c imap.debug=1 config --bool imap.debug)); is($lei_out, "true\n", "-c coerces value"); lei_ok(qw(-c imap.debug=tr00 config imap.debug)); is($lei_out, "tr00\n", "-c string value passed as-is"); lei_ok(qw(-c imap.debug=a -c imap.debug=b config --get-all imap.debug)); is($lei_out, "a\nb\n", '-c and --get-all work together'); lei_ok([qw(config -e)], { VISUAL => 'cat', EDITOR => 'cat' }); is($lei_out, "[a]\n\tb = c\n", '--edit works'); }; my $test_completion = sub { lei_ok(qw(_complete lei), \'no errors on complete'); my %out = map { $_ => 1 } split(/\s+/s, $lei_out); ok($out{'q'}, "`lei q' offered as completion"); ok($out{'add-external'}, "`lei add-external' offered as completion"); lei_ok(qw(_complete lei q), \'complete q (no args)'); %out = map { $_ => 1 } split(/\s+/s, $lei_out); for my $sw (qw(-f --format -o --output --mfolder --augment -a --mua --no-local --local --verbose -v --save --no-save --no-remote --remote --torsocks --reverse -r )) { ok($out{$sw}, "$sw offered as `lei q' completion"); } lei_ok(qw(_complete lei q --form), \'complete q --format'); is($lei_out, "--format\n", 'complete lei q --format'); for my $sw (qw(-f --format)) { lei_ok(qw(_complete lei q), $sw); %out = map { $_ => 1 } split(/\s+/s, $lei_out); for my $f (qw(mboxrd mboxcl2 mboxcl mboxo json jsonl concatjson maildir)) { ok($out{$f}, "got $sw $f as output format"); } } lei_ok(qw(_complete lei import)); %out = map { $_ => 1 } split(/\s+/s, $lei_out); for my $sw (qw(--no-kw --kw)) { ok($out{$sw}, "$sw offered as `lei import' completion"); } }; my $test_fail = sub { lei('q', 'whatever', '-C', '/dev/null'); is($? >> 8, 1, 'chdir at end fails to /dev/null'); lei('-C', '/dev/null', 'q', 'whatever'); is($? >> 8, 1, 'chdir at beginning fails to /dev/null'); lei_ok('q', "foo\n"); like($lei_err, qr/trailing `\\n' removed/s, "noted `\\n' removal"); for my $lk (qw(ei inbox)) { my $d = "$home/newline\n$lk"; mkdir $d; open my $fh, '>', "$d/$lk.lock" or BAIL_OUT "open $d/$lk.lock"; for my $fl (qw(-I --only)) { ok(!lei('q', $fl, $d, 'whatever'), "newline $lk.lock fails with q $fl"); like($lei_err, qr/`\\n' not allowed/, "error noted with q $fl"); } } lei_ok('sucks', \'yes, but hopefully less every day'); like($lei_out, qr/loaded features/, 'loaded features shown'); SKIP: { skip 'no curl', 3 unless require_cmd('curl', 1); lei(qw(q --only http://127.0.0.1:99999/bogus/ t:m)); is($? >> 8, 3, 'got curl exit for bogus URL'); lei(qw(q --only http://127.0.0.1:99999/bogus/ t:m -o), "$home/junk"); is($? >> 8, 3, 'got curl exit for bogus URL with Maildir') or diag $lei_err; is($lei_out, '', 'no output'); }; # /SKIP }; test_lei(sub { $home = $ENV{HOME}; $home_trash = [ "$home/.local", "$home/.config", "$home/junk" ]; $test_help->(); $test_config->(); $test_init->(); $test_completion->(); $test_fail->(); }); test_lei({ mods => [] }, sub { lei_ok('sucks', \'no optional modules required'); }); done_testing; public-inbox-1.9.0/t/lei_dedupe.t000066400000000000000000000062411430031475700166520ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Eml; use PublicInbox::Smsg; require_mods(qw(DBD::SQLite)); use_ok 'PublicInbox::LeiDedupe'; my $eml = eml_load('t/plack-qp.eml'); my $mid = $eml->header_raw('Message-ID'); my $different = eml_load('t/msg_iter-order.eml'); $different->header_set('Message-ID', $mid); my $smsg = bless { ds => time }, 'PublicInbox::Smsg'; $smsg->populate($eml); $smsg->{$_} //= '' for (qw(to cc references)) ; my $check_storable = sub { my ($x) = @_; SKIP: { require_mods('Storable', 1); my $dup = Storable::thaw(Storable::freeze($x)); is_deeply($dup, $x, "$x->[3] round-trips through storable"); } }; my $lei = { opt => { dedupe => 'none' } }; my $dd = PublicInbox::LeiDedupe->new($lei); $check_storable->($dd); $dd->prepare_dedupe; ok(!$dd->is_dup($eml), '1st is_dup w/o dedupe'); ok(!$dd->is_dup($eml), '2nd is_dup w/o dedupe'); ok(!$dd->is_dup($different), 'different is_dup w/o dedupe'); ok(!$dd->is_smsg_dup($smsg), 'smsg dedupe none 1'); ok(!$dd->is_smsg_dup($smsg), 'smsg dedupe none 2'); for my $strat (undef, 'content') { $lei->{opt}->{dedupe} = $strat; $dd = PublicInbox::LeiDedupe->new($lei); $check_storable->($dd); $dd->prepare_dedupe; my $desc = $strat // 'default'; ok(!$dd->is_dup($eml), "1st is_dup with $desc dedupe"); ok($dd->is_dup($eml), "2nd seen with $desc dedupe"); ok(!$dd->is_dup($different), "different is_dup with $desc dedupe"); ok(!$dd->is_smsg_dup($smsg), "is_smsg_dup pass w/ $desc dedupe"); ok($dd->is_smsg_dup($smsg), "is_smsg_dup reject w/ $desc dedupe"); } $lei->{opt}->{dedupe} = 'bogus'; eval { PublicInbox::LeiDedupe->new($lei) }; like($@, qr/unsupported.*bogus/, 'died on bogus strategy'); $lei->{opt}->{dedupe} = 'mid'; $dd = PublicInbox::LeiDedupe->new($lei); $check_storable->($dd); $dd->prepare_dedupe; ok(!$dd->is_dup($eml), '1st is_dup with mid dedupe'); ok($dd->is_dup($eml), '2nd seen with mid dedupe'); ok($dd->is_dup($different), 'different seen with mid dedupe'); ok(!$dd->is_smsg_dup($smsg), 'smsg mid dedupe pass'); ok($dd->is_smsg_dup($smsg), 'smsg mid dedupe reject'); $lei->{opt}->{dedupe} = 'oid'; $dd = PublicInbox::LeiDedupe->new($lei); $check_storable->($dd); $dd->prepare_dedupe; # --augment won't have OIDs: ok(!$dd->is_dup($eml), '1st is_dup with oid dedupe (augment)'); ok($dd->is_dup($eml), '2nd seen with oid dedupe (augment)'); ok(!$dd->is_dup($different), 'different is_dup with mid dedupe (augment)'); $different->header_set('Status', 'RO'); ok($dd->is_dup($different), 'different seen with oid dedupe Status removed'); $smsg = { blob => '01d' }; ok(!$dd->is_dup($eml, $smsg), '1st is_dup with oid dedupe'); ok($dd->is_dup($different, $smsg), 'different content ignored if oid matches'); $smsg->{blob} = uc($smsg->{blob}); ok($dd->is_dup($eml, $smsg), 'case insensitive oid comparison :P'); $smsg->{blob} = '01dbad'; ok(!$dd->is_dup($eml, $smsg), 'case insensitive oid comparison :P'); $smsg->{blob} = 'dead'; ok(!$dd->is_smsg_dup($smsg), 'smsg dedupe pass'); ok($dd->is_smsg_dup($smsg), 'smsg dedupe reject'); done_testing; public-inbox-1.9.0/t/lei_external.t000066400000000000000000000035111430031475700172230ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # internal unit test, see t/lei-externals.t for functional tests use strict; use v5.10.1; use Test::More; my $cls = 'PublicInbox::LeiExternal'; require_ok $cls; my $canon = $cls->can('ext_canonicalize'); my $exp = 'https://example.com/my-inbox/'; is($canon->('https://example.com/my-inbox'), $exp, 'trailing slash added'); is($canon->('https://example.com/my-inbox//'), $exp, 'trailing slash removed'); is($canon->('https://example.com//my-inbox/'), $exp, 'leading slash removed'); is($canon->('https://EXAMPLE.com/my-inbox/'), $exp, 'lowercased'); is($canon->('/this/path/is/nonexistent/'), '/this/path/is/nonexistent', 'non-existent pathname canonicalized'); is($canon->('/this//path/'), '/this/path', 'extra slashes gone'); is($canon->('/ALL/CAPS'), '/ALL/CAPS', 'caps preserved'); my $glob2re = $cls->can('glob2re'); is($glob2re->('http://[::1]:1234/foo/'), undef, 'IPv6 URL not globbed'); is($glob2re->('foo'), undef, 'plain string unchanged'); is_deeply($glob2re->('[f-o]'), '[f-o]' , 'range accepted'); is_deeply($glob2re->('*'), '[^/]*?' , 'wildcard accepted'); is_deeply($glob2re->('{a,b,c}'), '(a|b|c)' , 'braces'); is_deeply($glob2re->('{,b,c}'), '(|b|c)' , 'brace with empty @ start'); is_deeply($glob2re->('{a,b,}'), '(a|b|)' , 'brace with empty @ end'); is_deeply($glob2re->('{a}'), undef, 'ungrouped brace'); is_deeply($glob2re->('{a'), undef, 'open left brace'); is_deeply($glob2re->('a}'), undef, 'open right brace'); is_deeply($glob2re->('*.[ch]'), '[^/]*?\\.[ch]', 'suffix glob'); is_deeply($glob2re->('{[a-z],9,}'), '([a-z]|9|)' , 'brace with range'); is_deeply($glob2re->('\\{a,b\\}'), undef, 'escaped brace'); is_deeply($glob2re->('\\\\{a,b}'), '\\\\\\\\(a|b)', 'fake escape brace'); done_testing; public-inbox-1.9.0/t/lei_lcat.t000066400000000000000000000026201430031475700163240ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ # unit test for "lei lcat" internals, see t/lei-lcat.t for functional test use strict; use v5.10.1; use Test::More; use_ok 'PublicInbox::LeiLcat'; my $cb = \&PublicInbox::LeiLcat::extract_1; my $ck = sub { my ($txt, $exp, $t) = @_; my $lei = {}; is($cb->($lei, $txt), $exp, $txt); ($t ? is_deeply($lei, { mset_opt => { threads => 1 } }, "-t $exp") : is_deeply($lei, {}, "no -t for $exp")) or diag explain($lei); }; for my $txt (qw(https://example.com/inbox/foo@bar/ https://example.com/inbox/foo@bar https://example.com/inbox/foo@bar/raw id:foo@bar mid:foo@bar )) { $ck->($txt, 'mid:foo@bar'); } for my $txt (qw(https://example.com/inbox/foo@bar/T/ https://example.com/inbox/foo@bar/t/ https://example.com/inbox/foo@bar/t.mbox.gz )) { $ck->($txt, 'mid:foo@bar', '-t'); } $ck->('https://example.com/x/foobar/T/', 'mid:foobar', '-t'); $ck->('https://example.com/x/foobar/raw', 'mid:foobar'); is($cb->(my $lei = {}, 'asdf'), undef, 'no Message-ID'); is($cb->($lei = {}, 'm:x'), 'm:x', 'bare m: accepted'); done_testing; public-inbox-1.9.0/t/lei_mail_sync.t000066400000000000000000000054061430031475700173640ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_mods(qw(DBD::SQLite)); require_ok 'PublicInbox::LeiMailSync'; my ($dir, $for_destroy) = tmpdir(); my $lms = PublicInbox::LeiMailSync->new("$dir/t.sqlite3"); $lms->lms_write_prepare; my $ro = PublicInbox::LeiMailSync->new("$dir/t.sqlite3"); is_deeply([$ro->folders], [], 'no folders, yet'); my $imap = 'imaps://bob@[::1]/INBOX;UIDVALIDITY=9'; $lms->lms_write_prepare; my $deadbeef = "\xde\xad\xbe\xef"; is($lms->set_src($deadbeef, $imap, 1), 1, 'set IMAP once'); ok($lms->set_src($deadbeef, $imap, 1) == 0, 'set IMAP idempotently'); is_deeply([$ro->folders], [$imap], 'IMAP folder added'); note explain([$ro->folders($imap)]); note explain([$imap, [$ro->folders]]); is_deeply([$ro->folders($imap)], [$imap], 'IMAP folder with full GLOB'); is_deeply([$ro->folders('imaps://bob@[::1]/INBOX')], [$imap], 'IMAP folder with partial GLOB'); is_deeply($ro->locations_for($deadbeef), { $imap => [ 1 ] }, 'locations_for w/ imap'); my $maildir = 'maildir:/home/user/md'; my $fname = 'foo:2,S'; $lms->lms_write_prepare; ok($lms->set_src($deadbeef, $maildir, \$fname), 'set Maildir once'); ok($lms->set_src($deadbeef, $maildir, \$fname) == 0, 'set Maildir again'); is_deeply($ro->locations_for($deadbeef), { $imap => [ 1 ], $maildir => [ $fname ] }, 'locations_for w/ maildir + imap'); if ('mess things up pretend old bug') { $lms->lms_write_prepare; diag "messing things up"; $lms->{dbh}->do('UPDATE folders SET loc = ? WHERE loc = ?', undef, "$maildir/", $maildir); ok(delete $lms->{fmap}, 'clear folder map'); $lms->lms_write_prepare; ok($lms->set_src($deadbeef, $maildir, \$fname), 'set Maildir once'); }; is_deeply([sort($ro->folders)], [$imap, $maildir], 'both folders shown'); my @res; $ro->each_src($maildir, sub { my ($oidbin, $id) = @_; push @res, [ unpack('H*', $oidbin), $id ]; }); is_deeply(\@res, [ ['deadbeef', \$fname] ], 'each_src works on Maildir'); @res = (); $ro->each_src($imap, sub { my ($oidbin, $id) = @_; push @res, [ unpack('H*', $oidbin), $id ]; }); is_deeply(\@res, [ ['deadbeef', 1] ], 'each_src works on IMAP'); is_deeply($ro->location_stats($maildir), { 'name.count' => 1 }, 'Maildir location stats'); is_deeply($ro->location_stats($imap), { 'uid.count' => 1, 'uid.max' => 1, 'uid.min' => 1 }, 'IMAP location stats'); $lms->lms_write_prepare; is($lms->clear_src($imap, 1), 1, 'clear_src on IMAP'); is($lms->clear_src($maildir, \$fname), 1, 'clear_src on Maildir'); ok($lms->clear_src($imap, 1) == 0, 'clear_src again on IMAP'); ok($lms->clear_src($maildir, \$fname) == 0, 'clear_src again on Maildir'); is_deeply($ro->location_stats($maildir), {}, 'nothing left'); done_testing; public-inbox-1.9.0/t/lei_overview.t000066400000000000000000000016311430031475700172500ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::TestCommon; use POSIX qw(_exit); require_mods(qw(Search::Xapian DBD::SQLite)); require_ok 'PublicInbox::LeiOverview'; my $ovv = bless {}, 'PublicInbox::LeiOverview'; $ovv->ovv_out_lk_init; my $lock_path = $ovv->{lock_path}; ok(-f $lock_path, 'lock init'); undef $ovv; ok(!-f $lock_path, 'lock DESTROY'); $ovv = bless {}, 'PublicInbox::LeiOverview'; $ovv->ovv_out_lk_init; $lock_path = $ovv->{lock_path}; ok(-f $lock_path, 'lock init #2'); my $pid = fork // BAIL_OUT "fork $!"; if ($pid == 0) { undef $ovv; _exit(0); } is(waitpid($pid, 0), $pid, 'child exited'); is($?, 0, 'no error in child process'); ok(-f $lock_path, 'lock was not destroyed by child'); undef $ovv; ok(!-f $lock_path, 'lock DESTROY #2'); done_testing; public-inbox-1.9.0/t/lei_saved_search.t000066400000000000000000000004231430031475700200270ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_mods(qw(DBD::SQLite)); use_ok 'PublicInbox::LeiSavedSearch'; done_testing; public-inbox-1.9.0/t/lei_store.t000066400000000000000000000131111430031475700165320ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::TestCommon; require_mods(qw(DBD::SQLite Search::Xapian)); require_git 2.6; require_ok 'PublicInbox::LeiStore'; require_ok 'PublicInbox::ExtSearch'; my ($home, $for_destroy) = tmpdir(); my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') }; my $store_dir = "$home/sto"; local $ENV{GIT_COMMITTER_EMAIL} = 'lei@example.com'; local $ENV{GIT_COMMITTER_NAME} = 'lei user'; my $sto = PublicInbox::LeiStore->new($store_dir, { creat => 1 }); ok($sto, '->new'); my $smsg = $sto->add_eml(eml_load('t/data/0001.patch')); like($smsg->{blob}, qr/\A[0-9a-f]+\z/, 'add returned OID'); my $eml = eml_load('t/data/0001.patch'); is($sto->add_eml($eml), undef, 'idempotent'); $sto->done; { my $es = $sto->search; ok($es->can('isrch'), ref($es). ' can ->isrch (for SolverGit)'); my $msgs = $es->over->query_xover(0, 1000); is(scalar(@$msgs), 1, 'one message'); is($msgs->[0]->{blob}, $smsg->{blob}, 'blob matches'); my $mset = $es->mset("mid:$msgs->[0]->{mid}"); is($mset->size, 1, 'search works'); is_deeply($es->mset_to_artnums($mset), [ $msgs->[0]->{num} ], 'mset_to_artnums'); my $mi = ($mset->items)[0]; my @kw = PublicInbox::Search::xap_terms('K', $mi->get_document); is_deeply(\@kw, [], 'no flags'); } for my $parallel (0, 1) { $sto->{priv_eidx}->{parallel} = $parallel; my $docids = $sto->set_eml_vmd($eml, { kw => [ qw(seen draft) ] }); is(scalar @$docids, 1, 'set keywords on one doc'); $sto->done; my @kw = $sto->search->msg_keywords($docids->[0]); is_deeply(\@kw, [qw(draft seen)], 'kw matches'); $docids = $sto->add_eml_vmd($eml, {kw => [qw(seen draft)]}); $sto->done; is(scalar @$docids, 1, 'idempotently added keywords to doc'); @kw = $sto->search->msg_keywords($docids->[0]); is_deeply(\@kw, [qw(draft seen)], 'kw matches after noop'); $docids = $sto->remove_eml_vmd($eml, {kw => [qw(seen draft)]}); is(scalar @$docids, 1, 'removed from one doc'); $sto->done; @kw = $sto->search->msg_keywords($docids->[0]); is_deeply(\@kw, [], 'kw matches after remove'); $docids = $sto->remove_eml_vmd($eml, {kw=> [qw(answered)]}); is(scalar @$docids, 1, 'removed from one doc (idempotently)'); $sto->done; @kw = $sto->search->msg_keywords($docids->[0]); is_deeply(\@kw, [], 'kw matches after remove (idempotent)'); $docids = $sto->add_eml_vmd($eml, {kw => [qw(answered)]}); is(scalar @$docids, 1, 'added to empty doc'); $sto->done; @kw = $sto->search->msg_keywords($docids->[0]); is_deeply(\@kw, ['answered'], 'kw matches after add'); $docids = $sto->set_eml_vmd($eml, { kw => [] }); is(scalar @$docids, 1, 'set to clobber'); $sto->done; @kw = $sto->search->msg_keywords($docids->[0]); is_deeply(\@kw, [], 'set clobbers all'); my $set = eml_load('t/plack-qp.eml'); $set->header_set('Message-ID', ""); my $ret = $sto->set_eml($set, { kw => [ 'seen' ] }); is(ref $ret, 'PublicInbox::Smsg', 'initial returns smsg'); my $ids = $sto->set_eml($set, { kw => [ 'seen' ] }); is_deeply($ids, [ $ret->{num} ], 'set_eml idempotent'); $ids = $sto->set_eml($set, { kw => [ qw(seen answered) ] }); is_deeply($ids, [ $ret->{num} ], 'set_eml to change kw'); $sto->done; @kw = $sto->search->msg_keywords($ids->[0]); is_deeply(\@kw, [qw(answered seen)], 'set changed kw'); } SKIP: { require_mods(qw(Storable), 1); ok($sto->can('ipc_do'), 'ipc_do works if we have Storable'); $eml->header_set('Message-ID', ''); my $pid = $sto->ipc_worker_spawn('lei-store'); ok($pid > 0, 'got a worker'); my $smsg = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] }); is(ref($smsg), 'PublicInbox::Smsg', 'set_eml works over ipc'); my $ids = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] }); is_deeply($ids, [ $smsg->{num} ], 'docid returned'); $eml->header_set('Message-ID'); my $no_mid = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] }); my $wait = $sto->ipc_do('done'); my @kw = $sto->search->msg_keywords($no_mid->{num}); is_deeply(\@kw, [qw(seen)], 'ipc set changed kw'); is(ref($smsg), 'PublicInbox::Smsg', 'no mid works ipc'); $ids = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] }); is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/ ipc'); $sto->ipc_do('done'); $sto->ipc_worker_stop; $ids = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen answered) ] }); is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/o ipc'); $wait = $sto->ipc_do('done'); my $lse = $sto->search; @kw = $lse->msg_keywords($no_mid->{num}); is_deeply(\@kw, [qw(answered seen)], 'set changed kw w/o ipc'); is($lse->kw_changed($eml, [qw(answered seen)]), 0, 'kw_changed false when unchanged'); is($lse->kw_changed($eml, [qw(answered seen flagged)]), 1, 'kw_changed true when +flagged'); is($lse->kw_changed(eml_load('t/plack-qp.eml'), ['seen']), undef, 'kw_changed undef on unknown message'); } SKIP: { require_mods(qw(HTTP::Date), 1); my $now = HTTP::Date::time2str(time); $now =~ s/GMT/+0000/ or xbail "no GMT in $now"; my $eml = PublicInbox::Eml->new(<<"EOM"); Received: (listserv\@example.com) by example.com via listexpand id abcde (ORCPT ); $now; Date: $now Subject: timezone-dependent test WHAT IS TIME ANYMORE? EOM my $smsg = $sto->add_eml($eml); ok($smsg && $smsg->{blob}, 'recently received message'); $sto->done; local $ENV{TZ} = 'GMT+5'; my $lse = $sto->search; my $qstr = 'rt:1.hour.ago.. s:timezone'; $lse->query_approxidate($lse->git, $qstr); my $mset = $lse->mset($qstr); is($mset->size, 1, 'rt:1.hour.ago.. works w/ local time'); } done_testing; public-inbox-1.9.0/t/lei_to_mail.t000066400000000000000000000224361430031475700170340ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Eml; use Fcntl qw(SEEK_SET O_RDONLY O_NONBLOCK); use PublicInbox::Spawn qw(popen_rd); use List::Util qw(shuffle); require_mods(qw(DBD::SQLite)); require PublicInbox::MdirReader; require PublicInbox::MboxReader; require PublicInbox::LeiOverview; require PublicInbox::LEI; use_ok 'PublicInbox::LeiToMail'; my $from = "Content-Length: 10\nSubject: x\n\nFrom hell\n"; my $noeol = "Subject: x\n\nFrom hell"; my $crlf = $noeol; $crlf =~ s/\n/\r\n/g; my $kw = [qw(seen answered flagged)]; my $smsg = { kw => $kw, blob => '0'x40 }; my @MBOX = qw(mboxcl2 mboxrd mboxcl mboxo); for my $mbox (@MBOX) { my $m = "eml2$mbox"; my $cb = PublicInbox::LeiToMail->can($m); my $s = $cb->(PublicInbox::Eml->new($from), $smsg); is(substr($$s, -1, 1), "\n", "trailing LF in normal $mbox"); my $eml = PublicInbox::Eml->new($s); is($eml->header('Status'), 'RO', "Status: set by $m"); is($eml->header('X-Status'), 'AF', "X-Status: set by $m"); if ($mbox eq 'mboxcl2') { like($eml->body_raw, qr/^From /, "From not escaped $m"); } else { like($eml->body_raw, qr/^>From /, "From escaped once by $m"); } my @cl = $eml->header('Content-Length'); if ($mbox =~ /mboxcl/) { is(scalar(@cl), 1, "$m only has one Content-Length header"); is($cl[0] + length("\n"), length($eml->body_raw), "$m Content-Length matches"); } else { is(scalar(@cl), 0, "$m clobbered Content-Length"); } $s = $cb->(PublicInbox::Eml->new($noeol), $smsg); is(substr($$s, -1, 1), "\n", "trailing LF added by $m when original lacks EOL"); $eml = PublicInbox::Eml->new($s); if ($mbox eq 'mboxcl2') { is($eml->body_raw, "From hell\n", "From not escaped by $m"); } else { is($eml->body_raw, ">From hell\n", "From escaped once by $m"); } $s = $cb->(PublicInbox::Eml->new($crlf), $smsg); is(substr($$s, -2, 2), "\r\n", "trailing CRLF added $m by original lacks EOL"); $eml = PublicInbox::Eml->new($s); if ($mbox eq 'mboxcl2') { is($eml->body_raw, "From hell\r\n", "From not escaped by $m"); } else { is($eml->body_raw, ">From hell\r\n", "From escaped once by $m"); } if ($mbox =~ /mboxcl/) { is($eml->header('Content-Length') + length("\r\n"), length($eml->body_raw), "$m Content-Length matches"); } elsif ($mbox eq 'mboxrd') { $s = $cb->($eml, $smsg); $eml = PublicInbox::Eml->new($s); is($eml->body_raw, ">>From hell\r\n\r\n", "From escaped again by $m"); } } my ($tmpdir, $for_destroy) = tmpdir(); local $ENV{TMPDIR} = $tmpdir; open my $err, '>>', "$tmpdir/lei.err" or BAIL_OUT $!; my $lei = bless { 2 => $err, cmd => 'test' }, 'PublicInbox::LEI'; my $commit = sub { $_[0] = undef; # wcb delete $lei->{1}; }; my $buf = <<'EOM'; From: x@example.com Subject: x blah EOM my $fn = "$tmpdir/x.mbox"; my ($mbox) = shuffle(@MBOX); # pick one, shouldn't matter my $wcb_get = sub { my ($fmt, $dst) = @_; delete $lei->{dedupe}; # to be recreated $lei->{ovv} = bless { fmt => $fmt, dst => $dst }, 'PublicInbox::LeiOverview'; my $l2m = PublicInbox::LeiToMail->new($lei); SKIP: { require_mods('Storable', 1); my $dup = Storable::thaw(Storable::freeze($l2m)); is_deeply($dup, $l2m, "$fmt round-trips through storable"); } $l2m->pre_augment($lei); $l2m->do_augment($lei); $l2m->post_augment($lei); $l2m->write_cb($lei); }; my $deadbeef = { blob => 'deadbeef', kw => [ qw(seen) ] }; my $orig = do { my $wcb = $wcb_get->($mbox, $fn); is(ref $wcb, 'CODE', 'write_cb returned callback'); ok(-f $fn && !-s _, 'empty file created'); $wcb->(\(my $dup = $buf), $deadbeef); $commit->($wcb); open my $fh, '<', $fn or BAIL_OUT $!; my $raw = do { local $/; <$fh> }; like($raw, qr/^blah\n/sm, 'wrote content'); unlink $fn or BAIL_OUT $!; $wcb = $wcb_get->($mbox, $fn); ok(-f $fn && !-s _, 'truncated mbox destination'); $wcb->(\($dup = $buf), $deadbeef); $commit->($wcb); open $fh, '<', $fn or BAIL_OUT $!; is(do { local $/; <$fh> }, $raw, 'wrote identical content'); $raw; }; test_lei({tmpdir => "$tmpdir/using -F"}, sub { lei_ok(qw(import -F), $mbox, $fn, \'imported mbox'); lei_ok(qw(q s:x), \'lei q works') or diag $lei_err; my $res = json_utf8->decode($lei_out); my $x = $res->[0]; is($x->{'s'}, 'x', 'subject imported') or diag $lei_out; is_deeply($x->{'kw'}, ['seen'], 'kw imported') or diag $lei_out; is($res->[1], undef, 'only one result'); }); test_lei({tmpdir => "$tmpdir/using TYPE: prefix"}, sub { lei_ok('import', "$mbox:$fn", \'imported mbox:/path') or diag $lei_err; lei_ok(qw(q s:x), \'lei q works') or diag $lei_err; my $res = json_utf8->decode($lei_out); my $x = $res->[0]; is($x->{'s'}, 'x', 'subject imported') or diag $lei_out; is_deeply($x->{'kw'}, ['seen'], 'kw imported') or diag $lei_out; is($res->[1], undef, 'only one result'); }); my $zsfx2cmd = PublicInbox::MboxReader->can('zsfx2cmd'); for my $zsfx (qw(gz bz2 xz)) { SKIP: { my $cmd = eval { $zsfx2cmd->($zsfx, 0, $lei) }; skip $@, 3 if $@; my $dc_cmd = eval { $zsfx2cmd->($zsfx, 1, $lei) }; ok($dc_cmd, "decompressor for .$zsfx"); my $f = "$fn.$zsfx"; my $wcb = $wcb_get->($mbox, $f); $wcb->(\(my $dup = $buf), { %$deadbeef }); $commit->($wcb); my $uncompressed = xqx([@$dc_cmd, $f]); is($uncompressed, $orig, "$zsfx works unlocked"); unlink $f or BAIL_OUT "unlink $!"; $wcb = $wcb_get->($mbox, $f); $wcb->(\($dup = $buf), { %$deadbeef }); $commit->($wcb); is(xqx([@$dc_cmd, $f]), $orig, "$zsfx matches with lock"); local $lei->{opt} = { augment => 1 }; $wcb = $wcb_get->($mbox, $f); $wcb->(\($dup = $buf . "\nx\n"), { %$deadbeef }); $commit->($wcb); my $cat = popen_rd([@$dc_cmd, $f]); my @raw; PublicInbox::MboxReader->$mbox($cat, sub { push @raw, shift->as_string }); like($raw[1], qr/\nblah\n\nx\n\z/s, "augmented $zsfx"); like($raw[0], qr/\nblah\n\z/s, "original preserved $zsfx"); local $lei->{opt} = { augment => 1 }; $wcb = $wcb_get->($mbox, $f); $wcb->(\($dup = $buf . "\ny\n"), { %$deadbeef }); $commit->($wcb); my @raw3; $cat = popen_rd([@$dc_cmd, $f]); PublicInbox::MboxReader->$mbox($cat, sub { push @raw3, shift->as_string }); my $y = pop @raw3; is_deeply(\@raw3, \@raw, 'previous messages preserved'); like($y, qr/\nblah\n\ny\n\z/s, "augmented $zsfx (atomic)"); } } my $as_orig = sub { my ($eml) = @_; $eml->header_set('Status'); $eml->as_string; }; unlink $fn or BAIL_OUT $!; if ('default deduplication uses content_hash') { my $wcb = $wcb_get->('mboxo', $fn); $deadbeef->{kw} = []; $wcb->(\(my $x = $buf), $deadbeef) for (1..2); $commit->($wcb); my $cmp = ''; open my $fh, '<', $fn or BAIL_OUT $!; PublicInbox::MboxReader->mboxo($fh, sub { $cmp .= $as_orig->(@_) }); is($cmp, $buf, 'only one message written'); local $lei->{opt} = { augment => 1 }; $wcb = $wcb_get->('mboxo', $fn); $wcb->(\($x = $buf . "\nx\n"), $deadbeef) for (1..2); $commit->($wcb); open $fh, '<', $fn or BAIL_OUT $!; my @x; PublicInbox::MboxReader->mboxo($fh, sub { push @x, $as_orig->(@_) }); is(scalar(@x), 2, 'augmented mboxo'); is($x[0], $cmp, 'original message preserved'); is($x[1], $buf . "\nx\n", 'new message appended'); } { # stdout support open my $tmp, '+>', undef or BAIL_OUT $!; local $lei->{1} = $tmp; my $wcb = $wcb_get->('mboxrd', '/dev/stdout'); $wcb->(\(my $x = $buf), $deadbeef); $commit->($wcb); seek($tmp, 0, SEEK_SET) or BAIL_OUT $!; my $cmp = ''; PublicInbox::MboxReader->mboxrd($tmp, sub { $cmp .= $as_orig->(@_) }); is($cmp, $buf, 'message written to stdout'); } SKIP: { # FIFO support use POSIX qw(mkfifo); my $fn = "$tmpdir/fifo"; mkfifo($fn, 0600) or skip("mkfifo not supported: $!", 1); sysopen(my $cat, $fn, O_RDONLY|O_NONBLOCK) or BAIL_OUT $!; my $wcb = $wcb_get->('mboxo', $fn); $wcb->(\(my $x = $buf), $deadbeef); $commit->($wcb); my $cmp = ''; $cat->blocking(1); PublicInbox::MboxReader->mboxo($cat, sub { $cmp .= $as_orig->(@_) }); is($cmp, $buf, 'message written to FIFO'); } { # Maildir support my $mdr = PublicInbox::MdirReader->new; my $md = "$tmpdir/maildir/"; my $wcb = $wcb_get->('maildir', $md); is(ref($wcb), 'CODE', 'got Maildir callback'); my $b4dc0ffee = { blob => 'badc0ffee', kw => [] }; $wcb->(\(my $x = $buf), $b4dc0ffee); my @f; $mdr->maildir_each_file($md, sub { push @f, shift }); open my $fh, $f[0] or BAIL_OUT $!; is(do { local $/; <$fh> }, $buf, 'wrote to Maildir'); $wcb = $wcb_get->('maildir', $md); my $deadcafe = { blob => 'deadcafe', kw => [] }; $wcb->(\($x = $buf."\nx\n"), $deadcafe); my @x = (); $mdr->maildir_each_file($md, sub { push @x, shift }); is(scalar(@x), 1, 'wrote one new file'); ok(!-f $f[0], 'old file clobbered'); open $fh, $x[0] or BAIL_OUT $!; is(do { local $/; <$fh> }, $buf."\nx\n", 'wrote new file to Maildir'); local $lei->{opt}->{augment} = 1; $wcb = $wcb_get->('maildir', $md); $wcb->(\($x = $buf."\ny\n"), $deadcafe); $wcb->(\($x = $buf."\ny\n"), $b4dc0ffee); # skipped by dedupe @f = (); $mdr->maildir_each_file($md, sub { push @f, shift }); is(scalar grep(/\A\Q$x[0]\E\z/, @f), 1, 'old file still there'); my @new = grep(!/\A\Q$x[0]\E\z/, @f); is(scalar @new, 1, '1 new file written (b4dc0ffee skipped)'); open $fh, $x[0] or BAIL_OUT $!; is(do { local $/; <$fh> }, $buf."\nx\n", 'old file untouched'); open $fh, $new[0] or BAIL_OUT $!; is(do { local $/; <$fh> }, $buf."\ny\n", 'new file written'); } done_testing; public-inbox-1.9.0/t/lei_xsearch.t000066400000000000000000000064321430031475700170430ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use List::Util qw(shuffle); use PublicInbox::TestCommon; use PublicInbox::Eml; require_mods(qw(DBD::SQLite Search::Xapian)); require PublicInbox::ExtSearchIdx; require_git 2.6; require_ok 'PublicInbox::LeiXSearch'; require_ok 'PublicInbox::LeiALE'; require_ok 'PublicInbox::LEI'; my ($home, $for_destroy) = tmpdir(); my @ibx; for my $V (1..2) { for my $i (3..6) { push @ibx, create_inbox("v$V-$i", indexlevel => 'full', version => $V, sub { my ($im, $ibx) = @_; for my $j (0..9) { my $eml = PublicInbox::Eml->new(<{-primary_address} Date: Fri, 02 Oct 1993 0$V:0$i:0$j +0000 Subject: v${V}i${i}j$j Message-ID: ${V}er ${i}on j$j EOM $im->add($eml) or BAIL_OUT '->add'; } }); # create_inbox } } my $first = shift @ibx; is($first->{name}, 'v1-3', 'first plucked'); my $last = pop @ibx; is($last->{name}, 'v2-6', 'last plucked'); my $eidx = PublicInbox::ExtSearchIdx->new("$home/eidx"); $eidx->attach_inbox($first); $eidx->attach_inbox($last); $eidx->eidx_sync({fsync => 0}); my $es = PublicInbox::ExtSearch->new("$home/eidx"); my $lxs = PublicInbox::LeiXSearch->new; for my $ibxish (shuffle($es, @ibx)) { $lxs->prepare_external($ibxish); } for my $loc ($lxs->locals) { $lxs->attach_external($loc); } my $nr = $lxs->xdb->get_doccount; my $mset = $lxs->mset('d:19931002..19931003', { limit => $nr }); is($mset->size, $nr, 'got all messages'); my @msgs; for my $mi ($mset->items) { if (my $smsg = $lxs->smsg_for($mi)) { push @msgs, $smsg; } else { diag "E: ${\$mi->get_docid} missing"; } } is(scalar(@msgs), $nr, 'smsgs retrieved for all'); $mset = $lxs->recent(undef, { limit => 1 }); is($mset->size, 1, 'one result'); my @ibxish = $lxs->locals; is(scalar(@ibxish), scalar(@ibx) + 1, 'got locals back'); is($lxs->search, $lxs, '->search works'); is($lxs->over, undef, '->over fails'); { $lxs = PublicInbox::LeiXSearch->new; my $v2ibx = create_inbox 'v2full', version => 2, sub { $_[0]->add(eml_load('t/plack-qp.eml')); }; my $v1ibx = create_inbox 'v1medium', indexlevel => 'medium', tmpdir => "$home/v1tmp", sub { $_[0]->add(eml_load('t/utf8.eml')); }; $lxs->prepare_external($v1ibx); $lxs->prepare_external($v2ibx); for my $loc ($lxs->locals) { $lxs->attach_external($loc); } my $mset = $lxs->mset('m:testmessage@example.com'); is($mset->size, 1, 'got m: match on medium+full XSearch mix'); my $mitem = ($mset->items)[0]; my $smsg = $lxs->smsg_for($mitem) or BAIL_OUT 'smsg_for broken'; my $ale = PublicInbox::LeiALE::_new("$home/ale"); my $lei = bless {}, 'PublicInbox::LEI'; $ale->refresh_externals($lxs, $lei); my $exp = [ $smsg->{blob}, 'blob', -s 't/utf8.eml' ]; is_deeply([ $ale->git->check($smsg->{blob}) ], $exp, 'ale->git->check'); $lxs = PublicInbox::LeiXSearch->new; $lxs->prepare_external($v2ibx); $ale->refresh_externals($lxs, $lei); is_deeply([ $ale->git->check($smsg->{blob}) ], $exp, 'ale->git->check remembered inactive external'); rename("$home/v1tmp", "$home/v1moved") or BAIL_OUT "rename: $!"; $ale->refresh_externals($lxs, $lei); is($ale->git->check($smsg->{blob}), undef, 'missing after directory gone'); } done_testing; public-inbox-1.9.0/t/linkify.t000066400000000000000000000075271430031475700162300ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Linkify; { my $l = PublicInbox::Linkify->new; my $u = 'http://example.com/url-with-trailing-period'; my $s = $u . '.'; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq($u.), 'trailing period not in URL'); } { my $l = PublicInbox::Linkify->new; my $u = 'http://i-forgot-trailing-slash.example.com'; my $s = $u; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq($u), 'missing trailing slash OK'); } # handle URLs in parenthesized statements { my $l = PublicInbox::Linkify->new; my $u = 'http://example.com/'; my $s = "(see: $u)"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq{(see: $u)}, 'trailing ) not in URL'); } { my $l = PublicInbox::Linkify->new; my $u = 'http://example.com/url-with-trailing-semicolon'; my $s = $u . ';'; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq($u;), 'trailing semicolon not in URL'); } { my $l = PublicInbox::Linkify->new; my $u = 'http://example.com/url-with-(parens)'; my $s = "hello $u world"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq(hello $u world), 'URL preserved'); $s = "$u. hi"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq($u. hi), 'paired () in URL OK'); $u .= "?query=a"; $s = "hello $u world"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq(hello $u world), 'query preserved'); $u .= "#fragment"; $s = "hello $u world"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq(hello $u world), 'query + fragment preserved'); $u = "http://example.com/"; $s = "hello $u world"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq(hello $u world), "root URL preserved"); $u = "http://example.com/#fragment"; $s = "hello $u world"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq(hello $u world), "root + fragment"); } # Markdown compatibility { my $l = PublicInbox::Linkify->new; my $u = 'http://example.com/'; my $s = "[markdown]($u)"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq![markdown]($u)!, 'Markdown-compatible'); $s = qq![markdown]($u "title")!; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq![markdown]($u "title")!, 'Markdown title compatible'); $s = qq![markdown]($u).!; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq![markdown]($u).!, 'Markdown-compatible end of sentence'); } # Perl and Ruby code compatibility { my $l = PublicInbox::Linkify->new; my $u = 'http://example.com/'; foreach my $q ("'%s'", '"%s"', 'q!%s!', 'q(%s)') { # Perl my $s = sprintf("my \$var = $q;", $u); $s = $l->linkify_1($s); $s = $l->linkify_2($s); like($s, qr/>\Q$u\Elinkify_1($s); $s = $l->linkify_2($s); like($s, qr/>\Q$u\Enew; my $s = '(see http://example.com/).'; $s = $l->linkify_1($s); $s = $l->linkify_2($s); like($s, qr!\(see ]+>http://example\.com/\)\.!s, 'punctuation with unpaired ) OK') } if ('IDN example: ') { my $hc = '月'; my $u = "http://www.\x{6708}.example.com/"; my $s = $u; my $l = PublicInbox::Linkify->new; $s = $l->linkify_1($s); $s = $l->linkify_2($s); my $expect = qq{http://www.$hc.example.com/}; is($s, $expect, 'IDN message escaped properly'); } done_testing(); public-inbox-1.9.0/t/main-bin/000077500000000000000000000000001430031475700160555ustar00rootroot00000000000000public-inbox-1.9.0/t/main-bin/spamc000077500000000000000000000000501430031475700171010ustar00rootroot00000000000000#!/bin/sh # trivial spamc mock exec cat public-inbox-1.9.0/t/mbox_lock.t000066400000000000000000000056541430031475700165370ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use POSIX qw(_exit); use PublicInbox::DS qw(now); use Errno qw(EAGAIN); use PublicInbox::OnDestroy; use_ok 'PublicInbox::MboxLock'; my ($tmpdir, $for_destroy) = tmpdir(); my $f = "$tmpdir/f"; my $mbl = PublicInbox::MboxLock->acq($f, 1, ['dotlock']); ok(-f "$f.lock", 'dotlock created'); undef $mbl; ok(!-f "$f.lock", 'dotlock gone'); $mbl = PublicInbox::MboxLock->acq($f, 1, ['none']); ok(!-f "$f.lock", 'no dotlock with none'); undef $mbl; { opendir my $cur, '.' or BAIL_OUT $!; my $od = PublicInbox::OnDestroy->new(sub { chdir $cur }); chdir $tmpdir or BAIL_OUT; my $abs = "$tmpdir/rel.lock"; my $rel = PublicInbox::MboxLock->acq('rel', 1, ['dotlock']); chdir '/' or BAIL_OUT; ok(-f $abs, 'lock with abs path created'); undef $rel; ok(!-f $abs, 'lock gone despite being in the wrong dir'); } eval { PublicInbox::MboxLock->acq($f, 1, ['bogus']); fail "should not succeed with `bogus'"; }; ok($@, "fails on `bogus' lock method"); eval { PublicInbox::MboxLock->acq($f, 1, ['timeout=1']); fail "should not succeed with only timeout"; }; ok($@, "fails with only `timeout=' and no lock method"); my $defaults = PublicInbox::MboxLock->defaults; is(ref($defaults), 'ARRAY', 'default lock methods'); my $test_rw_lock = sub { my ($func) = @_; my $m = ["$func,timeout=0.000001"]; for my $i (1..2) { pipe(my ($r, $w)) or BAIL_OUT "pipe: $!"; my $t0 = now; my $pid = fork // BAIL_OUT "fork $!"; if ($pid == 0) { eval { PublicInbox::MboxLock->acq($f, 1, $m) }; my $err = $@; syswrite $w, "E: $err"; _exit($err ? 0 : 1); } undef $w; waitpid($pid, 0); is($?, 0, "$func r/w lock behaved as expected #$i"); my $d = now - $t0; ok($d < 1, "$func r/w timeout #$i") or diag "elapsed=$d"; my $err = do { local $/; <$r> }; $! = EAGAIN; my $msg = "$!"; like($err, qr/\Q$msg\E/, "got EAGAIN in child #$i"); } }; my $test_ro_lock = sub { my ($func) = @_; for my $i (1..2) { my $t0 = now; my $pid = fork // BAIL_OUT "fork $!"; if ($pid == 0) { eval { PublicInbox::MboxLock->acq($f, 0, [ $func ]) }; _exit($@ ? 1 : 0); } waitpid($pid, 0); is($?, 0, "$func ro lock behaved as expected #$i"); my $d = now - $t0; ok($d < 1, "$func timeout respected #$i") or diag "elapsed=$d"; } }; SKIP: { grep(/fcntl/, @$defaults) or skip 'File::FcntlLock not available', 1; my $top = PublicInbox::MboxLock->acq($f, 1, $defaults); ok($top, 'fcntl lock acquired'); $test_rw_lock->('fcntl'); undef $top; $top = PublicInbox::MboxLock->acq($f, 0, $defaults); ok($top, 'fcntl read lock acquired'); $test_ro_lock->('fcntl'); } $mbl = PublicInbox::MboxLock->acq($f, 1, ['flock']); ok($mbl, 'flock acquired'); $test_rw_lock->('flock'); undef $mbl; $mbl = PublicInbox::MboxLock->acq($f, 0, ['flock']); $test_ro_lock->('flock'); done_testing; public-inbox-1.9.0/t/mbox_reader.t000066400000000000000000000106011430031475700170350ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::TestCommon; use List::Util qw(shuffle); use PublicInbox::Eml; use Fcntl qw(SEEK_SET); require_ok 'PublicInbox::MboxReader'; require_ok 'PublicInbox::LeiToMail'; my %raw = ( hdr_only => "From: header-only\@example.com\n\n", small_from => "From: small-from\@example.com\n\nFrom hell\n", small => "From: small\@example.com\n\nfrom hell\n", big_hdr_only => "From: big-header\@example.com\n" . (('A: '.('a' x 72)."\n") x 1000)."\n", big_body => "From: big-body\@example.com\n\n". (('b: '.('b' x 72)."\n") x 1000) . "From hell\n", big_all => "From: big-all\@example.com\n". (("A: ".('a' x 72)."\n") x 1000). "\n" . (("b: ".('b' x 72)."\n") x 1000) . "From hell\n", ); { my $eml = PublicInbox::Eml->new($raw{small}); my $mbox_keywords = PublicInbox::MboxReader->can('mbox_keywords'); is_deeply($mbox_keywords->($eml), [], 'no keywords'); $eml->header_set('Status', 'RO'); is_deeply($mbox_keywords->($eml), ['seen'], 'seen extracted'); $eml->header_set('X-Status', 'A'); is_deeply($mbox_keywords->($eml), [qw(answered seen)], 'seen+answered extracted'); } if ($ENV{TEST_EXTRA}) { for my $fn (glob('t/*.eml'), glob('t/*/*.{patch,eml}')) { $raw{$fn} = eml_load($fn)->as_string; } } my $reader = PublicInbox::MboxReader->new; my $check_fmt = sub { my $fmt = shift; my @order = shuffle(keys %raw); my $eml2mbox = PublicInbox::LeiToMail->can("eml2$fmt"); open my $fh, '+>', undef or BAIL_OUT "open: $!"; for my $k (@order) { my $eml = PublicInbox::Eml->new($raw{$k}); my $buf = $eml2mbox->($eml); print $fh $$buf or BAIL_OUT "print $!"; } seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!"; $reader->$fmt($fh, sub { my ($eml) = @_; $eml->header_set('Status'); $eml->header_set('Lines'); my $cur = shift @order; my @cl = $eml->header_raw('Content-Length'); if ($fmt =~ /\Amboxcl/) { is(scalar(@cl), 1, "Content-Length set $fmt $cur"); my $raw = $eml->body_raw; my $adj = 0; if ($fmt eq 'mboxcl') { my @from = ($raw =~ /^(From )/smg); $adj = scalar(@from); } is(length($raw), $cl[0] - $adj, "Content-Length is correct $fmt $cur"); # clobber for ->as_string comparison below $eml->header_set('Content-Length'); # special case for t/solve/bare.patch, not sure if we # should even handle it... if ($cl[0] eq '0' && ${$eml->{hdr}} eq '') { delete $eml->{bdy}; } } else { is(scalar(@cl), 0, "Content-Length unset $fmt $cur"); } my $orig = PublicInbox::Eml->new($raw{$cur}); is($eml->as_string, $orig->as_string, "read back original $fmt $cur"); }); }; my @mbox = qw(mboxrd mboxo mboxcl mboxcl2); for my $fmt (@mbox) { $check_fmt->($fmt) } s/\n/\r\n/sg for (values %raw); for my $fmt (@mbox) { $check_fmt->($fmt) } { my $no_blank_eom = <<'EOM'; From x@y Fri Oct 2 00:00:00 1993 a: b body1 From x@y Fri Oct 2 00:00:00 1993 c: d body2 EOM # chop($no_blank_eom) eq "\n" or BAIL_OUT 'broken LF'; for my $variant (qw(mboxrd mboxo)) { my @x; open my $fh, '<', \$no_blank_eom or BAIL_OUT 'PerlIO::scalar'; $reader->$variant($fh, sub { push @x, shift }); is_deeply($x[0]->{bdy}, \"body1\n", 'LF preserved in 1st'); is_deeply($x[1]->{bdy}, \"body2\n", 'no LF added in 2nd'); } } SKIP: { use PublicInbox::Spawn qw(popen_rd); my $fh = popen_rd([ $^X, '-E', <<'' ]); say "From x@y Fri Oct 2 00:00:00 1993"; print "a: b\n\n", "x" x 70000, "\n\n"; say "From x@y Fri Oct 2 00:00:00 2010"; print "Final: bit\n\n", "Incomplete\n\n"; exit 1 my @x; eval { $reader->mboxrd($fh, sub { push @x, shift->as_string }) }; like($@, qr/error closing mbox/, 'detects error reading from pipe'); is(scalar(@x), 1, 'only saw one message'); is(scalar(grep(/Final/, @x)), 0, 'no incomplete bit'); } { my $html = <hi,how are you EOM for my $m (qw(mboxrd mboxcl mboxcl2 mboxo)) { my (@w, @x); local $SIG{__WARN__} = sub { push @w, @_ }; open my $fh, '<', \$html or xbail 'PerlIO::scalar'; PublicInbox::MboxReader->$m($fh, sub { push @x, $_[0]->as_string }); if ($m =~ /\Amboxcl/) { is_deeply(\@x, [], "messages in invalid $m"); } else { is_deeply(\@x, [ "\n$html" ], "body-only $m"); } is_deeply([grep(!/^W: leftover/, @w)], [], "no extra warnings besides leftover ($m)"); } } done_testing; public-inbox-1.9.0/t/mda-mime.eml000066400000000000000000000006401430031475700165500ustar00rootroot00000000000000From: a@example.com Subject: blah Cc: test-public@example.com Message-ID: Content-Type: multipart/alternative; boundary="b" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit --b Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: base64 PGh0bWw+PGJvZHk+aGk8L2JvZHk+PC9odG1sPg== --b Content-Type: text/plain Content-Transfer-Encoding: quoted-printable hi =3D "bye"= --b-- public-inbox-1.9.0/t/mda.t000066400000000000000000000221111430031475700153060ustar00rootroot00000000000000# Copyright (C) 2014-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use Cwd qw(getcwd); use PublicInbox::MID qw(mid2path); use PublicInbox::Git; use PublicInbox::InboxWritable; use PublicInbox::TestCommon; use PublicInbox::Import; my ($tmpdir, $for_destroy) = tmpdir(); my $home = "$tmpdir/pi-home"; my $pi_home = "$home/.public-inbox"; my $pi_config = "$pi_home/config"; my $maindir = "$tmpdir/main.git"; my $main_bin = getcwd()."/t/main-bin"; my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock my $fail_bin = getcwd()."/t/fail-bin"; my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc spam mock my $addr = 'test-public@example.com'; my $cfgpfx = "publicinbox.test"; my $faildir = "$home/faildir/"; my $git = PublicInbox::Git->new($maindir); my $fail_bad_header = sub ($$$) { my ($good_rev, $msg, $in) = @_; my @f = glob("$faildir/*/*"); unlink @f if @f; my ($out, $err) = ("", ""); my $opt = { 0 => \$in, 1 => \$out, 2 => \$err }; local $ENV{PATH} = $main_path; ok(run_script(['-mda'], undef, $opt), "no error on undeliverable ($msg)"); my $rev = $git->qx(qw(rev-list HEAD)); chomp $rev; is($rev, $good_rev, "bad revision not committed ($msg)"); @f = glob("$faildir/*/*"); is(scalar @f, 1, "faildir written to"); [ $in, $out, $err ]; }; { ok(-x "$main_bin/spamc", "spamc ham mock found (run in top of source tree"); ok(-x "$fail_bin/spamc", "spamc mock found (run in top of source tree"); is(1, mkdir($home, 0755), "setup ~/ for testing"); is(1, mkdir($pi_home, 0755), "setup ~/.public-inbox"); PublicInbox::Import::init_bare($maindir); open my $fh, '>>', $pi_config or die; print $fh <header('From'); my ($author) = PublicInbox::Address::names($from); my ($email) = PublicInbox::Address::emails($from); my $date = $msg->header('Date'); is('Eléanor', encode('us-ascii', my $tmp = $author, Encode::HTMLCREF), 'HTML conversion is correct'); is($email, 'e@example.com', 'email parsed correctly'); is($date, 'Thu, 01 Jan 1970 00:00:00 +0000', 'message date parsed correctly'); $author; }; die $@ if $@; { my $good_rev; local $ENV{PI_EMERGENCY} = $faildir; local $ENV{HOME} = $home; local $ENV{ORIGINAL_RECIPIENT} = $addr; my $in = < To: You Cc: $addr Message-Id: Subject: hihi Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF # ensure successful message delivery { local $ENV{PATH} = $main_path; ok(run_script(['-mda'], undef, { 0 => \$in })); my $rev = $git->qx(qw(rev-list HEAD)); like($rev, qr/\A[a-f0-9]{40}/, "good revision committed"); chomp $rev; my $cmt = $git->cat_file($rev); like($$cmt, qr/^author Me 0 \+0000\n/m, "author info set correctly"); like($$cmt, qr/^committer test /m, "committer info set correctly"); $good_rev = $rev; } # ensure failures work, fail with bad spamc { my @prev = <$faildir/new/*>; is(scalar @prev, 0 , "nothing in PI_EMERGENCY before"); local $ENV{PATH} = $fail_path; ok(run_script(['-mda'], undef, { 0 => \$in })); my @revs = $git->qx(qw(rev-list HEAD)); is(scalar @revs, 1, "bad revision not committed"); my @new = <$faildir/new/*>; is(scalar @new, 1, "PI_EMERGENCY is written to"); } $fail_bad_header->($good_rev, "bad recipient", <<""); From: Me To: You Message-Id: Subject: hihi Date: Thu, 01 Jan 1970 00:00:00 +0000 my $fail = $fail_bad_header->($good_rev, "duplicate Message-ID", <<""); From: Me To: You Cc: $addr Message-ID: Subject: hihi Date: Thu, 01 Jan 1970 00:00:00 +0000 like($fail->[2], qr/CONFLICT/, "duplicate Message-ID message"); $fail_bad_header->($good_rev, "missing From:", <<""); To: $addr Message-ID: Subject: hihi Date: Thu, 01 Jan 1970 00:00:00 +0000 $fail_bad_header->($good_rev, "short subject:", <<""); To: $addr From: cat\@example.com Message-ID: Subject: a Date: Thu, 01 Jan 1970 00:00:00 +0000 $fail_bad_header->($good_rev, "no date", <<""); To: $addr From: u\@example.com Message-ID: Subject: hihi $fail_bad_header->($good_rev, "bad date", <<""); To: $addr From: u\@example.com Message-ID: Subject: hihi Date: deadbeef } # spam training { local $ENV{PI_EMERGENCY} = $faildir; local $ENV{HOME} = $home; local $ENV{ORIGINAL_RECIPIENT} = $addr; local $ENV{PATH} = $main_path; my $mid = 'spam-train@example.com'; my $in = < To: You Cc: $addr Message-ID: <$mid> Subject: this message will be trained as spam Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF { # deliver the spam message, first ok(run_script(['-mda'], undef, { 0 => \$in })); my $path = mid2path($mid); my $msg = $git->cat_file("HEAD:$path"); like($$msg, qr/\Q$mid\E/, "message delivered"); # now train it local $ENV{GIT_AUTHOR_EMAIL} = 'trainer@example.com'; local $ENV{GIT_COMMITTER_EMAIL} = 'trainer@example.com'; local $ENV{GIT_COMMITTER_NAME}; delete $ENV{GIT_COMMITTER_NAME}; ok(run_script(['-learn', 'spam'], undef, { 0 => $msg }), "no failure from learning spam"); ok(run_script(['-learn', 'spam'], undef, { 0 => $msg }), "no failure from learning spam idempotently"); } } # train ham message { local $ENV{PI_EMERGENCY} = $faildir; local $ENV{HOME} = $home; local $ENV{ORIGINAL_RECIPIENT} = $addr; local $ENV{PATH} = $main_path; my $mid = 'ham-train@example.com'; my $in = < To: You Cc: $addr Message-ID: <$mid> Subject: this message will be trained as spam Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF # now train it # these should be overridden local $ENV{GIT_AUTHOR_EMAIL} = 'trainer@example.com'; local $ENV{GIT_COMMITTER_EMAIL} = 'trainer@example.com'; ok(run_script(['-learn', 'ham'], undef, { 0 => \$in }), "learned ham without failure"); my $path = mid2path($mid); my $msg = $git->cat_file("HEAD:$path"); like($$msg, qr/\Q$mid\E/, "ham message delivered"); ok(run_script(['-learn', 'ham'], undef, { 0 => \$in }), "learned ham idempotently "); # ensure trained email is filtered, too my $mime = eml_load 't/mda-mime.eml'; ($mid) = ($mime->header_raw('message-id') =~ /<([^>]+)>/); { $in = $mime->as_string; ok(run_script(['-learn', 'ham'], undef, { 0 => \$in }), "learned ham without failure"); my $path = mid2path($mid); $msg = $git->cat_file("HEAD:$path"); like($$msg, qr/<\Q$mid\E>/, "ham message delivered"); unlike($$msg, qr//i, ' filtered'); } } # List-ID based delivery { local $ENV{PI_EMERGENCY} = $faildir; local $ENV{HOME} = $home; local $ENV{ORIGINAL_RECIPIENT} = undef; delete $ENV{ORIGINAL_RECIPIENT}; local $ENV{PATH} = $main_path; my $list_id = 'foo.example.com'; my $mid = 'list-id-delivery@example.com'; my $in = < To: You Cc: $addr Message-ID: <$mid> List-Id: <$list_id> Subject: this message will be trained as spam Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF xsys(qw(git config --file), $pi_config, "$cfgpfx.listid", uc $list_id); $? == 0 or die "failed to set listid $?"; ok(run_script(['-mda'], undef, { 0 => \$in }), 'mda OK with List-Id match'); my $path = mid2path($mid); my $msg = $git->cat_file("HEAD:$path"); like($$msg, qr/\Q$list_id\E/, 'delivered message w/ List-ID matches'); # try a message w/o precheck $in = < List-Id: <$list_id> this message would not be accepted without --no-precheck EOF my ($out, $err) = ('', ''); my $rdr = { 0 => \$in, 1 => \$out, 2 => \$err }; ok(run_script(['-mda', '--no-precheck'], undef, $rdr), 'mda OK with List-Id match and --no-precheck'); my $cur = $git->qx(qw(diff HEAD~1..HEAD)); like($cur, qr/this message would not be accepted without --no-precheck/, '--no-precheck delivered message anyways'); # try a message with multiple List-ID headers $in = < List-ID: <$list_id> Message-ID: <2lids\@example> Subject: two List-IDs From: user To: $addr Date: Fri, 02 Oct 1993 00:00:00 +0000 EOF ($out, $err) = ('', ''); ok(run_script(['-mda'], undef, $rdr), 'mda OK with multiple List-Id matches'); $cur = $git->qx(qw(diff HEAD~1..HEAD)); like($cur, qr/^\+Message-ID: <2lids\@example>/sm, 'multi List-ID match delivered'); like($err, qr/multiple List-ID/, 'warned about multiple List-ID'); # ensure -learn rm works after inbox address is updated ($out, $err) = ('', ''); xsys(qw(git config --file), $pi_config, "$cfgpfx.address", 'updated-address@example.com'); ok(run_script(['-learn', 'rm'], undef, $rdr), 'rm-ed via -learn'); $cur = $git->qx(qw(diff HEAD~1..HEAD)); like($cur, qr/^-Message-ID: <2lids\@example>/sm, 'changed in git'); } done_testing(); public-inbox-1.9.0/t/mda_filter_rubylang.t000066400000000000000000000036161430031475700205670ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::Config; use PublicInbox::TestCommon; require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::V2Writable'; my ($tmpdir, $for_destroy) = tmpdir(); my $pi_config = "$tmpdir/pi_config"; local $ENV{PI_CONFIG} = $pi_config; local $ENV{PI_EMERGENCY} = "$tmpdir/emergency"; my @cfg = ('git', 'config', "--file=$pi_config"); is(xsys(@cfg, 'publicinboxmda.spamcheck', 'none'), 0); for my $v (qw(V1 V2)) { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my $cfgpfx = "publicinbox.$v"; my $inboxdir = "$tmpdir/$v"; my $addr = "test-$v\@example.com"; my $cmd = [ '-init', "-$v", $v, $inboxdir, "http://example.com/$v", $addr ]; ok(run_script($cmd), 'public-inbox-init'); ok(run_script([qw(-index -j0), $inboxdir]), 'public-inbox-index'); is(xsys(@cfg, "$cfgpfx.filter", 'PublicInbox::Filter::RubyLang'), 0); is(xsys(@cfg, "$cfgpfx.altid", 'serial:alerts:file=msgmap.sqlite3'), 0); for my $i (1..2) { my $env = { ORIGINAL_RECIPIENT => $addr }; my $opt = { 0 => \(< Date: Sat, 05 Jan 2019 04:19:17 +0000 something EOF ok(run_script(['-mda'], $env, $opt), 'message delivered'); } my $cfg = PublicInbox::Config->new; my $ibx = $cfg->lookup_name($v); # make sure all serials are searchable: for my $i (1..2) { my $mset = $ibx->search->mset("alerts:$i"); is($mset->size, 1, "got one result for alerts:$i"); my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i"); } is_deeply([], \@warn, 'no warnings'); # TODO: public-inbox-learn doesn't know about filters # (but -watch does) } done_testing(); public-inbox-1.9.0/t/mdir_reader.t000066400000000000000000000024351430031475700170310ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use PublicInbox::TestCommon; require_ok 'PublicInbox::MdirReader'; *maildir_basename_flags = \&PublicInbox::MdirReader::maildir_basename_flags; *maildir_path_flags = \&PublicInbox::MdirReader::maildir_path_flags; is(maildir_basename_flags('foo'), '', 'new valid name accepted'); is(maildir_basename_flags('foo:2,'), '', 'cur valid name accepted'); is(maildir_basename_flags('foo:2,bar'), 'bar', 'flags name accepted'); is(maildir_basename_flags('.foo:2,bar'), undef, 'no hidden files'); is(maildir_basename_flags('fo:o:2,bar'), undef, 'no extra colon'); is(maildir_path_flags('/path/to/foo:2,S'), 'S', 'flag returned for path'); is(maildir_path_flags('/path/to/.foo:2,S'), undef, 'no hidden paths'); is(maildir_path_flags('/path/to/foo:2,'), '', 'no flags in path'); # not sure if there's a better place for eml_from_path use_ok 'PublicInbox::InboxWritable', qw(eml_from_path); is(eml_from_path('.'), undef, 'eml_from_path fails on directory'); is_deeply([PublicInbox::MdirReader::flags2kw('S')], [{ 'seen' => 1 }, []], "`seen' kw set from flag"); is_deeply([PublicInbox::MdirReader::flags2kw('Su')], [{ 'seen' => 1 }, ['u']], 'unknown flag ignored'); done_testing; public-inbox-1.9.0/t/mid.t000066400000000000000000000045121430031475700153230ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::Eml; use PublicInbox::MID qw(mid_escape mids references mids_for_index id_compress); is(mid_escape('foo!@(bar)'), 'foo!@(bar)'); is(mid_escape('foo%!@(bar)'), 'foo%25!@(bar)'); is(mid_escape('foo%!@(bar)'), 'foo%25!@(bar)'); # n.b: this is probably invalid since we dropped CGI for PSGI: like(id_compress('foo%bar@wtf'), qr/\A[a-f0-9]{40}\z/, "percent always converted to sha1 to workaround buggy httpds"); is(id_compress('foobar-wtf'), 'foobar-wtf', 'regular ID not compressed'); { my $mime = PublicInbox::Eml->new("Message-ID: \n\n"); $mime->header_set('X-Alt-Message-ID', ''); is_deeply(['mid-1@a'], mids($mime->header_obj), 'mids in common case'); $mime->header_set('Message-Id', '', ''); is_deeply(['mid-1@a', 'mid-2@b'], mids($mime->header_obj), '2 mids'); $mime->header_set('Message-Id', '', ''); is_deeply(['mid-1@a'], mids($mime->header_obj), 'dup mids'); $mime->header_set('Message-Id', ' comment'); is_deeply(['mid-1@a'], mids($mime->header_obj), 'comment ignored'); $mime->header_set('Message-Id', 'bare-mid'); is_deeply(['bare-mid'], mids($mime->header_obj), 'bare mid OK'); $mime->header_set('References', ' '); $mime->header_set('In-Reply-To', ''); is_deeply(['hello', 'world', 'weld'], references($mime->header_obj), 'references combines with In-Reply-To'); $mime->header_set('References', "\n\t"); $mime->header_set('In-Reply-To'); is_deeply(references($mime->header_obj), ['hello', 'world'], 'multiline References OK'); $mime->header_set('References', ""); is_deeply(references($mime->header_obj), ['helloworld'], 'drop \t in References <656C30A1EFC89F6B2082D9B6@localhost>'); $mime->header_set('Message-ID', ""); is_deeply(mids($mime->header_obj), ['helloworld'], 'drop \t in Message-ID'); $mime->header_set('To', 'u@example.com'); $mime->header_set('References', ' '); is_deeply(references($mime->header_obj), [qw(hello world)]); is_deeply([qw(helloworld alt-id-for-nntp)], mids_for_index($mime->header_obj), 'X-Alt-Message-ID can be indexed'); } done_testing(); 1; public-inbox-1.9.0/t/mime.t000066400000000000000000000075021430031475700155030ustar00rootroot00000000000000#!perl -w # Copyright (C) 2017-2021 all contributors # This library is free software; you can redistribute it and/or modify # it under the same terms as Perl itself. # Artistic or GPL-1+ use strict; use Test::More; use PublicInbox::TestCommon; use PublicInbox::MsgIter; my @classes = qw(PublicInbox::Eml); SKIP: { require_mods('Email::MIME', 1); push @classes, 'PublicInbox::MIME'; }; use_ok $_ for @classes; local $SIG{__WARN__} = sub {}; # needed for old Email::Simple (used by E::M) for my $cls (@classes) { my $msg = $cls->new(<<'EOF'); From: Richard Hansen To: git@vger.kernel.org Cc: Richard Hansen Subject: [PATCH 0/2] minor diff orderfile documentation improvements Date: Mon, 9 Jan 2017 19:40:29 -0500 Message-Id: <20170110004031.57985-1-hansenr@google.com> X-Mailer: git-send-email 2.11.0.390.gc69c2f50cf-goog Content-Type: multipart/signed; protocol="application/pkcs7-signature"; micalg=sha-256; boundary="94eb2c0bc864b76ba30545b2bca9" --94eb2c0bc864b76ba30545b2bca9 Richard Hansen (2): diff: document behavior of relative diff.orderFile diff: document the pattern format for diff.orderFile Documentation/diff-config.txt | 5 ++++- Documentation/diff-options.txt | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) --94eb2c0bc864b76ba30545b2bca9 Content-Type: application/pkcs7-signature; name="smime.p7s" Content-Transfer-Encoding: base64 Content-Disposition: attachment; filename="smime.p7s" Content-Description: (truncated) S/MIME Cryptographic Signature dkTlB69771K2eXK4LcHSH/2LqX+VYa3K44vrx1ruzjXdNWzIpKBy0weFNiwnJCGofvCysM2RCSI1 --94eb2c0bc864b76ba30545b2bca9-- EOF my @parts = $msg->subparts; my $exp = <body, $exp, 'body matches expected'); my $raw = <<'EOF'; Date: Wed, 18 Jan 2017 13:28:32 -0500 From: Santiago Torres To: Junio C Hamano Cc: git@vger.kernel.org, peff@peff.net, sunshine@sunshineco.com, walters@verbum.org, Lukas Puehringer Subject: Re: [PATCH v6 4/6] builtin/tag: add --format argument for tag -v Message-ID: <20170118182831.pkhqu2np3bh2puei@LykOS.localdomain> References: <20170117233723.23897-1-santiago@nyu.edu> <20170117233723.23897-5-santiago@nyu.edu> MIME-Version: 1.0 Content-Type: multipart/signed; micalg=pgp-sha256; protocol="application/pgp-signature"; boundary="r24xguofrazenjwe" Content-Disposition: inline In-Reply-To: --r24xguofrazenjwe Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable your tree directly?=20 --r24xguofrazenjwe Content-Type: application/pgp-signature; name="signature.asc" -----BEGIN PGP SIGNATURE----- =7wIb -----END PGP SIGNATURE----- --r24xguofrazenjwe-- EOF $msg = $cls->new($raw); my $nr = 0; msg_iter($msg, sub { my ($part, $level, @ex) = @{$_[0]}; is($level, 1, 'at expected level'); if (join('fail if $#ex > 0', @ex) eq '1') { is($part->body_str, "your tree directly? \r\n", 'body OK'); } elsif (join('fail if $#ex > 0', @ex) eq '2') { is($part->body, "-----BEGIN PGP SIGNATURE-----\n\n" . "=7wIb\n" . "-----END PGP SIGNATURE-----\n", 'sig "matches"'); } else { fail "unexpected part\n"; } $nr++; }); is($nr, 2, 'got 2 parts'); is($msg->as_string, $raw, 'stringified sufficiently close to original'); } done_testing(); public-inbox-1.9.0/t/miscsearch.t000066400000000000000000000030611430031475700166710ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::TestCommon; require_mods(qw(Search::Xapian DBD::SQLite)); use_ok 'PublicInbox::MiscSearch'; use_ok 'PublicInbox::MiscIdx'; my ($tmp, $for_destroy) = tmpdir(); my $eidx = { xpfx => "$tmp/eidx", -no_fsync => 1 }; # mock ExtSearchIdx my $v1 = create_inbox 'hope', address => [ 'nope@example.com' ], indexlevel => 'basic', -no_gc => 1, sub { my ($im, $ibx) = @_; open my $fh, '>', "$ibx->{inboxdir}/description" or BAIL_OUT "open: $!"; print $fh "Everything sucks this year\n" or BAIL_OUT "print $!"; close $fh or BAIL_OUT "close $!"; }; my $midx = PublicInbox::MiscIdx->new($eidx); $midx->index_ibx($v1); $midx->commit_txn; undef $v1; my $ms = PublicInbox::MiscSearch->new("$tmp/eidx/misc"); my $mset = $ms->mset('"everything sucks today"'); is(scalar($mset->items), 0, 'no match on description phrase'); $mset = $ms->mset('"everything sucks this year"'); is(scalar($mset->items), 1, 'match phrase on description'); $mset = $ms->mset('everything sucks'); is(scalar($mset->items), 1, 'match words in description'); $mset = $ms->mset('nope@example.com'); is(scalar($mset->items), 1, 'match full address'); $mset = $ms->mset('nope'); is(scalar($mset->items), 1, 'match partial address'); $mset = $ms->mset('hope'); is(scalar($mset->items), 1, 'match name'); my $mi = ($mset->items)[0]; my $doc = $mi->get_document; is($doc->get_data, '{}', 'stored empty data'); done_testing; public-inbox-1.9.0/t/msg_iter-nested.eml000066400000000000000000000005451430031475700201570ustar00rootroot00000000000000From: root@localhost MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="outer" --outer From: sub@localhost MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="inner" --inner MIME-Version: 1.0 a --inner MIME-Version: 1.0 b --inner-- --outer MIME-Version: 1.0 sig --outer-- public-inbox-1.9.0/t/msg_iter-order.eml000066400000000000000000000002561430031475700200070ustar00rootroot00000000000000From: root@localhost MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="b" --b MIME-Version: 1.0 a --b MIME-Version: 1.0 b --b-- public-inbox-1.9.0/t/msg_iter.t000066400000000000000000000077411430031475700163720ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Hval qw(ascii_html); use MIME::QuotedPrint 3.05 qw(encode_qp); use_ok('PublicInbox::MsgIter'); { my $mime = eml_load 't/msg_iter-order.eml'; my @parts; msg_iter($mime, sub { my ($part, $level, @ex) = @{$_[0]}; my $s = $part->body_str; $s =~ s/\s+//s; push @parts, [ $s, $level, @ex ]; }); is_deeply(\@parts, [ [ qw(a 1 1) ], [ qw(b 1 2) ] ], 'order is fine'); } { my $mime = eml_load 't/msg_iter-nested.eml'; my @parts; msg_iter($mime, sub { my ($part, $level, @ex) = @{$_[0]}; my $s = $part->body_str; $s =~ s/\s+//s; push @parts, [ $s, $level, @ex ]; }); is_deeply(\@parts, [ [qw(a 2 1.1)], [qw(b 2 1.2)], [qw(sig 1 2)] ], 'nested part shows up properly'); } { my $mime = eml_load 't/iso-2202-jp.eml'; my $raw = ''; msg_iter($mime, sub { my ($part, $level, @ex) = @{$_[0]}; my ($s, $err) = msg_part_text($part, 'text/plain'); ok(!$err, 'no error'); $raw .= $s; }); ok(length($raw) > 0, 'got non-empty message'); is(index($raw, '$$$'), -1, 'no unescaped $$$'); } { my $mime = eml_load 't/x-unknown-alpine.eml'; my $raw = ''; msg_iter($mime, sub { my ($part, $level, @ex) = @{$_[0]}; my ($s, $err) = msg_part_text($part, 'text/plain'); $raw .= $s; }); like($raw, qr!^\thttps://!ms, 'tab expanded with X-UNKNOWN'); like(ascii_html($raw), qr/• bullet point/s, 'got bullet point when X-UNKNOWN assumes UTF-8'); } { # API not finalized my @warn; local $SIG{__WARN__} = sub { push @warn, [ @_ ] }; my $attr = "So and so wrote:\n"; my $q = "> hello world\n" x 10; my $nq = "hello world\n" x 10; my @sections = PublicInbox::MsgIter::split_quotes($attr . $q . $nq); is($sections[0], $attr, 'attribution matches'); is($sections[1], $q, 'quoted section matches'); is($sections[2], $nq, 'non-quoted section matches'); is(scalar(@sections), 3, 'only three sections for short message'); is_deeply(\@warn, [], 'no warnings'); $q x= 3300; $nq x= 3300; @sections = PublicInbox::MsgIter::split_quotes($attr . $q . $nq); is_deeply(\@warn, [], 'no warnings on giant message'); is(join('', @sections), $attr . $q . $nq, 'result matches expected'); is(shift(@sections), $attr, 'attribution is first section'); my @check = ('', ''); while (defined(my $l = shift @sections)) { next if $l eq ''; like($l, qr/\n\z/s, 'section ends with newline'); my $idx = ($l =~ /\A>/) ? 0 : 1; $check[$idx] .= $l; } is($check[0], $q, 'long quoted section matches'); is($check[1], $nq, 'long quoted section matches'); } { open my $fh, '<', 't/utf8.eml' or BAIL_OUT $!; my $expect = do { local $/; <$fh> }; my $qp_patch = encode_qp($expect, "\r\n"); my $common = <new(<each_part(sub { my ($part, $level, @ex) = @{$_[0]}; my ($s, $err) = msg_part_text($part, $part->content_type); push @parts, $s; }); $expect =~ s/\n/\r\n/sg; utf8::decode($expect); # aka "bytes2str" is_deeply(\@parts, [ "blah\r\n", $expect ], 'fallback to application/octet-stream as UTF-8 text'); my $qp_binary = encode_qp("Binary\0crap", "\r\n"); $eml = PublicInbox::Eml->new(<each_part(sub { my ($part, $level, @ex) = @{$_[0]}; my ($s, $err) = msg_part_text($part, $part->content_type); push @parts, $s; push @err, $err; }); is_deeply(\@parts, [ "blah\r\n", undef ], 'non-text ignored in octet-stream'); ok($err[1], 'got error for second element'); } done_testing(); public-inbox-1.9.0/t/msgmap.t000066400000000000000000000044211430031475700160350ustar00rootroot00000000000000# Copyright (C) 2015-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; require_mods('DBD::SQLite'); use_ok 'PublicInbox::Msgmap'; my ($tmpdir, $for_destroy) = tmpdir(); my $f = "$tmpdir/msgmap.sqlite3"; my $d = PublicInbox::Msgmap->new_file($f, 1); my %mid2num; my %num2mid; my @mids = qw(a@b c@d e@f g@h aa@bb aa@cc); is_deeply([$d->minmax], [0,0], "zero min max on new DB"); foreach my $mid (@mids) { my $n = $d->mid_insert($mid); ok($n, "mid $mid inserted"); $mid2num{$mid} = $n; $num2mid{$n} = $mid; } $@ = undef; my $ret = $d->mid_insert('a@b'); is($ret, undef, 'duplicate mid_insert in undef result'); is($d->num_for('a@b'), $mid2num{'a@b'}, 'existing number not clobbered'); my $next = (sort(keys %num2mid))[-1]; is($d->mid_insert('ok@unique'), $next + 1, 'got expected num after failing mid_insert'); foreach my $n (keys %num2mid) { is($d->mid_for($n), $num2mid{$n}, "num:$n maps correctly"); } foreach my $mid (@mids) { is($d->num_for($mid), $mid2num{$mid}, "mid:$mid maps correctly"); } is(undef, $d->last_commit, "last commit not set"); my $lc = 'deadbeef' x 5; is(undef, $d->last_commit($lc), 'previous last commit (undef) returned'); is($lc, $d->last_commit, 'last commit was set correctly'); my $nc = 'deaddead' x 5; is($lc, $d->last_commit($nc), 'returned previously set commit'); is($nc, $d->last_commit, 'new commit was set correctly'); is($d->mid_delete('a@b'), 1, 'deleted a@b'); is($d->mid_delete('a@b') + 0, 0, 'delete again returns zero'); is(undef, $d->num_for('a@b'), 'num_for fails on deleted msg'); $d = undef; ok($d = PublicInbox::Msgmap->new_file($f, 1), 'idempotent DB creation'); my ($min, $max) = $d->minmax; ok($min > 0, "article min OK"); ok($max > 0 && $max < 10, "article max OK"); ok($min < $max, "article counts OK"); my $orig = $d->mid_insert('spam@1'); $d->mid_delete('spam@1'); is($d->mid_insert('spam@2'), 1 + $orig, "last number not recycled"); my $tmp = $d->tmp_clone($tmpdir); is_deeply([$d->minmax], [$tmp->minmax], 'Cloned temporary DB matches'); ok($tmp->mid_delete('spam@2'), 'temporary DB is writable'); is(eval { $tmp->atfork_prepare; $tmp->atfork_parent; 'ok' }, 'ok', 'atfork_* work on tmp_clone'); done_testing(); public-inbox-1.9.0/t/msgtime.t000066400000000000000000000103461430031475700162210ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::MsgTime; use PublicInbox::TestCommon; our $received_date = 'Mon, 22 Jan 2007 13:16:24 -0500'; sub datestamp ($) { my ($date) = @_; local $SIG{__WARN__} = sub {}; # Suppress warnings my $mime = PublicInbox::Eml->new(<<"EOF"); From: a\@example.com To: b\@example.com Subject: this is a subject Message-ID: Date: $date Received: (majordomo\@vger.kernel.org) by vger.kernel.org via listexpand id S932173AbXAVSQY (ORCPT ); $received_date hello world EOF my @ts = PublicInbox::MsgTime::msg_datestamp($mime->header_obj); return \@ts; } sub timestamp ($) { my ($received) = @_; local $SIG{__WARN__} = sub {}; # Suppress warnings my $mime = PublicInbox::Eml->new(<<"EOF"); From: a\@example.com To: b\@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 Received: (majordomo\@vger.kernel.org) by vger.kernel.org via listexpand id S932173AbXAVSQY (ORCPT ); $received hello world EOF my @ts = PublicInbox::MsgTime::msg_timestamp($mime->header_obj); return \@ts; } # Verify that the parser sucks up the timezone for dates for (my $min = -1440; $min <= 1440; $min += 30) { my $sign = ($min < 0) ? '-': '+'; my $h = abs(int($min / 60)); my $m = $min % 60; my $ts_expect = 749520000 - ($min * 60); my $tz_expect = sprintf('%s%02d%02d', $sign, $h, $m); if ($tz_expect >= 1400 || $tz_expect <= -1400) { $tz_expect = '+0000'; } my $date = sprintf("Fri, 02 Oct 1993 00:00:00 %s%02d%02d", $sign, $h, $m); my $result = datestamp($date); is_deeply($result, [ $ts_expect, $tz_expect ], $date); } # Verify that the parser sucks up the timezone and for received timestamps for (my $min = -1440; $min <= 1440; $min += 30) { my $sign = ($min < 0) ? '-' : '+'; my $h = abs(int($min / 60)); my $m = $min %60; my $ts_expect = 1169471784 - ($min * 60); my $tz_expect = sprintf('%s%02d%02d', $sign, $h, $m); if ($tz_expect >= 1400 || $tz_expect <= -1400) { $tz_expect = '+0000'; } my $received = sprintf('Mon, 22 Jan 2007 13:16:24 %s%02d%02d', $sign, $h, $m); is_deeply(timestamp($received), [ $ts_expect, $tz_expect ], $received); } sub is_datestamp ($$) { my ($date, $expect) = @_; is_deeply(datestamp($date), $expect, $date); } is_datestamp('Wed, 13 Dec 2006 10:26:38 +1', [1166001998, '+0100']); is_datestamp('Fri, 3 Feb 2006 18:11:22 -00', [1138990282, '+0000']); is_datestamp('Thursday, 20 Feb 2003 01:14:34 +000', [1045703674, '+0000']); is_datestamp('Fri, 28 Jun 2002 12:54:40 -700', [1025294080, '-0700']); is_datestamp('Sat, 12 Jan 2002 12:52:57 -200', [1010847177, '-0200']); is_datestamp('Mon, 05 Nov 2001 10:36:16 -800', [1004985376, '-0800']); is_datestamp('Tue, 3 Jun 2003 8:58:23 --500', [1054648703, '-0500']); is_datestamp('Thu, 18 May 100 10:40:43 +0200 (MET DST)', [958639243, '+0200']); is_datestamp('Thu, 18 May 2000 10:40:43 +0200', [958639243, '+0200']); is_datestamp('Tue, 27 Feb 2007 16:23:25 -0060', [1172597005, '-0100']); is_datestamp('Wed, 20 Dec 2006 05:32:58 -0420', [1166608378, '-0420']); is_datestamp('Wed, 20 Dec 2006 05:32:58 +0420', [1166577178, '+0420']); is_datestamp('Thu, 14 Dec 2006 00:20:24 +0480', [1166036424, '+0520']); is_datestamp('Thu, 14 Dec 2006 00:20:24 -0480', [1166074824, '-0520']); is_datestamp('Mon, 14 Apr 2014 07:59:01 -0007', [1397462761, '-0007']); SKIP: { require_mods('Date::Parse', 1); my $now = time; if (join("\0", gmtime($now)) ne join("\0", localtime($now))) { skip('needs TZ=UTC to test zone-less parsing', 1); } is_datestamp('Sat, 27 Sep 1997 10:02:32', [875354552, '+0000']); } # obsolete formats described in RFC2822 for (qw(UT GMT Z)) { is_datestamp('Fri, 02 Oct 1993 00:00:00 '.$_, [ 749520000, '+0000']); } is_datestamp('Fri, 02 Oct 1993 00:00:00 EDT', [ 749534400, '-0400']); # fallback to Received: header if Date: is out-of-range: is_datestamp('Fri, 1 Jan 1904 10:12:31 +0100', PublicInbox::MsgTime::str2date_zone($received_date)); is_datestamp('Fri, 9 Mar 71685 18:45:56 +0000', # Y10K is not my problem :P PublicInbox::MsgTime::str2date_zone($received_date)); done_testing(); public-inbox-1.9.0/t/multi-mid.t000066400000000000000000000042401430031475700164510ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::Eml; use PublicInbox::TestCommon; require_git(2.6); require_mods(qw(DBD::SQLite)); require PublicInbox::SearchIdx; my $delay = $ENV{TEST_DELAY_CONVERT} // ''; my $addr = 'test@example.com'; my $bad = PublicInbox::Eml->new(< Message-ID: From: a\@example.com To: $addr Subject: bad EOF my $good = PublicInbox::Eml->new(< From: b\@example.com To: $addr Subject: good EOF my $nr = 0; for my $order ([$bad, $good], [$good, $bad]) { my ($tmpdir, $for_destroy) = tmpdir(); my $ibx = create_inbox "test$delay.$nr", indexlevel => 'basic', sub { my ($im) = @_; for my $eml (@$order) { $im->add($eml) or BAIL_OUT; sleep($delay) if $delay; } }; ++$nr; my $before = [ $ibx->mm->minmax ]; my @old = ($ibx->over->get_art(1), $ibx->over->get_art(2)); $ibx->cleanup; my $rdr = { 1 => \(my $out = ''), 2 => \(my $err = '') }; my $cmd = [ '-convert', $ibx->{inboxdir}, "$tmpdir/v2" ]; my $env = { PI_DIR => "$tmpdir/.public-inbox" }; ok(run_script($cmd, $env, $rdr), 'convert to v2'); $err =~ s!\AW: \Q$ibx->{inboxdir}\E not configured[^\n]+\n!!s; is($err, '', 'no errors or warnings from -convert'); $ibx->{version} = 2; $ibx->{inboxdir} = "$tmpdir/v2"; is_deeply([$ibx->mm->minmax], $before, 'min, max article numbers unchanged'); my @v2 = ($ibx->over->get_art(1), $ibx->over->get_art(2)); is_deeply(\@v2, \@old, 'v2 conversion times match'); xsys(qw(git clone -sq --mirror), "$tmpdir/v2/git/0.git", "$tmpdir/v2-clone/git/0.git") == 0 or die "clone: $?"; $cmd = [ '-init', '-Lbasic', '-V2', 'v2c', "$tmpdir/v2-clone", 'http://example.com/v2c', 'v2c@example.com' ]; ok(run_script($cmd, $env), 'init clone'); $cmd = [ qw(-index -j0), "$tmpdir/v2-clone" ]; sleep($delay) if $delay; ok(run_script($cmd, $env), 'index the clone'); $ibx->cleanup; $ibx->{inboxdir} = "$tmpdir/v2-clone"; my @v2c = ($ibx->over->get_art(1), $ibx->over->get_art(2)); is_deeply(\@v2c, \@old, 'v2 clone times match'); } done_testing(); public-inbox-1.9.0/t/net_reader-imap.t000066400000000000000000000032451430031475700176100ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_git 2.6; require_mods(qw(-imapd Search::Xapian Mail::IMAPClient)); use PublicInbox::Config; my ($tmpdir, $for_destroy) = tmpdir; my ($ro_home, $cfg_path) = setup_public_inboxes; my $cmd = [ '-imapd', '-W0', "--stdout=$tmpdir/1", "--stderr=$tmpdir/2" ]; my $sock = tcp_server; my $env = { PI_CONFIG => $cfg_path }; my $td = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT "-imapd: $?"; my ($host, $port) = tcp_host_port $sock; require_ok 'PublicInbox::NetReader'; my $nrd = PublicInbox::NetReader->new; $nrd->add_url(my $url = "imap://$host:$port/t.v2.0"); is($nrd->errors, undef, 'no errors'); $nrd->{pi_cfg} = PublicInbox::Config->new($cfg_path); $nrd->imap_common_init; $nrd->{quiet} = 1; my (%eml, %urls, %args, $nr, @w); local $SIG{__WARN__} = sub { push(@w, @_) }; $nrd->imap_each($url, sub { my ($u, $uid, $kw, $eml, $arg) = @_; ++$urls{$u}; ++$args{$arg}; like($uid, qr/\A[0-9]+\z/, 'got digit UID '.$uid); ++$eml{ref($eml)}; ++$nr; }, 'blah'); is(scalar(@w), 0, 'no warnings'); ok($nr, 'got some emails'); is($eml{'PublicInbox::Eml'}, $nr, 'got expected Eml objects'); is(scalar keys %eml, 1, 'only got Eml objects'); is(scalar(grep(/\A\Q$url\E;UIDVALIDITY=\d+\z/, keys %urls)), scalar(keys %urls), 'UIDVALIDITY added to URL passed to callback'); is_deeply([values %urls], [$nr], 'one URL expected number of times'); is(scalar keys %urls, 1, 'only got one URL'); is($args{blah}, $nr, 'got arg expected number of times'); is(scalar keys %args, 1, 'only got one arg'); done_testing; public-inbox-1.9.0/t/netd.t000066400000000000000000000050121430031475700155000ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use v5.12; use Socket qw(IPPROTO_TCP SOL_SOCKET); use PublicInbox::TestCommon; # IO::Poll and Net::NNTP are part of the standard library, but # distros may split them off... require_mods(qw(-imapd IO::Socket::SSL Mail::IMAPClient IO::Poll Net::NNTP)); my $imap_client = 'Mail::IMAPClient'; $imap_client->can('starttls') or plan skip_all => 'Mail::IMAPClient does not support TLS'; Net::NNTP->can('starttls') or plan skip_all => 'Net::NNTP does not support TLS'; my $cert = 'certs/server-cert.pem'; my $key = 'certs/server-key.pem'; unless (-r $key && -r $cert) { plan skip_all => "certs/ missing for $0, run $^X ./create-certs.perl in certs/"; } use_ok 'PublicInbox::TLS'; use_ok 'IO::Socket::SSL'; require_git('2.6'); my ($tmpdir, $for_destroy) = tmpdir(); my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $pi_config; my $group = 'test-netd'; my $addr = $group . '@example.com'; # ensure we have free, low-numbered contiguous FDs from 3.. FD inheritance my @pad_pipes; for (1..3) { pipe(my ($r, $w)) or xbail "pipe: $!"; push @pad_pipes, $r, $w; }; my %srv = map { $_ => tcp_server() } qw(imap nntp imaps nntps); my $ibx = create_inbox 'netd', version => 2, -primary_address => $addr, indexlevel => 'basic', sub { my ($im, $ibx) = @_; $im->add(eml_load('t/data/0001.patch')) or BAIL_OUT '->add'; $pi_config = "$ibx->{inboxdir}/pi_config"; open my $fh, '>', $pi_config or BAIL_OUT "open: $!"; print $fh <{inboxdir} address = $addr indexlevel = basic newsgroup = $group EOF close $fh or BAIL_OUT "close: $!\n"; }; $pi_config //= "$ibx->{inboxdir}/pi_config"; my @args = ("--cert=$cert", "--key=$key"); my $rdr = {}; my $fd = 3; while (my ($k, $v) = each %srv) { push @args, "-l$k://".tcp_host_port($v); $rdr->{$fd++} = $v; } my $cmd = [ '-netd', '-W0', @args, "--stdout=$out", "--stderr=$err" ]; my $env = { PI_CONFIG => $pi_config }; my $td = start_script($cmd, $env, $rdr); @pad_pipes = (); undef $rdr; my %o = ( SSL_hostname => 'server.local', SSL_verifycn_name => 'server.local', SSL_verify_mode => SSL_VERIFY_PEER(), SSL_ca_file => 'certs/test-ca.pem', ); { my $c = tcp_connect($srv{imap}); my $msg = <$c>; like($msg, qr/IMAP4rev1/, 'connected to IMAP'); } { my $c = tcp_connect($srv{nntp}); my $msg = <$c>; like($msg, qr/^201 .*? ready - post via email/, 'connected to NNTP'); } # TODO: more tests done_testing; public-inbox-1.9.0/t/nntp.t000066400000000000000000000074551430031475700155420ustar00rootroot00000000000000# Copyright (C) 2015-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Eml; require_mods(qw(DBD::SQLite Data::Dumper)); use_ok 'PublicInbox::NNTP'; use_ok 'PublicInbox::Inbox'; use PublicInbox::Config; { sub quote_str { my (undef, $s) = split(/ = /, Data::Dumper::Dumper($_[0]), 2); $s =~ s/;\n//; $s; } sub wm_prepare { my ($wm) = @_; my $orig = qq{'$wm'}; PublicInbox::NNTP::wildmat2re($_[0]); my $new = quote_str($_[0]); ($orig, $new); } sub wildmat_like { my ($str, $wm) = @_; my ($orig, $new) = wm_prepare($wm); like($str, $wm, "$orig matches '$str' using $new"); } sub wildmat_unlike { my ($str, $wm, $check_ex) = @_; if ($check_ex) { use re 'eval'; my $re = qr/$wm/; like($str, $re, "normal re with $wm matches, but ..."); } my ($orig, $new) = wm_prepare($wm); unlike($str, $wm, "$orig does not match '$str' using $new"); } wildmat_like('[foo]', '[\[foo\]]'); wildmat_like('any', '*'); wildmat_unlike('bar.foo.bar', 'foo.*'); # no code execution wildmat_unlike('HI', '(?{"HI"})', 1); wildmat_unlike('HI', '[(?{"HI"})]', 1); } { sub ngpat_like { my ($str, $pat) = @_; my $orig = $pat; PublicInbox::NNTP::ngpat2re($pat); like($str, $pat, "'$orig' matches '$str' using $pat"); } ngpat_like('any', '*'); ngpat_like('a.s.r', 'a.t,a.s.r'); ngpat_like('a.s.r', 'a.t,a.s.*'); } { use POSIX qw(strftime); sub time_roundtrip { my ($date, $time, $gmt) = @_; my $m = join(' ', @_); my $ts = PublicInbox::NNTP::parse_time(@_); my @t = $gmt ? gmtime($ts) : localtime($ts); my ($d, $t) = split(' ', strftime('%Y%m%d %H%M%S', @t)); if (length($date) != 8) { # Net::NNTP uses YYMMDD :< $d =~ s/^[0-9]{2}//; } is_deeply([$d, $t], [$date, $time], "roundtripped: $m"); $ts; } my $x1 = time_roundtrip(qw(20141109 060606 GMT)); my $x2 = time_roundtrip(qw(141109 060606 GMT)); my $x3 = time_roundtrip(qw(930724 060606 GMT)); my $x5 = time_roundtrip(qw(710101 000000)); my $x6 = time_roundtrip(qw(720101 000000)); SKIP: { skip('YYMMDD test needs updating', 6) if (time > 0x7fffffff); # our world probably ends in 2038, but if not we'll try to # remember to update the test then is($x1, $x2, 'YYYYMMDD and YYMMDD parse identically'); is(strftime('%Y', gmtime($x3)), '1993', '930724 was in 1993'); my $epoch = time_roundtrip(qw(700101 000000 GMT)); is($epoch, 0, 'epoch parsed correctly'); ok($x6 > $x5, '1972 > 1971'); ok($x5 > $epoch, '1971 > Unix epoch'); } } { # test setting NNTP headers in HEAD and ARTICLE requests my $u = 'https://example.com/a/'; my $ibx = PublicInbox::Inbox->new({ name => 'test', inboxdir => 'test.git', address => 'a@example.com', -primary_address => 'a@example.com', newsgroup => 'test', domain => 'example.com', url => [ '//example.com/a' ]}); is($ibx->base_url, $u, 'URL expanded'); my $mid = 'a@b'; my $mime = PublicInbox::Eml->new("Message-ID: <$mid>\r\n\r\n"); my $hdr = $mime->header_obj; my $mock_self = { nntpd => { servername => 'example.com', pi_cfg => bless {}, 'PublicInbox::Config', }, ibx => $ibx, }; my $smsg = { num => 1, mid => $mid, nntp => $mock_self, -ibx => $ibx }; PublicInbox::NNTP::set_nntp_headers($hdr, $smsg); is_deeply([ $mime->header('Message-ID') ], [ "<$mid>" ], 'Message-ID unchanged'); is_deeply([ $mime->header('Newsgroups') ], [ 'test' ], 'Newsgroups: set'); is_deeply([ $mime->header('Xref') ], [ 'example.com test:1' ], 'Xref: set'); $smsg->{num} = 2; PublicInbox::NNTP::set_nntp_headers($hdr, $smsg); is_deeply([ $mime->header('Message-ID') ], [ "<$mid>" ], 'Message-ID unchanged'); is_deeply([ $mime->header('Xref') ], [ 'example.com test:2' ], 'Old Xref: clobbered'); } done_testing(); public-inbox-1.9.0/t/nntpd-tls.t000066400000000000000000000156021430031475700164770ustar00rootroot00000000000000#!perl -w # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use Socket qw(SOCK_STREAM IPPROTO_TCP SOL_SOCKET); # IO::Poll and Net::NNTP are part of the standard library, but # distros may split them off... require_mods(qw(DBD::SQLite IO::Socket::SSL Net::NNTP IO::Poll)); Net::NNTP->can('starttls') or plan skip_all => 'Net::NNTP does not support TLS'; my $cert = 'certs/server-cert.pem'; my $key = 'certs/server-key.pem'; unless (-r $key && -r $cert) { plan skip_all => "certs/ missing for $0, run $^X ./create-certs.perl in certs/"; } use_ok 'PublicInbox::TLS'; use_ok 'IO::Socket::SSL'; our $need_zlib; eval { require Compress::Raw::Zlib } or $need_zlib = 'Compress::Raw::Zlib missing'; my $version = 2; # v2 needs newer git require_git('2.6') if $version >= 2; my ($tmpdir, $for_destroy) = tmpdir(); my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $group = 'test-nntpd-tls'; my $addr = $group . '@example.com'; my $starttls = tcp_server(); my $nntps = tcp_server(); my $pi_config; my $ibx = create_inbox "v$version", version => $version, indexlevel => 'basic', sub { my ($im, $ibx) = @_; $pi_config = "$ibx->{inboxdir}/pi_config"; open my $fh, '>', $pi_config or BAIL_OUT "open: $!"; print $fh <{inboxdir} address = $addr indexlevel = basic newsgroup = $group EOF close $fh or BAIL_OUT "close: $!"; $im->add(eml_load 't/data/0001.patch') or BAIL_OUT; }; $pi_config //= "$ibx->{inboxdir}/pi_config"; undef $ibx; my $nntps_addr = tcp_host_port($nntps); my $starttls_addr = tcp_host_port($starttls); my $env = { PI_CONFIG => $pi_config }; my $td; for my $args ( [ "--cert=$cert", "--key=$key", "-lnntps://$nntps_addr", "-lnntp://$starttls_addr" ], ) { for ($out, $err) { open my $fh, '>', $_ or die "truncate: $!"; } my $cmd = [ '-nntpd', '-W0', @$args, "--stdout=$out", "--stderr=$err" ]; $td = start_script($cmd, $env, { 3 => $starttls, 4 => $nntps }); my %o = ( SSL_hostname => 'server.local', SSL_verifycn_name => 'server.local', SSL_verify_mode => SSL_VERIFY_PEER(), SSL_ca_file => 'certs/test-ca.pem', ); my $expect = { $group => [qw(1 1 n)] }; # start negotiating a slow TLS connection my $slow = tcp_connect($nntps, Blocking => 0); $slow = IO::Socket::SSL->start_SSL($slow, SSL_startHandshake => 0, %o); my $slow_done = $slow->connect_SSL; my @poll; if ($slow_done) { diag('W: connect_SSL early OK, slow client test invalid'); use PublicInbox::Syscall qw(EPOLLIN EPOLLOUT); @poll = (fileno($slow), EPOLLIN | EPOLLOUT); } else { @poll = (fileno($slow), PublicInbox::TLS::epollbit()); } # we should call connect_SSL much later... # NNTPS my $c = Net::NNTP->new($nntps_addr, %o, SSL => 1); my $list = $c->list; is_deeply($list, $expect, 'NNTPS LIST works'); unlike(get_capa($c), qr/\bSTARTTLS\r\n/, 'STARTTLS not advertised for NNTPS'); is($c->command('QUIT')->response(), Net::Cmd::CMD_OK(), 'QUIT works'); is(0, sysread($c, my $buf, 1), 'got EOF after QUIT'); # STARTTLS $c = Net::NNTP->new($starttls_addr, %o); $list = $c->list; is_deeply($list, $expect, 'plain LIST works'); ok($c->starttls, 'STARTTLS succeeds'); is($c->code, 382, 'got 382 for STARTTLS'); $list = $c->list; is_deeply($list, $expect, 'LIST works after STARTTLS'); unlike(get_capa($c), qr/\bSTARTTLS\r\n/, 'STARTTLS not advertised after STARTTLS'); # Net::NNTP won't let us do dumb things, but we need to test # dumb things, so use Net::Cmd directly: my $n = $c->command('STARTTLS')->response(); is($n, Net::Cmd::CMD_ERROR(), 'error attempting STARTTLS again'); is($c->code, 502, '502 according to RFC 4642 sec#2.2.1'); # STARTTLS with bad hostname $o{SSL_hostname} = $o{SSL_verifycn_name} = 'server.invalid'; $c = Net::NNTP->new($starttls_addr, %o); like(get_capa($c), qr/\bSTARTTLS\r\n/, 'STARTTLS advertised'); $list = $c->list; is_deeply($list, $expect, 'plain LIST works again'); ok(!$c->starttls, 'STARTTLS fails with bad hostname'); $c = Net::NNTP->new($starttls_addr, %o); $list = $c->list; is_deeply($list, $expect, 'not broken after bad negotiation'); # NNTPS with bad hostname $c = Net::NNTP->new($nntps_addr, %o, SSL => 1); is($c, undef, 'NNTPS fails with bad hostname'); $o{SSL_hostname} = $o{SSL_verifycn_name} = 'server.local'; $c = Net::NNTP->new($nntps_addr, %o, SSL => 1); ok($c, 'NNTPS succeeds again with valid hostname'); # slow TLS connection did not block the other fast clients while # connecting, finish it off: until ($slow_done) { IO::Poll::_poll(-1, @poll); $slow_done = $slow->connect_SSL and last; @poll = (fileno($slow), PublicInbox::TLS::epollbit()); } $slow->blocking(1); ok(sysread($slow, my $greet, 4096) > 0, 'slow got greeting'); like($greet, qr/\A201 /, 'got expected greeting'); is(syswrite($slow, "QUIT\r\n"), 6, 'slow wrote QUIT'); ok(sysread($slow, my $end, 4096) > 0, 'got EOF'); is(sysread($slow, my $eof, 4096), 0, 'got EOF'); $slow = undef; test_lei(sub { lei_ok qw(ls-mail-source), "nntp://$starttls_addr", \'STARTTLS not used by default'; ok(!lei(qw(ls-mail-source -c nntp.starttls=true), "nntp://$starttls_addr"), 'STARTTLS verify fails'); like $lei_err, qr/STARTTLS requested/, 'STARTTLS noted in stderr'; }); SKIP: { skip 'TCP_DEFER_ACCEPT is Linux-only', 2 if $^O ne 'linux'; my $var = eval { Socket::TCP_DEFER_ACCEPT() } // 9; defined(my $x = getsockopt($nntps, IPPROTO_TCP, $var)) or die; ok(unpack('i', $x) > 0, 'TCP_DEFER_ACCEPT set on NNTPS'); defined($x = getsockopt($starttls, IPPROTO_TCP, $var)) or die; is(unpack('i', $x), 0, 'TCP_DEFER_ACCEPT is 0 on plain NNTP'); }; SKIP: { skip 'SO_ACCEPTFILTER is FreeBSD-only', 2 if $^O ne 'freebsd'; if (system('kldstat -m accf_data >/dev/null')) { skip 'accf_data not loaded? kldload accf_data', 2; } require PublicInbox::Daemon; my $x = getsockopt($nntps, SOL_SOCKET, $PublicInbox::Daemon::SO_ACCEPTFILTER); like($x, qr/\Adataready\0+\z/, 'got dataready accf for NNTPS'); $x = getsockopt($starttls, IPPROTO_TCP, $PublicInbox::Daemon::SO_ACCEPTFILTER); is($x, undef, 'no BSD accept filter for plain NNTP'); }; $c = undef; $td->kill; $td->join; is($?, 0, 'no error in exited process'); my $eout = eval { open my $fh, '<', $err or die "open $err failed: $!"; local $/; <$fh>; }; unlike($eout, qr/wide/i, 'no Wide character warnings'); } done_testing(); sub get_capa { my ($sock) = @_; syswrite($sock, "CAPABILITIES\r\n"); my $capa = ''; do { my $r = sysread($sock, $capa, 8192, length($capa)); die "unexpected: $!" unless defined($r); die 'unexpected EOF' if $r == 0; } until $capa =~ /\.\r\n\z/; my $deflate_capa = qr/\r\nCOMPRESS DEFLATE\r\n/; if ($need_zlib) { unlike($capa, $deflate_capa, 'COMPRESS DEFLATE NOT advertised '.$need_zlib); } else { like($capa, $deflate_capa, 'COMPRESS DEFLATE advertised'); } $capa; } 1; public-inbox-1.9.0/t/nntpd-v2.t000066400000000000000000000002741430031475700162230ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ local $ENV{PI_TEST_VERSION} = 2; require './t/nntpd.t'; public-inbox-1.9.0/t/nntpd.t000066400000000000000000000340701430031475700156770ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_mods(qw(DBD::SQLite)); use PublicInbox::Eml; use Socket qw(IPPROTO_TCP TCP_NODELAY); use Net::NNTP; use Sys::Hostname; use POSIX qw(_exit); use Digest::SHA; # t/nntpd-v2.t wraps this for v2 my $version = $ENV{PI_TEST_VERSION} || 1; require_git('2.6') if $version == 2; use_ok 'PublicInbox::Msgmap'; my $lsof = require_cmd('lsof', 1); my $fast_idle = eval { require Linux::Inotify2; 1 } // eval { require IO::KQueue; 1 }; my ($tmpdir, $for_destroy) = tmpdir(); my $home = "$tmpdir/pi-home"; my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $inboxdir = "$tmpdir/inbox"; my $group = 'test-nntpd'; my $addr = $group . '@example.com'; my $sock = tcp_server(); my $host_port = tcp_host_port($sock); my $td; my $eml = PublicInbox::Eml->new(< From: =?utf-8?Q?El=C3=A9anor?= Cc: $addr Message-Id: Content-Type: text/plain; charset=utf-8 Subject: Testing for =?utf-8?Q?El=C3=A9anor?= Date: Thu, 01 Jan 1970 06:06:06 +0000 Content-Transfer-Encoding: 8bit References: This is a test message for El\xc3\xa9anor EOF my $list_id = $addr; $list_id =~ s/@/./; $eml->header_set('List-Id', "<$list_id>"); my $str = $eml->as_string; $str =~ s/(? $version, indexlevel => 'basic', tmpdir => $inboxdir, sub { my ($im, $ibx) = @_; $im->add($eml) or BAIL_OUT; }; undef $eml; my $other = create_inbox "other$version", version => $version, indexlevel => 'basic', sub { my ($im) = @_; $im->add(eml_load 't/utf8.eml') or BAIL_OUT; }; local $ENV{HOME} = $home; mkdir $home or BAIL_OUT $!; mkdir "$home/.public-inbox" or BAIL_OUT $!; open my $cfgfh, '>', "$home/.public-inbox/config" or BAIL_OUT $!; print $cfgfh <{inboxdir} url = http://example.com/xyz address = e\@example.com indexlevel = basic newsgroup = x.y.z [publicinboxMda] spamcheck = none EOF close $cfgfh or BAIL_OUT; { my $cmd = [ '-nntpd', '-W0', "--stdout=$out", "--stderr=$err" ]; $td = start_script($cmd, undef, { 3 => $sock }); my $n = Net::NNTP->new($host_port); my $list = $n->list; ok(delete $list->{'x.y.z'}, 'deleted x.y.z group'); is_deeply($list, { $group => [ qw(1 1 n) ] }, 'LIST works'); is_deeply([$n->group($group)], [ qw(0 1 1), $group ], 'GROUP works'); is_deeply($n->listgroup($group), [1], 'listgroup OK'); # TODO: Net::NNTP::listgroup does not support range at the moment { my $expect = [ qw(Subject: From: Date: Message-ID: References: Bytes: Lines: Xref:full) ]; is_deeply($n->overview_fmt, $expect, 'RFC3977 8.4.2 compliant LIST OVERVIEW.FMT'); } SKIP: { $n->can('starttls') or skip('Net::NNTP too old to support STARTTLS', 2); require_mods('IO::Socket::SSL', 2); ok(!$n->starttls, 'STARTTLS fails when unconfigured'); is($n->code, 580, 'got 580 code on server w/o TLS'); }; my $mid = ''; my %xhdr = ( 'message-id' => $mid, subject => "Testing for El\xc3\xa9anor", 'date' => 'Thu, 01 Jan 1970 06:06:06 +0000', 'from' => "El\xc3\xa9anor ", 'to' => "El\xc3\xa9anor ", 'cc' => $addr, 'xref' => hostname . " $group:1", 'references' => '', ); my $s = tcp_connect($sock); sysread($s, my $buf, 4096); is($buf, "201 " . hostname . " ready - post via email\r\n", 'got greeting'); ok(syswrite($s, " \r\n"), 'wrote spaces'); ok(syswrite($s, "\r\n"), 'wrote nothing'); syswrite($s, "NEWGROUPS\t19990424 000000 \033GMT\007\r\n"); is(0, sysread($s, $buf, 4096), 'GOT EOF on cntrl'); $s = tcp_connect($sock); sysread($s, $buf, 4096); is($buf, "201 " . hostname . " ready - post via email\r\n", 'got greeting'); syswrite($s, "CAPABILITIES\r\n"); $buf = read_til_dot($s); like($buf, qr/\r\nVERSION 2\r\n/s, 'CAPABILITIES works'); unlike($buf, qr/STARTTLS/s, 'STARTTLS not advertised'); my $deflate_capa = qr/\r\nCOMPRESS DEFLATE\r\n/; if (eval { require Compress::Raw::Zlib }) { like($buf, $deflate_capa, 'DEFLATE advertised'); } else { unlike($buf, $deflate_capa, 'DEFLATE not advertised (Compress::Raw::Zlib missing)'); } syswrite($s, "NEWGROUPS 19990424 000000 GMT\r\n"); $buf = read_til_dot($s); like($buf, qr/\A231 list of /, 'newgroups OK'); while (my ($k, $v) = each %xhdr) { is_deeply($n->xhdr("$k $mid"), { $mid => $v }, "XHDR $k by message-id works"); is_deeply($n->xhdr("$k 1"), { 1 => $v }, "$k by article number works"); is_deeply($n->xhdr("$k 1-"), { 1 => $v }, "$k by article range works"); $buf = ''; syswrite($s, "HDR $k $mid\r\n"); $buf = read_til_dot($s); my @r = split("\r\n", $buf); like($r[0], qr/\A225 /, '225 response for HDR'); is($r[1], "0 $v", 'got expected response for HDR'); } { my $nogroup = Net::NNTP->new($host_port); while (my ($k, $v) = each %xhdr) { is_deeply($nogroup->xhdr("$k $mid"), { $mid => $v }, "$k by message-id works without group"); } } is_deeply($n->xover('1-'), { '1' => ["Testing for El\xc3\xa9anor", "El\xc3\xa9anor ", 'Thu, 01 Jan 1970 06:06:06 +0000', '', '', $len, '1', 'Xref: '. hostname . ' test-nntpd:1'] }, "XOVER range works"); is_deeply($n->xover('1'), { '1' => ["Testing for El\xc3\xa9anor", "El\xc3\xa9anor ", 'Thu, 01 Jan 1970 06:06:06 +0000', '', '', $len, '1', 'Xref: '. hostname . ' test-nntpd:1'] }, "XOVER by article works"); is_deeply($n->head(1), $n->head(''), 'HEAD OK'); is_deeply($n->body(1), $n->body(''), 'BODY OK'); is_deeply($n->nntpstat(1), '', 'STAT'); is($n->body(1)->[0], "This is a test message for El\xc3\xa9anor\n", 'body really matches'); my $art = $n->article(1); is(ref($art), 'ARRAY', 'got array for ARTICLE'); is_deeply($art, $n->article(''), 'ARTICLE OK'); is($n->article(999), undef, 'non-existent num'); is($n->article(''), undef, 'non-existent mid'); { syswrite($s, "OVER $mid\r\n"); $buf = read_til_dot($s); my @r = split("\r\n", $buf); like($r[0], qr/^224 /, 'got 224 response for OVER'); is($r[1], "0\tTesting for El\xc3\xa9anor\t" . "El\xc3\xa9anor \t" . "Thu, 01 Jan 1970 06:06:06 +0000\t" . "$mid\t\t$len\t1" . "\tXref: " . hostname . " test-nntpd:0", 'OVER by Message-ID works'); is($r[2], '.', 'correctly terminated response'); } is_deeply($n->xhdr(qw(Cc 1-)), { 1 => 'test-nntpd@example.com' }, 'XHDR Cc 1- works'); is_deeply($n->xhdr(qw(References 1-)), { 1 => '' }, 'XHDR References 1- works)'); is_deeply($n->xhdr(qw(list-id 1-)), {}, 'XHDR on invalid header returns empty'); my $mids = $n->newnews(0, $group); is_deeply($mids, [''], 'NEWNEWS works'); { my $t0 = time; my $date = $n->date; my $t1 = time; ok($date >= $t0, 'valid date after start'); ok($date <= $t1, 'valid date before stop'); } if ('leafnode interop') { my $for_leafnode = PublicInbox::Eml->new(<<""); From: longheader\@example.com To: $addr Subject: none Date: Fri, 02 Oct 1993 00:00:00 +0000 my $long_hdr = 'for-leafnode-'.('y'x200).'@example.com'; $for_leafnode->header_set('Message-ID', "<$long_hdr>"); my $im = $ibx->importer(0); $im->add($for_leafnode); $im->done; my $hdr = $n->head("<$long_hdr>"); my $expect = qr/\AMessage-ID: /i . qr/\Q<$long_hdr>\E/; ok(scalar(grep(/$expect/, @$hdr)), 'Message-ID not folded'); ok(scalar(grep(/^Path:/, @$hdr)), 'Path: header found'); # it's possible for v2 messages to have 2+ Message-IDs, # but leafnode can't handle it if ($version != 1) { my @mids = ("<$long_hdr>", '<2mid@wtf>'); $for_leafnode->header_set('Message-ID', @mids); $for_leafnode->body_set('not-a-dupe'); my $warn = ''; local $SIG{__WARN__} = sub { $warn .= join('', @_) }; $im->add($for_leafnode); $im->done; like($warn, qr/reused/, 'warned for reused MID'); $hdr = $n->head('<2mid@wtf>'); my @hmids = grep(/\AMessage-ID: /i, @$hdr); is(scalar(@hmids), 1, 'Single Message-ID in header'); like($hmids[0], qr/: <2mid\@wtf>/, 'got expected mid'); } } ok($n->article(''), 'cross newsgroup ARTICLE by Message-ID'); ok($n->body(''), 'cross newsgroup BODY by Message-ID'); ok($n->head(''), 'cross newsgroup HEAD by Message-ID'); is($n->xpath(''), 'x.y.z/1', 'xpath hit'); is($n->xpath(''), undef, 'xpath miss'); # pipelined requests: { my $nreq = 90; my $nart = 2; syswrite($s, "GROUP $group\r\n"); my $res = <$s>; my $rdr = fork; if ($rdr == 0) { for (1..$nreq) { <$s> =~ /\A224 / or _exit(1); <$s> =~ /\A1/ or _exit(2); <$s> eq ".\r\n" or _exit(3); } my %sums; for (1..$nart) { <$s> =~ /\A220 / or _exit(4); my $dig = Digest::SHA->new(1); while (my $l = <$s>) { last if $l eq ".\r\n"; $dig->add($l); } $dig = $dig->hexdigest; $sums{$dig}++; } if ($nart) { scalar(keys(%sums)) == 1 or _exit(5); (values(%sums))[0] == $nart or _exit(6); } _exit(0); } for (1..$nreq) { syswrite($s, "XOVER 1\r\n"); } syswrite($s, "ARTICLE 1\r\n" x $nart); is($rdr, waitpid($rdr, 0), 'reader done'); is($? >> 8, 0, 'no errors'); } my $noerr = { 2 => \(my $null) }; SKIP: { if ($INC{'Search/Xapian.pm'} && ($ENV{TEST_RUN_MODE}//2)) { skip 'Search/Xapian.pm pre-loaded (by t/run.perl?)', 1; } $lsof or skip 'lsof missing', 1; my @of = xqx([$lsof, '-p', $td->{pid}], undef, $noerr); skip('lsof broken', 1) if (!scalar(@of) || $?); my @xap = grep m!Search/Xapian!, @of; is_deeply(\@xap, [], 'Xapian not loaded in nntpd'); } # -compact requires Xapian SKIP: { require_mods('Search::Xapian', 2); have_xapian_compact or skip 'xapian-compact missing', 2; is(xsys(qw(git config), "--file=$home/.public-inbox/config", "publicinbox.$group.indexlevel", 'medium'), 0, 'upgraded indexlevel'); my $ex = eml_load('t/data/0001.patch'); is($n->article($ex->header('Message-ID')), undef, 'article did not exist'); my $im = $ibx->importer(0); $im->add($ex); $im->done; { my $f = $ibx->mm->{dbh}->sqlite_db_filename; my $tmp = "$tmpdir/tmp.sqlite3"; $ibx->mm->{dbh}->sqlite_backup_to_file($tmp); delete $ibx->{mm}; rename($tmp, $f) or BAIL_OUT "rename($tmp, $f): $!"; } ok(run_script([qw(-index -c -j0 --reindex), $ibx->{inboxdir}], undef, $noerr), '-compacted'); select(undef, undef, undef, $fast_idle ? 0.1 : 2.1); $art = $n->article($ex->header('Message-ID')); ok($art, 'new article retrieved after compact'); $lsof or skip 'lsof missing', 1; ($^O =~ /\A(?:linux)\z/) or skip "lsof /(deleted)/ check untested on $^O", 1; my @lsof = xqx([$lsof, '-p', $td->{pid}], undef, $noerr); my $d = [ grep(/\(deleted\)/, @lsof) ]; is_deeply($d, [], 'no deleted files') or diag explain($d); }; SKIP: { test_watch($tmpdir, $host_port, $group) }; { setsockopt($s, IPPROTO_TCP, TCP_NODELAY, 1); syswrite($s, 'HDR List-id 1-'); select(undef, undef, undef, 0.15); ok($td->kill, 'killed nntpd'); select(undef, undef, undef, 0.15); syswrite($s, "\r\n"); $buf = ''; do { sysread($s, $buf, 4096, length($buf)); } until ($buf =~ /\r\n\z/); my @r = split("\r\n", $buf); like($r[0], qr/^5\d\d /, 'got 5xx response for unoptimized HDR'); is(scalar @r, 1, 'only one response line'); } $n = $s = undef; $td->join; is($?, 0, 'no error in exited process'); my $eout = do { open my $fh, '<', $err or die "open $err failed: $!"; local $/; <$fh>; }; unlike($eout, qr/wide/i, 'no Wide character warnings'); } $td = undef; done_testing(); sub read_til_dot { my ($s) = @_; my $buf = ''; do { sysread($s, $buf, 4096, length($buf)); } until ($buf =~ /\r\n\.\r\n\z/); $buf; } sub test_watch { my ($tmpdir, $host_port, $group) = @_; use_ok 'PublicInbox::Watch'; use_ok 'PublicInbox::InboxIdle'; use_ok 'PublicInbox::Config'; require_git('1.8.5', 1) or skip('git 1.8.5+ needed for --urlmatch', 4); my $old_env = { HOME => $ENV{HOME} }; my $home = "$tmpdir/watch_home"; mkdir $home or BAIL_OUT $!; mkdir "$home/.public-inbox" or BAIL_OUT $!; local $ENV{HOME} = $home; my $name = 'watchnntp'; my $addr = "i1\@example.com"; my $url = "http://example.com/i1"; my $inboxdir = "$tmpdir/watchnntp"; my $cmd = ['-init', '-V1', '-Lbasic', $name, $inboxdir, $url, $addr]; my $nntpurl = "nntp://$host_port/$group"; run_script($cmd) or BAIL_OUT("init $name"); xsys(qw(git config), "--file=$home/.public-inbox/config", "publicinbox.$name.watch", $nntpurl) == 0 or BAIL_OUT "git config $?"; # try again with polling xsys(qw(git config), "--file=$home/.public-inbox/config", 'nntp.PollInterval', 0.11) == 0 or BAIL_OUT "git config $?"; my $cfg = PublicInbox::Config->new; PublicInbox::DS->Reset; my $ii = PublicInbox::InboxIdle->new($cfg); my $cb = sub { PublicInbox::DS->SetPostLoopCallback(sub {}) }; my $obj = bless \$cb, 'PublicInbox::TestCommon::InboxWakeup'; $cfg->each_inbox(sub { $_[0]->subscribe_unlock('ident', $obj) }); my $watcherr = "$tmpdir/watcherr"; open my $err_wr, '>', $watcherr or BAIL_OUT $!; open my $err, '<', $watcherr or BAIL_OUT $!; my $w = start_script(['-watch'], undef, { 2 => $err_wr }); diag 'waiting for initial fetch...'; PublicInbox::DS::event_loop(); diag 'inbox unlocked on initial fetch'; $w->kill; $w->join; is($?, 0, 'no error in exited -watch process'); $cfg->each_inbox(sub { shift->unsubscribe_unlock('ident') }); $ii->close; PublicInbox::DS->Reset; my @err = grep(!/^(?:I:|#)/, <$err>); is(@err, 0, 'no warnings/errors from -watch'.join(' ', @err)); my @ls = xqx(['git', "--git-dir=$inboxdir", qw(ls-tree -r HEAD)]); isnt(scalar(@ls), 0, 'imported something'); } 1; public-inbox-1.9.0/t/nodatacow.t000066400000000000000000000027301430031475700165310ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use File::Temp 0.19; use_ok 'PublicInbox::Syscall'; # btrfs on Linux is copy-on-write (COW) by default. As of Linux 5.7, # this still leads to fragmentation for SQLite and Xapian files where # random I/O happens, so we disable COW just for SQLite files and Xapian # directories. Disabling COW disables checksumming, so we only do this # for regeneratable files, and not canonical git storage (git doesn't # checksum refs, only data under $GIT_DIR/objects). SKIP: { my $nr = 2; skip 'test is Linux-only', $nr if $^O ne 'linux'; my $dir = $ENV{BTRFS_TESTDIR}; skip 'BTRFS_TESTDIR not defined', $nr unless defined $dir; my $lsattr = require_cmd('lsattr', 1) or skip 'lsattr(1) not installed', $nr; my $tmp = File::Temp->newdir('nodatacow-XXXX', DIR => $dir); my $dn = $tmp->dirname; my $name = "$dn/pp.f"; open my $fh, '>', $name or BAIL_OUT "open($name): $!"; PublicInbox::Syscall::nodatacow_fh($fh); my $res = xqx([$lsattr, $name]); BAIL_OUT "lsattr(1) fails in $dir" if $?; like($res, qr/C.*\Q$name\E/, "`C' attribute set on fd with pure Perl"); $name = "$dn/pp.d"; mkdir($name) or BAIL_OUT "mkdir($name) $!"; PublicInbox::Syscall::nodatacow_dir($name); $res = xqx([$lsattr, '-d', $name]); like($res, qr/C.*\Q$name\E/, "`C' attribute set on dir with pure Perl"); }; done_testing; public-inbox-1.9.0/t/nulsubject.t000066400000000000000000000014111430031475700167230ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use_ok 'PublicInbox::Import'; use_ok 'PublicInbox::Git'; my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/a.git"; { my $git = PublicInbox::Git->new($git_dir); my $im = PublicInbox::Import->new($git, 'testbox', 'test@example'); $im->init_bare; $im->add(PublicInbox::Eml->new(<<'EOF')); From: a@example.com To: b@example.com Subject: A subject line with a null =?iso-8859-1?q?=00?= see! Message-ID: hello world EOF $im->done; is(xsys(qw(git --git-dir), $git_dir, 'fsck', '--strict'), 0, 'git fsck ok'); } done_testing(); 1; public-inbox-1.9.0/t/on_destroy.t000066400000000000000000000017331430031475700167410ustar00rootroot00000000000000#!perl -w use strict; use v5.10.1; use Test::More; require_ok 'PublicInbox::OnDestroy'; my @x; my $od = PublicInbox::OnDestroy->new(sub { push @x, 'hi' }); is_deeply(\@x, [], 'not called, yet'); undef $od; is_deeply(\@x, [ 'hi' ], 'no args works'); $od = PublicInbox::OnDestroy->new(sub { $x[0] = $_[0] }, 'bye'); is_deeply(\@x, [ 'hi' ], 'nothing changed while alive'); undef $od; is_deeply(\@x, [ 'bye' ], 'arg passed'); $od = PublicInbox::OnDestroy->new(sub { @x = @_ }, qw(x y)); undef $od; is_deeply(\@x, [ 'x', 'y' ], '2 args passed'); open my $tmp, '+>>', undef or BAIL_OUT $!; $tmp->autoflush(1); $od = PublicInbox::OnDestroy->new(1, sub { print $tmp "$$ DESTROY\n" }); undef $od; is(-s $tmp, 0, '$tmp is empty on pid mismatch'); $od = PublicInbox::OnDestroy->new($$, sub { $tmp = $$ }); undef $od; is($tmp, $$, '$tmp set to $$ by callback'); if (my $nr = $ENV{TEST_LEAK_NR}) { for (0..$nr) { $od = PublicInbox::OnDestroy->new(sub { @x = @_ }, qw(x y)); } } done_testing; public-inbox-1.9.0/t/over.t000066400000000000000000000067121430031475700155310ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use Compress::Zlib qw(compress); use PublicInbox::TestCommon; require_mods('DBD::SQLite'); use_ok 'PublicInbox::OverIdx'; my ($tmpdir, $for_destroy) = tmpdir(); my $over = PublicInbox::OverIdx->new("$tmpdir/over.sqlite3"); $over->dbh; # open file is($over->max, 0, 'max is zero on new DB (scalar context)'); is_deeply([$over->max], [0], 'max is zero on new DB (list context)'); my $x = $over->next_tid; is(int($x), $x, 'integer tid'); my $y = $over->next_tid; is($y, $x+1, 'tid increases'); $x = $over->sid('hello-world'); is(int($x), $x, 'integer sid'); $y = $over->sid('hello-WORLD'); is($y, $x+1, 'sid increases'); is($over->sid('hello-world'), $x, 'idempotent'); ok(!$over->{dbh}->{ReadOnly}, 'OverIdx is not ReadOnly'); $over->dbh_close; $over = PublicInbox::Over->new("$tmpdir/over.sqlite3"); ok($over->dbh->{ReadOnly}, 'Over is ReadOnly'); $over = PublicInbox::OverIdx->new("$tmpdir/over.sqlite3"); $over->dbh; is($over->sid('hello-world'), $x, 'idempotent across reopen'); $over->each_by_mid('never', sub { fail('should not be called') }); $x = $over->resolve_mid_to_tid('never'); is(int($x), $x, 'integer tid for ghost'); $y = $over->resolve_mid_to_tid('NEVAR'); is($y, $x + 1, 'integer tid for ghost increases'); my $ddd = compress(''); my $msg = sub { { ts => 0, ds => 0, num => $_[0] } }; foreach my $s ('', undef) { $over->add_over($msg->(98), [ 'a' ], [], $s, $ddd); $over->add_over($msg->(99), [ 'b' ], [], $s, $ddd); my $msgs = [ map { $_->{num} } @{$over->get_thread('a')} ]; is_deeply([98], $msgs, 'messages not linked by empty subject'); } $over->add_over($msg->(98), [ 'a' ], [], 's', $ddd); $over->add_over($msg->(99), [ 'b' ], [], 's', $ddd); foreach my $mid (qw(a b)) { my $msgs = [ map { $_->{num} } @{$over->get_thread('a')} ]; is_deeply([98, 99], $msgs, 'linked messages by subject'); } $over->add_over($msg->(98), [ 'a' ], [], 's', $ddd); $over->add_over($msg->(99), [ 'b' ], ['a'], 'diff', $ddd); foreach my $mid (qw(a b)) { my $msgs = [ map { $_->{num} } @{$over->get_thread($mid)} ]; is_deeply([98, 99], $msgs, "linked messages by Message-ID: <$mid>"); } isnt($over->max, 0, 'max is non-zero'); $over->rollback_lazy; # L my $v = eval 'v'.$over->{dbh}->{sqlite_version}; SKIP: { skip("no WAL in SQLite version $v < 3.7.0", 1) if $v lt v3.7.0; $over->{dbh}->do('PRAGMA journal_mode = WAL'); $over = PublicInbox::OverIdx->new("$tmpdir/over.sqlite3"); is($over->dbh->selectrow_array('PRAGMA journal_mode'), 'wal', 'WAL journal_mode not clobbered if manually set'); } # ext index additions $over->eidx_prep; { my @arg = qw(1349 2019 adeadba7cafe example.key); ok($over->add_xref3(@arg), 'first add'); ok($over->add_xref3(@arg), 'add idempotent'); my $xref3 = $over->get_xref3(1349); is_deeply($xref3, [ 'example.key:2019:adeadba7cafe' ], 'xref3 works'); @arg = qw(1349 2018 deadbeefcafe example.kee); ok($over->add_xref3(@arg), 'add another xref3'); $xref3 = $over->get_xref3(1349); is_deeply($xref3, [ 'example.key:2019:adeadba7cafe', 'example.kee:2018:deadbeefcafe' ], 'xref3 works forw two'); is($over->dbh->do(<<''), 1, 'remove first'); DELETE FROM xref3 WHERE xnum = 2019 AND docid = 1349 $xref3 = $over->get_xref3(1349); is_deeply($xref3, [ 'example.kee:2018:deadbeefcafe' ], 'confirm removal successful'); $over->rollback_lazy; } done_testing(); public-inbox-1.9.0/t/plack-2-txt-bodies.eml000066400000000000000000000004171430031475700203750ustar00rootroot00000000000000From: a@example.com Subject: blargh Message-ID: In-Reply-To: MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="b" --b Content-Type: text/plain hi --b Content-Type: text/plain bye --b-- public-inbox-1.9.0/t/plack-attached-patch.eml000066400000000000000000000005321430031475700210240ustar00rootroot00000000000000From: a@example.com Subject: [PATCH] asdf Message-ID: MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="b" --b Content-Type: text/plain hi, see attached --b Content-Type: text/plain Content-Disposition: inline; filename="foo&.patch" --- a/file +++ b/file @@ -49, 7 +49,34 @@ --b-- public-inbox-1.9.0/t/plack-qp.eml000066400000000000000000000002421430031475700165700ustar00rootroot00000000000000From: qp@example.com Subject: QP Message-ID: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain hi =3D bye= public-inbox-1.9.0/t/plack.t000066400000000000000000000226421430031475700156500ustar00rootroot00000000000000#!perl -w # Copyright (C) 2014-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; my $psgi = "./examples/public-inbox.psgi"; my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape); require_mods(@mods); foreach my $mod (@mods) { use_ok $mod; } ok(-f $psgi, "psgi example file found"); my $pfx = 'http://example.com/test'; my $eml = eml_load('t/iso-2202-jp.eml'); # ensure successful message deliveries my $ibx = create_inbox('test-1', sub { my ($im, $ibx) = @_; my $addr = $ibx->{-primary_address}; $im->add($eml) or xbail '->add'; $eml->header_set('Content-Type', "text/plain; charset=\rso\rb\0gus\rithurts"); $eml->header_set('Message-ID', ''); $im->add($eml) or xbail '->add'; $im->add(PublicInbox::Eml->new(<add'; From: Me To: You Cc: $addr Message-Id: Subject: hihi Date: Fri, 02 Oct 1993 00:00:00 +0000 Content-Type: text/plain; charset=iso-8859-1 > quoted text zzzzzz EOF # multipart with two text bodies $im->add(eml_load('t/plack-2-txt-bodies.eml')) or BAIL_OUT '->add'; # multipart with attached patch + filename $im->add(eml_load('t/plack-attached-patch.eml')) or BAIL_OUT '->add'; # multipart collapsed to single quoted-printable text/plain $im->add(eml_load('t/plack-qp.eml')) or BAIL_OUT '->add'; my $crlf = < To: $addr Message-Id: Subject: carriage return in long subject Date: Fri, 02 Oct 1993 00:00:00 +0000 :( EOF $crlf =~ s/\n/\r\n/sg; $im->add(PublicInbox::Eml->new($crlf)) or BAIL_OUT '->add'; open my $fh, '>', "$ibx->{inboxdir}/description" or BAIL_OUT "open: $!"; print $fh "test for public-inbox\n" or BAIL_OUT; close $fh or BAIL_OUT "close: $!"; open $fh, '>', "$ibx->{inboxdir}/pi_config"; print $fh <{inboxdir} newsgroup = inbox.test address = $addr url = $pfx/ EOF close $fh or BAIL_OUT "close: $!"; }); local $ENV{PI_CONFIG} = "$ibx->{inboxdir}/pi_config"; my $app = require $psgi; test_psgi($app, sub { my ($cb) = @_; foreach my $u (qw(robots.txt favicon.ico .well-known/foo)) { my $res = $cb->(GET("http://example.com/$u")); is($res->code, 404, "$u is missing"); } }); test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(GET('http://example.com/test/crlf@example.com/')); is($res->code, 200, 'retrieved CRLF as HTML'); like($res->content, qr/mailto:me\@example/, 'no %40, per RFC 6068'); unlike($res->content, qr/\r/, 'no CR in HTML'); $res = $cb->(GET('http://example.com/test/crlf@example.com/raw')); is($res->code, 200, 'retrieved CRLF raw'); like($res->content, qr/\r/, 'CR preserved in raw message'); $res = $cb->(GET('http://example.com/test/bogus@example.com/raw')); is($res->code, 404, 'missing /raw is 404'); }); # redirect with newsgroup test_psgi($app, sub { my ($cb) = @_; my $from = 'http://example.com/inbox.test'; my $to = 'http://example.com/test/'; my $res = $cb->(GET($from)); is($res->code, 301, 'newsgroup name is permanent redirect'); is($to, $res->header('Location'), 'redirect location matches'); $from .= '/'; is($res->code, 301, 'newsgroup name/ is permanent redirect'); is($to, $res->header('Location'), 'redirect location matches'); }); # redirect with trailing / test_psgi($app, sub { my ($cb) = @_; my $from = 'http://example.com/test'; my $to = "$from/"; my $res = $cb->(GET($from)); is(301, $res->code, 'is permanent redirect'); is($to, $res->header('Location'), 'redirect location matches with trailing slash'); }); foreach my $t (qw(t T)) { test_psgi($app, sub { my ($cb) = @_; my $u = $pfx . "/blah\@example.com/$t"; my $res = $cb->(GET($u)); is(301, $res->code, "redirect for missing /"); my $location = $res->header('Location'); like($location, qr!/\Q$t\E/#u\z!, 'redirected with missing /'); }); } foreach my $t (qw(f)) { test_psgi($app, sub { my ($cb) = @_; my $u = $pfx . "/blah\@example.com/$t"; my $res = $cb->(GET($u)); is(301, $res->code, "redirect for legacy /f"); my $location = $res->header('Location'); like($location, qr!/blah\@example\.com/\z!, 'redirected with missing /'); }); } test_psgi($app, sub { my ($cb) = @_; my $atomurl = 'http://example.com/test/new.atom'; my $res = $cb->(GET('http://example.com/test/new.html')); is(200, $res->code, 'success response received'); like($res->content, qr!href="new\.atom"!, 'atom URL generated'); like($res->content, qr!href="blah\@example\.com/"!, 'index generated'); like($res->content, qr!1993-10-02!, 'date set'); }); test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(GET($pfx . '/atom.xml')); is(200, $res->code, 'success response received for atom'); my $body = $res->content; like($body, qr!link\s+href="\Q$pfx\E/blah\@example\.com/"!s, 'atom feed generated correct URL'); like($body, qr/test for public-inbox/, "set title in XML feed"); like($body, qr/zzzzzz/, 'body included'); $res = $cb->(GET($pfx . '/description')); like($res->content, qr/test for public-inbox/, 'got description'); }); test_psgi($app, sub { my ($cb) = @_; my $path = '/blah@example.com/'; my $res = $cb->(GET($pfx . $path)); is(200, $res->code, "success for $path"); my $html = $res->content; like($html, qr!<title>hihi - Me!, 'HTML returned'); like($html, qr!(GET($pfx . $path)); is(301, $res->code, "redirect for $path"); my $location = $res->header('Location'); like($location, qr!/blah\@example\.com/\z!, '/$MESSAGE_ID/f/ redirected to /$MESSAGE_ID/'); $res = $cb->(GET($pfx . '/multipart@example.com/')); like($res->content, qr/hi\n.*-- Attachment #2.*\nbye\n/s, 'multipart split'); $res = $cb->(GET($pfx . '/patch@example.com/')); $html = $res->content; like($html, qr!see attached!, 'original body'); like($html, qr!.*Attachment #2: foo&(?:amp|#38);\.patch --!, 'parts split with filename'); $res = $cb->(GET($pfx . '/qp@example.com/')); like($res->content, qr/\bhi = bye\b/, "HTML output decoded QP"); }); test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(GET($pfx . '/blah@example.com/raw')); is(200, $res->code, 'success response received for /*/raw'); like($res->content, qr!^From !sm, "mbox returned"); is($res->header('Content-Type'), 'text/plain; charset=iso-8859-1', 'charset from message used'); $res = $cb->(GET($pfx . '/broken@example.com/raw')); is($res->header('Content-Type'), 'text/plain; charset=UTF-8', 'broken charset ignored'); $res = $cb->(GET($pfx . '/199707281508.AAA24167@hoyogw.example/raw')); is($res->header('Content-Type'), 'text/plain; charset=ISO-2022-JP', 'ISO-2002-JP returned'); chomp(my $body = $res->content); my $raw = PublicInbox::Eml->new(\$body); is($raw->body_raw, $eml->body_raw, 'ISO-2022-JP body unmodified'); $res = $cb->(GET($pfx . '/blah@example.com/t.mbox.gz')); is(501, $res->code, '501 when overview missing'); like($res->content, qr!\bOverview\b!, 'overview omission noted'); }); # legacy redirects foreach my $t (qw(m f)) { test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(GET($pfx . "/$t/blah\@example.com.txt")); is(301, $res->code, "redirect for old $t .txt link"); my $location = $res->header('Location'); like($location, qr!/blah\@example\.com/raw\z!, ".txt redirected to /raw"); }); } my %umap = ( 'm' => '', 'f' => '', 't' => 't/', ); while (my ($t, $e) = each %umap) { test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(GET($pfx . "/$t/blah\@example.com.html")); is(301, $res->code, "redirect for old $t .html link"); my $location = $res->header('Location'); like($location, qr!/blah\@example\.com/$e(?:#u)?\z!, ".html redirected to new location"); }); } foreach my $sfx (qw(mbox mbox.gz)) { test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(GET($pfx . "/t/blah\@example.com.$sfx")); is(301, $res->code, 'redirect for old thread link'); my $location = $res->header('Location'); like($location, qr!/blah\@example\.com/t\.mbox(?:\.gz)?\z!, "$sfx redirected to /mbox.gz"); }); } test_psgi($app, sub { my ($cb) = @_; # for a while, we used to support /$INBOX/$X40/ # when we "compressed" long Message-IDs to SHA-1 # Now we're stuck supporting them forever :< foreach my $path ('f2912279bd7bcd8b7ab3033234942d58746d56f7') { my $from = "http://example.com/test/$path/"; my $res = $cb->(GET($from)); is(301, $res->code, 'is permanent redirect'); like($res->header('Location'), qr!/test/blah\@example\.com/!, 'redirect from x40 MIDs works'); } }); # dumb HTTP clone/fetch support test_psgi($app, sub { my ($cb) = @_; my $path = '/test/info/refs'; my $req = HTTP::Request->new('GET' => $path); my $res = $cb->($req); is(200, $res->code, 'refs readable'); my $orig = $res->content; $req->header('Range', 'bytes=5-10'); $res = $cb->($req); is(206, $res->code, 'got partial response'); is($res->content, substr($orig, 5, 6), 'partial body OK'); $req->header('Range', 'bytes=5-'); $res = $cb->($req); is(206, $res->code, 'got partial another response'); is($res->content, substr($orig, 5), 'partial body OK past end'); }); # things which should fail test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(PUT('/')); is(405, $res->code, 'no PUT to / allowed'); $res = $cb->(PUT('/test/')); is(405, $res->code, 'no PUT /$INBOX allowed'); # TODO # $res = $cb->(GET('/')); }); done_testing(); public-inbox-1.9.0/t/pop3d.t000066400000000000000000000300771430031475700156040ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use v5.12; use PublicInbox::TestCommon; use Socket qw(IPPROTO_TCP SOL_SOCKET); # Net::POP3 is part of the standard library, but distros may split it off... require_mods(qw(DBD::SQLite Net::POP3 IO::Socket::SSL)); require_git('2.6'); # for v2 require_mods(qw(File::FcntlLock)) if $^O !~ /\A(?:linux|freebsd)\z/; use_ok 'IO::Socket::SSL'; use_ok 'PublicInbox::TLS'; my ($tmpdir, $for_destroy) = tmpdir(); mkdir("$tmpdir/p3state") or xbail "mkdir: $!"; my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $olderr = "$tmpdir/plain.err"; my $group = 'test-pop3'; my $addr = $group . '@example.com'; my $stls = tcp_server(); my $plain = tcp_server(); my $pop3s = tcp_server(); my $patch = eml_load('t/data/0001.patch'); my $ibx = create_inbox 'pop3d', version => 2, -primary_address => $addr, indexlevel => 'basic', sub { my ($im, $ibx) = @_; $im->add(eml_load('t/plack-qp.eml')) or BAIL_OUT '->add'; $im->add($patch) or BAIL_OUT '->add'; }; my $pi_config = "$tmpdir/pi_config"; open my $fh, '>', $pi_config or BAIL_OUT "open: $!"; print $fh <{inboxdir} address = $addr indexlevel = basic newsgroup = $group EOF close $fh or BAIL_OUT "close: $!\n"; my $pop3s_addr = tcp_host_port($pop3s); my $stls_addr = tcp_host_port($stls); my $plain_addr = tcp_host_port($plain); my $env = { PI_CONFIG => $pi_config }; my $cert = 'certs/server-cert.pem'; my $key = 'certs/server-key.pem'; unless (-r $key && -r $cert) { plan skip_all => "certs/ missing for $0, run $^X ./create-certs.perl in certs/"; } my $old = start_script(['-pop3d', '-W0', "--stdout=$tmpdir/plain.out", "--stderr=$olderr" ], $env, { 3 => $plain }); my @old_args = ($plain->sockhost, Port => $plain->sockport); my $oldc = Net::POP3->new(@old_args); my $locked_mb = ('e'x32)."\@$group"; ok($oldc->apop("$locked_mb.0", 'anonymous'), 'APOP to old'); my $dbh = DBI->connect("dbi:SQLite:dbname=$tmpdir/p3state/db.sqlite3",'','', { AutoCommit => 1, RaiseError => 1, PrintError => 0, sqlite_use_immediate_transaction => 1, sqlite_see_if_its_a_number => 1, }); { # locking within the same process my $x = Net::POP3->new(@old_args); ok(!$x->apop("$locked_mb.0", 'anonymous'), 'APOP lock failure'); like($x->message, qr/unable to lock/, 'diagnostic message'); $x = Net::POP3->new(@old_args); ok($x->apop($locked_mb, 'anonymous'), 'APOP lock acquire'); my $y = Net::POP3->new(@old_args); ok(!$y->apop($locked_mb, 'anonymous'), 'APOP lock fails once'); undef $x; $y = Net::POP3->new(@old_args); ok($y->apop($locked_mb, 'anonymous'), 'APOP lock works after release'); } for my $args ( [ "--cert=$cert", "--key=$key", "-lpop3s://$pop3s_addr", "-lpop3://$stls_addr" ], ) { for ($out, $err) { open my $fh, '>', $_ or BAIL_OUT "truncate: $!" } my $cmd = [ '-netd', '-W0', @$args, "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, $env, { 3 => $stls, 4 => $pop3s }); my %o = ( SSL_hostname => 'server.local', SSL_verifycn_name => 'server.local', SSL_verify_mode => SSL_VERIFY_PEER(), SSL_ca_file => 'certs/test-ca.pem', ); # start negotiating a slow TLS connection my $slow = tcp_connect($pop3s, Blocking => 0); $slow = IO::Socket::SSL->start_SSL($slow, SSL_startHandshake => 0, %o); my $slow_done = $slow->connect_SSL; my @poll; if ($slow_done) { diag('W: connect_SSL early OK, slow client test invalid'); use PublicInbox::Syscall qw(EPOLLIN EPOLLOUT); @poll = (fileno($slow), EPOLLIN | EPOLLOUT); } else { @poll = (fileno($slow), PublicInbox::TLS::epollbit()); } my @p3s_args = ($pop3s->sockhost, Port => $pop3s->sockport, SSL => 1, %o); my $p3s = Net::POP3->new(@p3s_args); my $capa = $p3s->capa; ok(!exists $capa->{STLS}, 'no STLS CAPA for POP3S'); ok($p3s->quit, 'QUIT works w/POP3S'); { $p3s = Net::POP3->new(@p3s_args); ok(!$p3s->apop("$locked_mb.0", 'anonymous'), 'APOP lock failure w/ another daemon'); like($p3s->message, qr/unable to lock/, 'diagnostic message'); } # slow TLS connection did not block the other fast clients while # connecting, finish it off: until ($slow_done) { IO::Poll::_poll(-1, @poll); $slow_done = $slow->connect_SSL and last; @poll = (fileno($slow), PublicInbox::TLS::epollbit()); } $slow->blocking(1); ok(sysread($slow, my $greet, 4096) > 0, 'slow got a greeting'); my @np3_args = ($stls->sockhost, Port => $stls->sockport); my $np3 = Net::POP3->new(@np3_args); ok($np3->quit, 'plain QUIT works'); $np3 = Net::POP3->new(@np3_args, %o); $capa = $np3->capa; ok(exists $capa->{STLS}, 'STLS CAPA advertised before STLS'); ok($np3->starttls, 'STLS works'); $capa = $np3->capa; ok(!exists $capa->{STLS}, 'STLS CAPA not advertised after STLS'); ok($np3->quit, 'QUIT works after STLS'); for my $mailbox (('x'x32)."\@$group", $group, ('a'x32)."\@z.$group") { $np3 = Net::POP3->new(@np3_args); ok(!$np3->user($mailbox), "USER $mailbox reject"); ok($np3->quit, 'QUIT after USER fail'); $np3 = Net::POP3->new(@np3_args); ok(!$np3->apop($mailbox, 'anonymous'), "APOP $mailbox reject"); ok($np3->quit, "QUIT after APOP fail $mailbox"); } # we do connect+QUIT bumps to try ensuring non-QUIT disconnects # get processed below: for my $mailbox ($group, "$group.0") { my $u = ('f'x32)."\@$mailbox"; undef $np3; ok(Net::POP3->new(@np3_args)->quit, 'connect+QUIT bump'); $np3 = Net::POP3->new(@np3_args); my $n0 = $dbh->selectrow_array('SELECT COUNT(*) FROM deletes'); my $u0 = $dbh->selectrow_array('SELECT COUNT(*) FROM users'); ok($np3->user($u), "UUID\@$mailbox accept"); ok($np3->pass('anonymous'), 'pass works'); my $n1 = $dbh->selectrow_array('SELECT COUNT(*) FROM deletes'); is($n1 - $n0, 1, 'deletes bumped while connected'); ok($np3->quit, 'client QUIT'); $n1 = $dbh->selectrow_array('SELECT COUNT(*) FROM deletes'); is($n1, $n0, 'deletes row gone on no-op after QUIT'); my $u1 = $dbh->selectrow_array('SELECT COUNT(*) FROM users'); is($u1, $u0, 'users row gone on no-op after QUIT'); $np3 = Net::POP3->new(@np3_args); ok($np3->user($u), "UUID\@$mailbox accept"); ok($np3->pass('anonymous'), 'pass works'); my $list = $np3->list; my $uidl = $np3->uidl; is_deeply([sort keys %$list], [sort keys %$uidl], 'LIST and UIDL keys match'); ok($_ > 0, 'bytes in LIST result') for values %$list; like($_, qr/\A[a-z0-9]{40,}\z/, 'blob IDs in UIDL result') for values %$uidl; ok($np3->quit, 'QUIT after LIST+UIDL'); $n1 = $dbh->selectrow_array('SELECT COUNT(*) FROM deletes'); is($n1, $n0, 'deletes row gone on no-op after LIST+UIDL'); $n0 = $n1; $np3 = Net::POP3->new(@np3_args); ok($np3->user($u), "UUID\@$mailbox accept"); ok($np3->pass('anonymous'), 'pass works'); undef $np3; # QUIT-less disconnect ok(Net::POP3->new(@np3_args)->quit, 'connect+QUIT bump'); $u1 = $dbh->selectrow_array('SELECT COUNT(*) FROM users'); is($u1, $u0, 'users row gone on QUIT-less disconnect'); $n1 = $dbh->selectrow_array('SELECT COUNT(*) FROM deletes'); is($n1, $n0, 'deletes row gone on QUIT-less disconnect'); $n0 = $n1; $np3 = Net::POP3->new(@np3_args); ok(!$np3->apop($u, 'anonumuss'), 'APOP wrong pass reject'); $n1 = $dbh->selectrow_array('SELECT COUNT(*) FROM deletes'); is($n1, $n0, 'deletes row not bumped w/ wrong pass'); undef $np3; # QUIT-less disconnect ok(Net::POP3->new(@np3_args)->quit, 'connect+QUIT bump'); $n1 = $dbh->selectrow_array('SELECT COUNT(*) FROM deletes'); is($n1, $n0, 'deletes row not bumped w/ wrong pass'); $np3 = Net::POP3->new(@np3_args); ok($np3->apop($u, 'anonymous'), "APOP UUID\@$mailbox"); my @res = $np3->popstat; is($res[0], 2, 'STAT knows about 2 messages'); my $msg = $np3->get(2); $msg = join('', @$msg); $msg =~ s/\r\n/\n/g; is_deeply(PublicInbox::Eml->new($msg), $patch, 't/data/0001.patch round-tripped'); ok(!$np3->get(22), 'missing message'); $msg = $np3->top(2, 0); $msg = join('', @$msg); $msg =~ s/\r\n/\n/g; is($msg, $patch->header_obj->as_string . "\n", 'TOP numlines=0'); ok(!$np3->top(2, -1), 'negative TOP numlines'); $msg = $np3->top(2, 1); $msg = join('', @$msg); $msg =~ s/\r\n/\n/g; is($msg, $patch->header_obj->as_string . <top(2, 10000); $msg = join('', @$msg); $msg =~ s/\r\n/\n/g; is_deeply(PublicInbox::Eml->new($msg), $patch, 'TOP numlines=10000 (excess)'); $np3 = Net::POP3->new(@np3_args, %o); ok($np3->starttls, 'STLS works before APOP'); ok($np3->apop($u, 'anonymous'), "APOP UUID\@$mailbox w/ STLS"); # undocumented: ok($np3->_NOOP, 'NOOP works') if $np3->can('_NOOP'); } SKIP: { skip 'TCP_DEFER_ACCEPT is Linux-only', 2 if $^O ne 'linux'; my $var = eval { Socket::TCP_DEFER_ACCEPT() } // 9; my $x = getsockopt($pop3s, IPPROTO_TCP, $var) // xbail "IPPROTO_TCP: $!"; ok(unpack('i', $x) > 0, 'TCP_DEFER_ACCEPT set on POP3S'); $x = getsockopt($stls, IPPROTO_TCP, $var) // xbail "IPPROTO_TCP: $!"; is(unpack('i', $x), 0, 'TCP_DEFER_ACCEPT is 0 on plain POP3'); }; SKIP: { skip 'SO_ACCEPTFILTER is FreeBSD-only', 2 if $^O ne 'freebsd'; system('kldstat -m accf_data >/dev/null') and skip 'accf_data not loaded? kldload accf_data', 2; require PublicInbox::Daemon; my $x = getsockopt($pop3s, SOL_SOCKET, $PublicInbox::Daemon::SO_ACCEPTFILTER); like($x, qr/\Adataready\0+\z/, 'got dataready accf for pop3s'); $x = getsockopt($stls, IPPROTO_TCP, $PublicInbox::Daemon::SO_ACCEPTFILTER); is($x, undef, 'no BSD accept filter for plain IMAP'); }; $td->kill; $td->join; is($?, 0, 'no error in exited -netd'); open my $fh, '<', $err or BAIL_OUT "open $err failed: $!"; my $eout = do { local $/; <$fh> }; unlike($eout, qr/wide/i, 'no Wide character warnings in -netd'); } { my $capa = $oldc->capa; ok(defined($capa->{PIPELINING}), 'pipelining supported by CAPA'); is($capa->{EXPIRE}, 0, 'EXPIRE 0 set'); ok(!exists $capa->{STLS}, 'STLS unset w/o daemon certs'); # ensure TOP doesn't trigger "EXPIRE 0" like RETR does (cf. RFC2449) my $list = $oldc->list; ok(scalar keys %$list, 'got a listing of messages'); ok($oldc->top($_, 1), "TOP $_ 1") for keys %$list; ok($oldc->quit, 'QUIT after TOP'); # clients which see "EXPIRE 0" can elide DELE requests $oldc = Net::POP3->new(@old_args); ok($oldc->apop("$locked_mb.0", 'anonymous'), 'APOP for RETR'); is_deeply($oldc->capa, $capa, 'CAPA unchanged'); is_deeply($oldc->list, $list, 'LIST unchanged by previous TOP'); ok($oldc->get($_), "RETR $_") for keys %$list; ok($oldc->quit, 'QUIT after RETR'); $oldc = Net::POP3->new(@old_args); ok($oldc->apop("$locked_mb.0", 'anonymous'), 'APOP reconnect'); my $cont = $oldc->list; is_deeply($cont, {}, 'no messages after implicit DELE from EXPIRE 0'); ok($oldc->quit, 'QUIT on noop'); # test w/o checking CAPA to trigger EXPIRE 0 $oldc = Net::POP3->new(@old_args); ok($oldc->apop($locked_mb, 'anonymous'), 'APOP on latest slice'); my $l2 = $oldc->list; is_deeply($l2, $list, 'different mailbox, different deletes'); ok($oldc->get($_), "RETR $_") for keys %$list; ok($oldc->quit, 'QUIT w/o EXPIRE nor DELE'); $oldc = Net::POP3->new(@old_args); ok($oldc->apop($locked_mb, 'anonymous'), 'APOP again on latest'); $l2 = $oldc->list; is_deeply($l2, $list, 'no DELE nor EXPIRE preserves messages'); ok($oldc->delete(2), 'explicit DELE on latest'); ok($oldc->quit, 'QUIT w/ highest DELE'); # this is non-standard behavior, but necessary if we expect hundreds # of thousands of users on cheap HW $oldc = Net::POP3->new(@old_args); ok($oldc->apop($locked_mb, 'anonymous'), 'APOP yet again on latest'); is_deeply($oldc->list, {}, 'highest DELE deletes older messages, too'); } # TODO: more tests, but mpop was really helpful in helping me # figure out bugs with larger newsgroups (>50K messages) which # probably isn't suited for this test suite. $old->kill; $old->join; is($?, 0, 'no error in exited -pop3d'); open $fh, '<', $olderr or BAIL_OUT "open $olderr failed: $!"; my $eout = do { local $/; <$fh> }; unlike($eout, qr/wide/i, 'no Wide character warnings in -pop3d'); done_testing; public-inbox-1.9.0/t/precheck.t000066400000000000000000000036201430031475700163350ustar00rootroot00000000000000# Copyright (C) 2014-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::MDA; sub do_checks { my ($s) = @_; my $recipient = 'foo@example.com'; ok(!PublicInbox::MDA->precheck($s, $recipient), "wrong ORIGINAL_RECIPIENT rejected"); $recipient = 'b@example.com'; ok(PublicInbox::MDA->precheck($s, $recipient), "ORIGINAL_RECIPIENT in To: is OK"); $recipient = 'c@example.com'; ok(PublicInbox::MDA->precheck($s, $recipient), "ORIGINAL_RECIPIENT in Cc: is OK"); $recipient = [ 'c@example.com', 'd@example.com' ]; ok(PublicInbox::MDA->precheck($s, $recipient), "alias list is OK"); } { my $s = PublicInbox::Eml->new(<<'EOF'); From: abc@example.com To: abc@example.com Cc: c@example.com, another-list@example.com Content-Type: text/plain Subject: list is fine Message-ID: Date: Wed, 09 Apr 2014 01:28:34 +0000 hello world EOF my $addr = [ 'c@example.com', 'd@example.com' ]; ok(PublicInbox::MDA->precheck($s, $addr), 'Cc list is OK'); } { do_checks(PublicInbox::Eml->new(<<'EOF')); From: a@example.com To: b@example.com Cc: c@example.com Content-Type: text/plain Subject: this is a subject Message-ID: Date: Wed, 09 Apr 2014 01:28:34 +0000 hello world EOF } { do_checks(PublicInbox::Eml->new(<<'EOF')); From: a@example.com To: b+plus@example.com Cc: John Doe Content-Type: text/plain Subject: this is a subject Message-ID: Date: Wed, 09 Apr 2014 01:28:34 +0000 hello world EOF } { my $recipient = 'b@example.com'; my $s = PublicInbox::Eml->new(<<'EOF'); To: b@example.com Cc: c@example.com Content-Type: text/plain Subject: this is a subject Message-ID: Date: Wed, 09 Apr 2014 01:28:34 +0000 hello world EOF ok(!PublicInbox::MDA->precheck($s, $recipient), "missing From: is rejected"); } done_testing(); public-inbox-1.9.0/t/psgi_attach.eml000066400000000000000000000011671430031475700173550ustar00rootroot00000000000000From: root@z Message-Id: Subject: hi MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="b" --b Content-Type: text/plain Content-Transfer-Encoding: quoted-printable Content-Disposition: inline; filename="queue-pee" abcdef=3Dg =3D=3Dblah --b Content-Type: appication/octet-stream Content-Transfer-Encoding: base64 Content-Disposition: inline; filename="bayce-sixty-four" YjY03q2+7wo= --b Content-Type: text/plain Content-Disposition: inline; filename="noop.txt" plain text pass through --b Content-Type: text/plain Content-Disposition: inline; filename=".dotfile" dotfile --b-- public-inbox-1.9.0/t/psgi_attach.t000066400000000000000000000103071430031475700170370ustar00rootroot00000000000000#!perl -w # Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; my @mods = qw(HTTP::Request::Common Plack::Builder Plack::Test URI::Escape); require_mods(@mods); use_ok $_ foreach @mods; use_ok 'PublicInbox::WWW'; use PublicInbox::Config; use PublicInbox::Eml; use_ok 'PublicInbox::WwwAttach'; my $cfgpath; my $creat_cb = sub { my ($im, $ibx) = @_; $im->add(eml_load('t/psgi_attach.eml')) or BAIL_OUT; $im->add(eml_load('t/data/message_embed.eml')) or BAIL_OUT; $cfgpath = "$ibx->{inboxdir}/pi_config"; open my $fh, '>', $cfgpath or BAIL_OUT $!; print $fh <{-primary_address} inboxdir = $ibx->{inboxdir} EOF close $fh or BAIL_OUT $!; }; my $ibx = create_inbox 'test', $creat_cb; $cfgpath //= "$ibx->{inboxdir}/pi_config"; my $qp = "abcdef=g\n==blah\n"; my $b64 = "b64\xde\xad\xbe\xef\n"; my $txt = "plain\ntext\npass\nthrough\n"; my $dot = "dotfile\n"; my $www = PublicInbox::WWW->new(PublicInbox::Config->new($cfgpath)); my $client = sub { my ($cb) = @_; my $res; $res = $cb->(GET('/test/Z%40B/')); my @href = ($res->content =~ /^href="([^"]+)"/gms); @href = grep(/\A[\d\.]+-/, @href); is_deeply([qw(1-queue-pee 2-bayce-sixty-four 3-noop.txt 4-a.txt)], \@href, 'attachment links generated'); $res = $cb->(GET('/test/Z%40B/1-queue-pee')); my $qp_res = $res->content; ok(length($qp_res) >= length($qp), 'QP length is close'); like($qp_res, qr/\n\z/s, 'trailing newline exists'); # is(index($qp_res, $qp), 0, 'QP trailing newline is there'); $qp_res =~ s/\r\n/\n/g; is(index($qp_res, $qp), 0, 'QP trailing newline is there'); $res = $cb->(GET('/test/Z%40B/2-base-sixty-four')); is(quotemeta($res->content), quotemeta($b64), 'Base64 matches exactly'); $res = $cb->(GET('/test/Z%40B/3-noop.txt')); my $txt_res = $res->content; ok(length($txt_res) >= length($txt), 'plain text almost matches'); like($txt_res, qr/\n\z/s, 'trailing newline exists in text'); is(index($txt_res, $txt), 0, 'plain text not truncated'); $res = $cb->(GET('/test/Z%40B/4-a.txt')); my $dot_res = $res->content; ok(length($dot_res) >= length($dot), 'dot almost matches'); $res = $cb->(GET('/test/Z%40B/4-any-filename.txt')); is($res->content, $dot_res, 'user-specified filename is OK'); my $mid = '20200418222508.GA13918@dcvr'; my $irt = '20200418222020.GA2745@dcvr'; $res = $cb->(GET("/test/$mid/")); unlike($res->content, qr! multipart/mixed, Size: 0 bytes!, '0-byte download not offered'); like($res->content, qr/\bhref="2-embed2x\.eml"/s, 'href to message/rfc822 attachment visible'); like($res->content, qr/\bhref="2\.1\.2-test\.eml"/s, 'href to nested message/rfc822 attachment visible'); $res = $cb->(GET("/test/$mid/2-embed2x.eml")); my $eml = PublicInbox::Eml->new(\($res->content)); is_deeply([ $eml->header_raw('Message-ID') ], [ "<$irt>" ], 'got attached eml'); my @subs = $eml->subparts; is(scalar(@subs), 2, 'attachment had 2 subparts'); like($subs[0]->body_str, qr/^testing embedded message\n*\z/sm, '1st attachment is as expected'); is($subs[1]->header('Content-Type'), 'message/rfc822', '2nd attachment is as expected'); $res = $cb->(GET("/test/$mid/2.1.2-test.eml")); $eml = PublicInbox::Eml->new(\($res->content)); is_deeply([ $eml->header_raw('Message-ID') ], [ '<20200418214114.7575-1-e@yhbt.net>' ], 'nested eml retrieved'); }; test_psgi(sub { $www->call(@_) }, $client); SKIP: { require_mods(qw(DBD::SQLite Plack::Test::ExternalServer), 18); $ibx = create_inbox 'test-indexed', indexlevel => 'basic', $creat_cb; $cfgpath = "$ibx->{inboxdir}/pi_config"; my $env = { PI_CONFIG => $cfgpath }; $www = PublicInbox::WWW->new(PublicInbox::Config->new($cfgpath)); test_psgi(sub { $www->call(@_) }, $client); my $sock = tcp_server() or die; my ($tmpdir, $for_destroy) = tmpdir(); my ($out, $err) = map { "$tmpdir/std$_.log" } qw(out err); my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, $env, { 3 => $sock }); my ($h, $p) = tcp_host_port($sock); local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; Plack::Test::ExternalServer::test_psgi(client => $client); } done_testing; public-inbox-1.9.0/t/psgi_bad_mids.t000066400000000000000000000041241430031475700173350ustar00rootroot00000000000000#!perl -w # Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Eml; use PublicInbox::Config; my @mods = qw(DBD::SQLite HTTP::Request::Common Plack::Test URI::Escape Plack::Builder); require_git 2.6; require_mods(@mods); use_ok($_) for @mods; use_ok 'PublicInbox::WWW'; my $msgs = <<''; F1V5OR6NMF.3M649JTLO9IXD@tux.localdomain/hehe1"'/foo F1V5NB0PTU.3U0DCVGAJ750Z@tux&.ampersand F1V5MIHGCU.2ABINKW6WBE8N@tux.localdomain/raw F1V5LF9D9C.2QT5PGXZQ050E@tux.localdomain/t.atom F1V58X3CMU.2DCCVAKQZGADV@tux.localdomain/../../../../foo F1TVKINT3G.2S6I36MXMHYG6@tux.localdomain" onclick="alert(1)" my @mids = split(/\n/, $msgs); my $ibx = create_inbox 'bad-mids', version => 2, indexlevel => 'basic', sub { my ($im) = @_; my $i = 0; for my $mid (@mids) { $im->add(PublicInbox::Eml->new(<<"")) or BAIL_OUT; Subject: test Message-ID: <$mid> From: a\@example.com To: b\@example.com Date: Fri, 02 Oct 1993 00:00:0$i +0000 $i++; } }; my $cfgpfx = "publicinbox.bad-mids"; my $cfg = <{-primary_address} $cfgpfx.inboxdir=$ibx->{inboxdir} EOF my $config = PublicInbox::Config->new(\$cfg); my $www = PublicInbox::WWW->new($config); test_psgi(sub { $www->call(@_) }, sub { my ($cb) = @_; my $res = $cb->(GET('/bad-mids/')); is($res->code, 200, 'got 200 OK listing'); my $raw = $res->content; foreach my $mid (@mids) { ok(index($raw, $mid) < 0, "escaped $mid"); } my (@xmids) = ($raw =~ m!\bhref="([^"]+?)/T/#u"!sg); is(scalar(@xmids), scalar(@mids), 'got escaped links to all messages'); @xmids = reverse @xmids; my %uxs = ( gt => '>', lt => '<' ); foreach my $i (0..$#xmids) { my $uri = $xmids[$i]; $uri =~ s/&#([0-9]+);/sprintf("%c", $1)/sge; $uri =~ s/&(lt|gt);/$uxs{$1}/sge; $res = $cb->(GET("/bad-mids/$uri/raw")); is($res->code, 200, 'got 200 OK raw message '.$uri); like($res->content, qr/Message-ID: <\Q$mids[$i]\E>/s, 'retrieved correct message'); } }); done_testing; public-inbox-1.9.0/t/psgi_mount.t000066400000000000000000000050361430031475700167400ustar00rootroot00000000000000#!perl -w # Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::Eml; use PublicInbox::TestCommon; use PublicInbox::Config; my ($tmpdir, $for_destroy) = tmpdir(); my $v1dir = "$tmpdir/v1.git"; my $cfgpfx = "publicinbox.test"; my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape Plack::Builder Plack::App::URLMap); require_mods(@mods); use_ok $_ foreach @mods; use_ok 'PublicInbox::WWW'; my $ibx = create_inbox 'test', tmpdir => $v1dir, sub { my ($im, $ibx) = @_; $im->add(PublicInbox::Eml->new(< To: You Cc: $ibx->{-primary_address} Message-Id: Subject: hihi Date: Thu, 01 Jan 1970 00:00:00 +0000 zzzzzz EOF }; my $cfg = PublicInbox::Config->new(\<{-primary_address} $cfgpfx.inboxdir=$v1dir EOF my $www = PublicInbox::WWW->new($cfg); my $app = builder(sub { enable('Head'); mount('/a' => builder(sub { sub { $www->call(@_) } })); mount('/b' => builder(sub { sub { $www->call(@_) } })); }); test_psgi($app, sub { my ($cb) = @_; my $res; # Atom feed: $res = $cb->(GET('/a/test/new.atom')); like($res->content, qr!\bhttp://[^/]+/a/test/!, 'URLs which exist in Atom feed are mount-aware'); unlike($res->content, qr!\b\Qhttp://[^/]+/test/\E!, 'No URLs which are not mount-aware'); $res = $cb->(GET('/a/test/_/text/mirror/')); like($res->content, qr!git clone --mirror\s+.*?http://[^/]+/a/test\b!s, 'clone URL in /text/mirror is mount-aware'); $res = $cb->(GET('/a/test/blah%40example.com/raw')); is($res->code, 200, 'OK with URLMap mount'); like($res->content, qr/^Message-Id: \n/sm, 'headers appear in /raw'); # redirects $res = $cb->(GET('/a/test/m/blah%40example.com.html')); is($res->header('Location'), 'http://localhost/a/test/blah@example.com/', 'redirect functions properly under mount'); $res = $cb->(GET('/test/blah%40example.com/')); is($res->code, 404, 'intentional 404 with URLMap mount'); }); SKIP: { require_mods(qw(DBD::SQLite Search::Xapian IO::Uncompress::Gunzip), 3); require_ok 'PublicInbox::SearchIdx'; PublicInbox::SearchIdx->new($ibx, 1)->index_sync; test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(GET('/a/test/blah@example.com/t.mbox.gz')); my $gz = $res->content; my $raw; IO::Uncompress::Gunzip::gunzip(\$gz => \$raw); like($raw, qr!^Message-Id:\x20\n!sm, 'headers appear in /t.mbox.gz mboxrd'); }); } done_testing(); public-inbox-1.9.0/t/psgi_multipart_not.t000066400000000000000000000027541430031475700205030ustar00rootroot00000000000000#!perl -w # Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Eml; use PublicInbox::Config; require_git 2.6; my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test URI::Escape Plack::Builder Plack::Test); require_mods(@mods); use_ok($_) for (qw(HTTP::Request::Common Plack::Test)); use_ok 'PublicInbox::WWW'; my $ibx = create_inbox 'v2', version => 2, sub { my ($im) = @_; $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT; Message-Id: <200308111450.h7BEoOu20077@mail.osdl.org> To: linux-kernel@vger.kernel.org Subject: [OSDL] linux-2.6.0-test3 reaim results Mime-Version: 1.0 Content-Type: multipart/mixed ; boundary="==_Exmh_120757360" Date: Mon, 11 Aug 2003 07:50:24 -0700 From: exmh user Freed^Wmultipart ain't what it used to be EOF }; my $cfgpfx = "publicinbox.v2test"; my $cfg = <{-primary_address} $cfgpfx.inboxdir=$ibx->{inboxdir} EOF my $www = PublicInbox::WWW->new(PublicInbox::Config->new(\$cfg)); my ($res, $raw); test_psgi(sub { $www->call(@_) }, sub { my ($cb) = @_; for my $u ('/v2test/?q=%22ain\'t what it used to be%22&x=t', '/v2test/new.atom', '/v2test/new.html') { $res = $cb->(GET($u)); $raw = $res->content; ok(index($raw, 'Freed^Wmultipart') >= 0, $u); ok(index($raw, 'Warning: decoded text') >= 0, $u.' warns'); } }); done_testing; public-inbox-1.9.0/t/psgi_scan_all.t000066400000000000000000000026661430031475700173600ustar00rootroot00000000000000#!perl -w # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Eml; use PublicInbox::Config; my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape DBD::SQLite); require_git 2.6; require_mods(@mods); use_ok 'PublicInbox::WWW'; foreach my $mod (@mods) { use_ok $mod; } my $cfg = ''; foreach my $i (1..2) { my $ibx = create_inbox "test-$i", version => 2, indexlevel => 'basic', sub { my ($im, $ibx) = @_; $im->add(PublicInbox::Eml->new(<{-primary_address} Subject: s$i Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF }; my $cfgpfx = "publicinbox.test-$i"; $cfg .= "$cfgpfx.address=$ibx->{-primary_address}\n"; $cfg .= "$cfgpfx.inboxdir=$ibx->{inboxdir}\n"; $cfg .= "$cfgpfx.url=http://example.com/$i\n"; } my $www = PublicInbox::WWW->new(PublicInbox::Config->new(\$cfg)); test_psgi(sub { $www->call(@_) }, sub { my ($cb) = @_; foreach my $i (1..2) { foreach my $end ('', '/') { my $res = $cb->(GET("/a-mid-$i\@b$end")); is($res->code, 302, 'got 302'); is($res->header('Location'), "http://example.com/$i/a-mid-$i\@b/", "redirected OK to $i"); } } foreach my $x (qw(inv@lid inv@lid/ i/v/a l/i/d/)) { my $res = $cb->(GET("/$x")); is($res->code, 404, "404 on $x"); } }); done_testing; public-inbox-1.9.0/t/psgi_search.t000066400000000000000000000127031430031475700170420ustar00rootroot00000000000000#!perl -w # Copyright (C) 2017-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use IO::Uncompress::Gunzip qw(gunzip); use PublicInbox::Eml; use PublicInbox::Config; use PublicInbox::Inbox; my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test URI::Escape Plack::Builder); require_mods(@mods); use_ok($_) for (qw(HTTP::Request::Common Plack::Test)); use_ok 'PublicInbox::WWW'; use_ok 'PublicInbox::SearchIdx'; my ($tmpdir, $for_destroy) = tmpdir(); local $ENV{TZ} = 'UTC'; my $digits = '10010260936330'; my $ua = 'Pine.LNX.4.10'; my $mid = "$ua.$digits.2460-100000\@penguin.transmeta.com"; my $ibx = create_inbox 'git', indexlevel => 'full', tmpdir => "$tmpdir/1", sub { my ($im) = @_; # n.b. these headers are not properly RFC2047-encoded $im->add(PublicInbox::Eml->new(< From: Ævar Arnfjörð Bjarmason To: git\@vger.kernel.org EOF $im->add(PublicInbox::Eml->new(<<"")) or BAIL_OUT; Message-ID: From: replier In-Reply-To: <$mid> Subject: mismatch $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT; Subject: Message-ID: From: blank subject To: git@vger.kernel.org EOF $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT; Message-ID: From: no subject at all To: git@vger.kernel.org EOF }; my $cfgpfx = "publicinbox.test"; my $cfg = PublicInbox::Config->new(\<{inboxdir} EOF my $www = PublicInbox::WWW->new($cfg); test_psgi(sub { $www->call(@_) }, sub { my ($cb) = @_; my ($html, $res); my $approxidate = 'now'; for my $req ('/test/?q=%C3%86var', '/test/?q=%25C3%2586var') { $res = $cb->(GET($req."+d:..$approxidate")); $html = $res->content; like($html, qr/Ævar d:\.\.\Q$approxidate\E/, 'HTML escaped in title, "d:..$APPROXIDATE" preserved'); my @res = ($html =~ m/\?q=(.+var)\+d:\.\.\Q$approxidate\E/g); ok(scalar(@res), 'saw query strings'); my %uniq = map { $_ => 1 } @res; is(1, scalar keys %uniq, 'all query values identical in HTML'); is('%C3%86var', (keys %uniq)[0], 'matches original query'); ok(index($html, 'by Ævar Arnfjörð Bjarmason') >= 0, "displayed Ævar's name properly in HTML"); like($html, qr/download mbox\.gz: .*?"full threads"/s, '"full threads" download option shown'); } like($html, qr/Initial query\b.*?returned no.results, used:.*instead/s, 'noted retry on double-escaped query {-uxs_retried}'); my $warn = []; local $SIG{__WARN__} = sub { push @$warn, @_ }; $res = $cb->(GET('/test/?q=s:test&l=5e')); is($res->code, 200, 'successful search result'); is_deeply([], $warn, 'no warnings from non-numeric comparison'); $res = $cb->(GET('/test/?&q=s:test')); is($res->code, 200, 'successful search result'); is_deeply([], $warn, 'no warnings from black parameter'); $res = $cb->(POST('/test/?q=s:bogus&x=m')); is($res->code, 404, 'failed search result gives 404'); is_deeply([], $warn, 'no warnings'); my $mid_re = qr/\Q$mid\E/o; while (length($digits) > 8) { $res = $cb->(GET("/test/$ua.$digits/")); is($res->code, 300, 'partial match found while truncated'); like($res->content, qr/\b1 partial match found\b/); like($res->content, $mid_re, 'found mid in response'); chop($digits); } $res = $cb->(GET('/test/')); $html = $res->content; like($html, qr/\bhref="no-subject-at-all[^>]+>\(no subject\)</, 'subject-less message linked from "/$INBOX/"'); like($html, qr/\bhref="blank-subject[^>]+>\(no subject\)</, 'blank subject message linked from "/$INBOX/"'); like($html, qr/test Ævar/, "displayed Ævar's name properly in topic view"); $res = $cb->(GET('/test/?q=tc:git')); like($html, qr/\bhref="no-subject-at-all[^>]+>\(no subject\)</, 'subject-less message linked from "/$INBOX/?q=..."'); like($html, qr/\bhref="blank-subject[^>]+>\(no subject\)</, 'blank subject message linked from "/$INBOX/?q=..."'); $res = $cb->(GET('/test/no-subject-at-all@example.com/raw')); like($res->header('Content-Disposition'), qr/filename=no-subject\.txt/); $res = $cb->(GET('/test/no-subject-at-all@example.com/t.mbox.gz')); like($res->header('Content-Disposition'), qr/filename=no-subject\.mbox\.gz/); # "full threads" mbox.gz download $res = $cb->(POST("/test/?q=s:test+d:..$approxidate&x=m&t")); is($res->code, 200, 'successful mbox download with threads'); gunzip(\($res->content) => \(my $before)); is_deeply([ "Message-ID: <$mid>\n", "Message-ID: <reply\@asdf>\n" ], [ grep(/^Message-ID:/m, split(/^/m, $before)) ], 'got full thread'); # clobber has_threadid to emulate old versions: { my $sidx = PublicInbox::SearchIdx->new($ibx, 0); my $xdb = $sidx->idx_acquire; $xdb->set_metadata('has_threadid', '0'); $sidx->idx_release; } $cfg->each_inbox(sub { delete $_[0]->{search} }); $res = $cb->(GET('/test/?q=s:test')); is($res->code, 200, 'successful search w/o has_threadid'); unlike($html, qr/download mbox\.gz: .*?"full threads"/s, '"full threads" download option not shown w/o has_threadid'); # in case somebody uses curl to bypass <form> $res = $cb->(POST("/test/?q=s:test+d:..$approxidate&x=m&t")); is($res->code, 200, 'successful mbox download w/ threads'); gunzip(\($res->content) => \(my $after)); isnt($before, $after); }); done_testing(); �������������������������������������������������������������public-inbox-1.9.0/t/psgi_text.t��������������������������������������������������������������������0000664�0000000�0000000�00000004036�14300314757�0016561�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::TestCommon; my ($tmpdir, $for_destroy) = tmpdir(); my $maindir = "$tmpdir/main.git"; my $addr = 'test-public@example.com'; my $cfgpfx = "publicinbox.test"; my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape Plack::Builder); require_mods(@mods, 'IO::Uncompress::Gunzip'); use_ok $_ foreach @mods; use PublicInbox::Import; use PublicInbox::Git; use PublicInbox::Config; use_ok 'PublicInbox::WWW'; use_ok 'PublicInbox::WwwText'; my $config = PublicInbox::Config->new(\<<EOF); $cfgpfx.address=$addr $cfgpfx.inboxdir=$maindir EOF PublicInbox::Import::init_bare($maindir); my $www = PublicInbox::WWW->new($config); test_psgi(sub { $www->call(@_) }, sub { my ($cb) = @_; my $gunzipped; my $req = GET('/test/_/text/help/'); my $res = $cb->($req); my $content = $res->content; like($content, qr!<title>public-inbox help.*!, 'default help'); $req->header('Accept-Encoding' => 'gzip'); $res = $cb->($req); is($res->header('Content-Encoding'), 'gzip', 'got gzip encoding'); is($res->header('Content-Type'), 'text/html; charset=UTF-8', 'got gzipped HTML'); IO::Uncompress::Gunzip::gunzip(\($res->content) => \$gunzipped); is($gunzipped, $content, 'gzipped content is correct'); $req = GET('/test/_/text/config/raw'); $res = $cb->($req); $content = $res->content; my $olen = $res->header('Content-Length'); my $f = "$tmpdir/cfg"; open my $fh, '>', $f or die; print $fh $content or die; close $fh or die; my $cfg = PublicInbox::Config->new($f); is($cfg->{"$cfgpfx.address"}, $addr, 'got expected address in config'); $req->header('Accept-Encoding' => 'gzip'); $res = $cb->($req); is($res->header('Content-Encoding'), 'gzip', 'got gzip encoding'); ok($res->header('Content-Length') < $olen, 'gzipped help is smaller'); IO::Uncompress::Gunzip::gunzip(\($res->content) => \$gunzipped); is($gunzipped, $content); }); done_testing(); public-inbox-1.9.0/t/psgi_v2-new.eml000066400000000000000000000004121430031475700172170ustar00rootroot00000000000000From: root@z Message-ID: Subject: hi MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="b" --b Content-Type: text/plain blah --b Content-Type: text/plain Content-Disposition: inline; filename="attach.txt" new --b-- public-inbox-1.9.0/t/psgi_v2-old.eml000066400000000000000000000004121430031475700172040ustar00rootroot00000000000000From: root@z Message-ID: Subject: hi MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="b" --b Content-Type: text/plain blah --b Content-Type: text/plain Content-Disposition: inline; filename="attach.txt" old --b-- public-inbox-1.9.0/t/psgi_v2.t000066400000000000000000000245351430031475700161320ustar00rootroot00000000000000#!perl -w # Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_git(2.6); use PublicInbox::Eml; use PublicInbox::Config; use PublicInbox::MID qw(mids); require_mods(qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test URI::Escape Plack::Builder HTTP::Date)); use_ok($_) for (qw(HTTP::Request::Common Plack::Test)); use_ok 'PublicInbox::WWW'; my ($tmpdir, $for_destroy) = tmpdir(); my $eml = PublicInbox::Eml->new(<<'EOF'); From oldbug-pre-a0c07cba0e5d8b6a Fri Oct 2 00:00:00 1993 From: a@example.com To: test@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 Content-Type: text/plain; charset=iso-8859-1 hello world EOF my $new_mid; my $ibx = create_inbox 'v2-1', version => 2, indexlevel => 'medium', tmpdir => "$tmpdir/v2", sub { my ($im, $ibx) = @_; $im->add($eml) or BAIL_OUT; $eml->body_set("hello world!\n"); my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; $eml->header_set(Date => 'Fri, 02 Oct 1993 00:01:00 +0000'); $im->add($eml) or BAIL_OUT; is(scalar(@warn), 1, 'got one warning'); my $mids = mids($eml->header_obj); $new_mid = $mids->[1]; open my $fh, '>', "$ibx->{inboxdir}/new_mid" or BAIL_OUT; print $fh $new_mid or BAIL_OUT; close $fh or BAIL_OUT; }; $new_mid //= do { open my $fh, '<', "$ibx->{inboxdir}/new_mid" or BAIL_OUT; local $/; <$fh>; }; my $cfgpath = "$ibx->{inboxdir}/pi_config"; { open my $fh, '>', $cfgpath or BAIL_OUT $!; print $fh <{inboxdir} address = $ibx->{-primary_address} EOF close $fh or BAIL_OUT; } my $msg = $ibx->msg_by_mid('a-mid@b'); like($$msg, qr/\AFrom oldbug/s, '"From_" line stored to test old bug workaround'); my $cfg = PublicInbox::Config->new($cfgpath); my $www = PublicInbox::WWW->new($cfg); my ($res, $raw, @from_); my $client0 = sub { my ($cb) = @_; $res = $cb->(GET('/v2test/description')); like($res->content, qr!\$INBOX_DIR/description missing!, 'got v2 description missing message'); $res = $cb->(GET('/v2test/a-mid@b/raw')); is($res->header('Content-Type'), 'text/plain; charset=iso-8859-1', 'charset from message used'); $raw = $res->content; unlike($raw, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($raw, qr/^hello world$/m, 'got first message'); like($raw, qr/^hello world!$/m, 'got second message'); @from_ = ($raw =~ m/^From /mg); is(scalar(@from_), 2, 'two From_ lines'); $res = $cb->(GET("/v2test/$new_mid/raw")); $raw = $res->content; like($raw, qr/^hello world!$/m, 'second message with new Message-Id'); @from_ = ($raw =~ m/^From /mg); is(scalar(@from_), 1, 'only one From_ line'); # Atom feed should sort by Date: (if Received is missing) $res = $cb->(GET('/v2test/new.atom')); my @bodies = ($res->content =~ />(hello [^<]+)(GET('/v2test/new.html')); @bodies = ($res->content =~ /^(hello [^<]+)$/mg); is_deeply(\@bodies, [ "hello world!\n", "hello world\n" ], 'new.html ordering is chronological'); $res = $cb->(GET('/v2test/new.atom')); my @dates = ($res->content =~ m!title>([^<]+)!g); is_deeply(\@dates, [ "1993-10-02T00:01:00Z", "1993-10-02T00:00:00Z" ], 'Date headers made it through'); }; test_psgi(sub { $www->call(@_) }, $client0); my $env = { TMPDIR => $tmpdir, PI_CONFIG => $cfgpath }; test_httpd($env, $client0, 9); $eml->header_set('Message-ID', 'a-mid@b'); $eml->body_set("hello ghosts\n"); my $im = $ibx->importer(0); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; ok($im->add($eml), 'added 3rd duplicate-but-different message'); is(scalar(@warn), 1, 'got another warning'); like($warn[0], qr/mismatched/, 'warned about mismatched messages'); } my $mids = mids($eml->header_obj); my $third = $mids->[-1]; $im->done; my $client1 = sub { my ($cb) = @_; $res = $cb->(GET('/v2test/_/text/config/raw')); my $lm = $res->header('Last-Modified'); ok($lm, 'Last-Modified set w/ ->mm'); $lm = HTTP::Date::str2time($lm); is($lm, $ibx->mm->created_at, 'Last-Modified for text/config/raw matches ->created_at'); delete $ibx->{mm}; $res = $cb->(GET("/v2test/$third/raw")); $raw = $res->content; like($raw, qr/^hello ghosts$/m, 'got third message'); @from_ = ($raw =~ m/^From /mg); is(scalar(@from_), 1, 'one From_ line'); $res = $cb->(GET('/v2test/a-mid@b/raw')); $raw = $res->content; like($raw, qr/^hello world$/m, 'got first message'); like($raw, qr/^hello world!$/m, 'got second message'); like($raw, qr/^hello ghosts$/m, 'got third message'); @from_ = ($raw =~ m/^From /mg); is(scalar(@from_), 3, 'three From_ lines'); $cfg->each_inbox(sub { $_[0]->search->reopen }); SKIP: { eval { require IO::Uncompress::Gunzip }; skip 'IO::Uncompress::Gunzip missing', 6 if $@; my ($in, $out, $status); my $req = GET('/v2test/a-mid@b/raw'); $req->header('Accept-Encoding' => 'gzip'); $res = $cb->($req); is($res->header('Content-Encoding'), 'gzip', 'gzip encoding'); $in = $res->content; IO::Uncompress::Gunzip::gunzip(\$in => \$out); is($out, $raw, 'gzip response matches'); $res = $cb->(GET('/v2test/a-mid@b/t.mbox.gz')); $in = $res->content; $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out); unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($out, qr/^hello world$/m, 'got first in t.mbox.gz'); like($out, qr/^hello world!$/m, 'got second in t.mbox.gz'); like($out, qr/^hello ghosts$/m, 'got third in t.mbox.gz'); @from_ = ($out =~ m/^From /mg); is(scalar(@from_), 3, 'three From_ lines in t.mbox.gz'); # search interface $res = $cb->(POST('/v2test/?q=m:a-mid@b&x=m')); $in = $res->content; $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out); unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($out, qr/^hello world$/m, 'got first in mbox POST'); like($out, qr/^hello world!$/m, 'got second in mbox POST'); like($out, qr/^hello ghosts$/m, 'got third in mbox POST'); @from_ = ($out =~ m/^From /mg); is(scalar(@from_), 3, 'three From_ lines in mbox POST'); # all.mbox.gz interface $res = $cb->(GET('/v2test/all.mbox.gz')); $in = $res->content; $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out); unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($out, qr/^hello world$/m, 'got first in all.mbox'); like($out, qr/^hello world!$/m, 'got second in all.mbox'); like($out, qr/^hello ghosts$/m, 'got third in all.mbox'); @from_ = ($out =~ m/^From /mg); is(scalar(@from_), 3, 'three From_ lines in all.mbox'); }; $res = $cb->(GET('/v2test/?q=m:a-mid@b&x=t')); is($res->code, 200, 'success with threaded search'); my $raw = $res->content; ok($raw =~ s/\A.*>Results 1-3 of 3\b//s, 'got all results'); my @over = ($raw =~ m/\d{4}-\d+-\d+\s+\d+:\d+ +(?:\d+\% )?(.+)$/gm); is_deeply(\@over, [ '(GET('/v2test/?q=m:a-mid@b&x=A')); is($res->code, 200, 'success with Atom search'); SKIP: { require_mods(qw(XML::TreePP), 2); my $t = XML::TreePP->new->parse($res->content); like($t->{feed}->{-xmlns}, qr/\bAtom\b/, 'looks like an an Atom feed'); is(scalar @{$t->{feed}->{entry}}, 3, 'parsed three entries'); }; local $SIG{__WARN__} = 'DEFAULT'; $res = $cb->(GET('/v2test/a-mid@b/')); $raw = $res->content; like($raw, qr/^hello world$/m, 'got first message'); like($raw, qr/^hello world!$/m, 'got second message'); like($raw, qr/^hello ghosts$/m, 'got third message'); @from_ = ($raw =~ m/>From: /mg); is(scalar(@from_), 3, 'three From: lines'); foreach my $mid ('a-mid@b', $new_mid, $third) { like($raw, qr!>\Q$mid\E!s, "Message-ID $mid shown"); } like($raw, qr/\b3\+ messages\b/, 'thread overview shown'); }; test_psgi(sub { $www->call(@_) }, $client1); test_httpd($env, $client1, 38); { my $exp = [ qw( ) ]; $eml->header_set('Message-Id', @$exp); $eml->header_set('Subject', '4th dupe'); local $SIG{__WARN__} = sub {}; ok($im->add($eml), 'added one message'); $im->done; my @h = $eml->header('Message-ID'); is_deeply($exp, \@h, 'reused existing Message-ID'); $cfg->each_inbox(sub { $_[0]->search->reopen }); } my $client2 = sub { my ($cb) = @_; my $res = $cb->(GET('/v2test/new.atom')); my @ids = ($res->content =~ m!urn:uuid:([^<]+)!sg); my %ids; $ids{$_}++ for @ids; is_deeply([qw(1 1 1 1)], [values %ids], 'feed ids unique'); $res = $cb->(GET('/v2test/reuse@mid/T/')); $raw = $res->content; like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /T/'); my @over = ($raw =~ m/^\d{4}-\d+-\d+\s+\d+:\d+ (.+)$/gm); is_deeply(\@over, [ '(GET('/v2test/reuse@mid/t/')); $raw = $res->content; like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /t/'); $res = $cb->(GET('/v2test/0/info/refs')); is($res->code, 200, 'got info refs for dumb clones'); $res = $cb->(GET('/v2test/0.git/info/refs')); is($res->code, 200, 'got info refs for dumb clones w/ .git suffix'); $res = $cb->(GET('/v2test/info/refs')); is($res->code, 404, 'v2 git URL w/o shard fails'); }; test_psgi(sub { $www->call(@_) }, $client2); test_httpd($env, $client2, 8); { # ensure conflicted attachments can be resolved local $SIG{__WARN__} = sub {}; foreach my $body (qw(old new)) { $im->add(eml_load "t/psgi_v2-$body.eml") or BAIL_OUT; } $im->done; } $cfg->each_inbox(sub { $_[0]->search->reopen }); my $client3 = sub { my ($cb) = @_; my $res = $cb->(GET('/v2test/a@dup/')); my @links = ($res->content =~ m!"\.\./([^/]+/2-attach\.txt)\"!g); is(scalar(@links), 2, 'both attachment links exist'); isnt($links[0], $links[1], 'attachment links are different'); { my $old = $cb->(GET('/v2test/' . $links[0])); my $new = $cb->(GET('/v2test/' . $links[1])); is($old->content, 'old', 'got expected old content'); is($new->content, 'new', 'got expected new content'); } $res = $cb->(GET('/v2test/?t=1970'.'01'.'01'.'000000')); is($res->code, 404, '404 for out-of-range t= param'); my @warn = (); local $SIG{__WARN__} = sub { push @warn, @_ }; $res = $cb->(GET('/v2test/?t=1970'.'01'.'01')); is_deeply(\@warn, [], 'no warnings on YYYYMMDD only'); }; test_psgi(sub { $www->call(@_) }, $client3); test_httpd($env, $client3, 4); done_testing; public-inbox-1.9.0/t/purge.t000066400000000000000000000051551430031475700157000ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; require_git(2.6); require_mods(qw(DBD::SQLite)); use Cwd qw(abs_path); # we need this since we chdir below local $ENV{HOME} = abs_path('t'); my $purge = abs_path('blib/script/public-inbox-purge'); my ($tmpdir, $for_destroy) = tmpdir(); use_ok 'PublicInbox::V2Writable'; my $inboxdir = "$tmpdir/v2"; my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'test-v2purge', version => 2, -no_fsync => 1, -primary_address => 'test@example.com', indexlevel => 'basic', }); my $raw = <<'EOF'; From: a@example.com To: test@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 Hello World EOF my $cfgfile = "$tmpdir/config"; local $ENV{PI_CONFIG} = $cfgfile; open my $cfg_fh, '>', $cfgfile or die "open: $!"; my $v2w = PublicInbox::V2Writable->new($ibx, {nproc => 1}); my $mime = PublicInbox::Eml->new($raw); ok($v2w->add($mime), 'add message to be purged'); $v2w->done; # failing cases, first: my $in = "$raw\nMOAR\n"; my ($out, $err) = ('', ''); my $opt = { 0 => \$in, 1 => \$out, 2 => \$err }; ok(run_script([$purge, '-f', $inboxdir], undef, $opt), 'purge -f OK'); $out = $err = ''; ok(!run_script([$purge, $inboxdir], undef, $opt), 'mismatch fails without -f'); is($? >> 8, 1, 'missed purge exits with 1'); # a successful case: $opt->{0} = \$raw; ok(run_script([$purge, $inboxdir], undef, $opt), 'match OK'); like($out, qr/\b[a-f0-9]{40,}/m, 'removed commit noted'); # add (old) vger filter to config file print $cfg_fh <add($mime), 'add vger-signatured message to be purged'); $v2w->done; my $pre_scrub = $raw . <<'EOF'; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ EOF $out = $err = ''; ok(chdir('/'), "chdir / OK for --all test"); $opt->{0} = \$pre_scrub; ok(run_script([$purge, '--all'], undef, $opt), 'scrub purge OK'); like($out, qr/\b[a-f0-9]{40,}/m, 'removed commit noted'); # diag "out: $out"; diag "err: $err"; $out = $err = ''; ok(!run_script([$purge, '--all' ], undef, $opt), 'scrub purge not idempotent without -f'); # diag "out: $out"; diag "err: $err"; done_testing(); public-inbox-1.9.0/t/qspawn.t000066400000000000000000000037541430031475700160720ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use_ok 'PublicInbox::Qspawn'; { my $cmd = [qw(sh -c), 'echo >&2 err; echo out']; my $qsp = PublicInbox::Qspawn->new($cmd, {}, { 2 => 1 }); my $res; $qsp->psgi_qx({}, undef, sub { $res = ${$_[0]} }); is($res, "err\nout\n", 'captured stderr and stdout'); $res = undef; $qsp = PublicInbox::Qspawn->new($cmd, {}, { 2 => \*STDOUT }); $qsp->psgi_qx({}, undef, sub { $res = ${$_[0]} }); is($res, "err\nout\n", 'captured stderr and stdout'); } sub finish_err ($) { my ($qsp) = @_; $qsp->finish; $qsp->{err}; } my $limiter = PublicInbox::Qspawn::Limiter->new(1); { my $x = PublicInbox::Qspawn->new([qw(true)]); my $run = 0; $x->start($limiter, sub { my ($self) = @_; is(0, sysread($self->{rpipe}, my $buf, 1), 'read zero bytes'); ok(!finish_err($self), 'no error on finish'); $run = 1; }); is($run, 1, 'callback ran alright'); } { my $x = PublicInbox::Qspawn->new([qw(false)]); my $run = 0; $x->start($limiter, sub { my ($self) = @_; is(0, sysread($self->{rpipe}, my $buf, 1), 'read zero bytes from false'); ok(finish_err($self), 'error on finish'); $run = 1; }); is($run, 1, 'callback ran alright'); } foreach my $cmd ([qw(sleep 1)], [qw(sh -c), 'sleep 1; false']) { my $s = PublicInbox::Qspawn->new($cmd); my @run; $s->start($limiter, sub { my ($self) = @_; push @run, 'sleep'; is(0, sysread($self->{rpipe}, my $buf, 1), 'read zero bytes'); }); my $n = 0; my @t = map { my $i = $n++; my $x = PublicInbox::Qspawn->new([qw(true)]); $x->start($limiter, sub { my ($self) = @_; push @run, $i; }); [$x, $i] } (0..2); if ($cmd->[-1] =~ /false\z/) { ok(finish_err($s), 'got error on false after sleep'); } else { ok(!finish_err($s), 'no error on sleep'); } ok(!finish_err($_->[0]), "true $_->[1] succeeded") foreach @t; is_deeply([qw(sleep 0 1 2)], \@run, 'ran in order'); } done_testing(); 1; public-inbox-1.9.0/t/reindex-time-range.t000066400000000000000000000032221430031475700202330ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_mods qw(DBD::SQLite); my $tmp = tmpdir(); my $eml; my $cb = sub { my ($im, $ibx) = @_; $eml //= eml_load 't/utf8.eml'; for my $i (1..3) { $eml->header_set('Message-ID', "<$i\@example.com>"); my $d = "Thu, 01 Jan 1970 0$i:30:00 +0000"; $eml->header_set('Date', $d); $im->add($eml); } }; my %ibx = map {; "v$_" => create_inbox("v$_", version => $_, indexlevel => 'basic', tmpdir => "$tmp/v$_", $cb); } (1, 2); my $env = { TZ => 'UTC' }; my ($out, $err); for my $v (sort keys %ibx) { my $opt = { -C => $ibx{$v}->{inboxdir}, 1 => \$out, 2 => \$err }; ($out, $err) = ('', ''); run_script([ qw(-index -vv) ], $env, $opt); is($?, 0, 'no error on initial index'); for my $x (qw(until before)) { ($out, $err) = ('', ''); run_script([ qw(-index --reindex -vv), "--$x=1970-01-01T02:00:00Z" ], $env, $opt); is($?, 0, "no error with --$x"); like($err, qr! 1/1\b!, "$x only indexed one message"); } for my $x (qw(after since)) { ($out, $err) = ('', ''); run_script([ qw(-index --reindex -vv), "--$x=1970-01-01T02:00:00Z" ], $env, $opt); is($?, 0, "no error with --$x"); like($err, qr! 2/2\b!, "$x only indexed one message"); } ($out, $err) = ('', ''); run_script([ qw(-index --reindex -vv) ], $env, $opt); is($?, 0, 'no error on initial index'); for my $x (qw(since before after until)) { ($out, $err) = ('', ''); run_script([ qw(-index -v), "--$x=1970-01-01T02:00:00Z" ], $env, $opt); isnt($?, 0, "--$x fails on --reindex"); } } done_testing; public-inbox-1.9.0/t/rename_noreplace.t000066400000000000000000000016321430031475700200510ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use_ok 'PublicInbox::Syscall', 'rename_noreplace'; my ($tmpdir, $for_destroy) = tmpdir; open my $fh, '>', "$tmpdir/a" or xbail $!; my @sa = stat($fh); is(rename_noreplace("$tmpdir/a", "$tmpdir/b"), 1, 'rename_noreplace'); my @sb = stat("$tmpdir/b"); ok(scalar(@sb), 'new file exists'); ok(!-e "$tmpdir/a", 'original gone'); is("@sa[0,1]", "@sb[0,1]", 'same st_dev + st_ino'); is(rename_noreplace("$tmpdir/a", "$tmpdir/c"), undef, 'undef on ENOENT'); ok($!{ENOENT}, 'ENOENT set when missing'); open $fh, '>', "$tmpdir/a" or xbail $!; is(rename_noreplace("$tmpdir/a", "$tmpdir/b"), undef, 'undef on EEXIST'); ok($!{EEXIST}, 'EEXIST set when missing'); is_deeply([stat("$tmpdir/b")], \@sb, 'target unchanged on EEXIST'); done_testing; public-inbox-1.9.0/t/replace.t000066400000000000000000000140771430031475700161740ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::InboxWritable; use PublicInbox::TestCommon; use Cwd qw(abs_path); require_git(2.6); # replace is v2 only, for now... require_mods(qw(DBD::SQLite)); local $ENV{HOME} = abs_path('t'); sub test_replace ($$$) { my ($v, $level, $opt) = @_; diag "v$v $level replace"; my $this = "pi-$v-$level-replace"; my ($tmpdir, $for_destroy) = tmpdir($this); my $ibx = PublicInbox::Inbox->new({ inboxdir => "$tmpdir/testbox", name => $this, version => $v, -no_fsync => 1, -primary_address => 'test@example.com', indexlevel => $level, }); my $orig = PublicInbox::Eml->new(<<'EOF'); From: Barbra Streisand To: test@example.com Subject: confidential Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 Top secret info about my house in Malibu... EOF my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer(0); # fake a bunch of epochs $im->{rotate_bytes} = $opt->{rotate_bytes} if $opt->{rotate_bytes}; if ($opt->{pre}) { $opt->{pre}->($im, 1, 2); $orig->header_set('References', '<1@example.com>'); } ok($im->add($orig), 'add message to be replaced'); if ($opt->{post}) { $opt->{post}->($im, 3, { 4 => 'replace@example.com' }); } $im->done; my $thread_a = $ibx->over->get_thread('replace@example.com'); my %before = map {; delete($_->{blob}) => $_ } @{$ibx->recent}; my $reject = PublicInbox::Eml->new($orig->as_string); foreach my $mid (['', ''], [], ['']) { $reject->header_set('Message-ID', @$mid); my $ok = eval { $im->replace($orig, $reject) }; like($@, qr/Message-ID.*may not be changed/, '->replace died on Message-ID change'); ok(!$ok, 'no replacement happened'); } # prepare the replacement my $expect = "Move along, nothing to see here\n"; my $repl = PublicInbox::Eml->new($orig->as_string); $repl->header_set('From', ''); $repl->header_set('Subject', 'redacted'); $repl->header_set('Date', 'Sat, 02 Oct 2010 00:00:00 +0000'); $repl->body_str_set($expect); my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; ok(my $cmts = $im->replace($orig, $repl), 'replaced message'); my $changed_epochs = 0; for my $tip (@$cmts) { next if !defined $tip; $changed_epochs++; like($tip, qr/\A[a-f0-9]{40,}\z/, 'replace returned current commit'); } is($changed_epochs, 1, 'only one epoch changed'); $im->done; my $m = PublicInbox::Eml->new($ibx->msg_by_mid('replace@example.com')); is($m->body, $expect, 'replaced message'); is_deeply(\@warn, [], 'no warnings on noop'); my @cat = qw(cat-file --buffer --batch --batch-all-objects); my $git = $ibx->git; my @all = $git->qx(@cat); is_deeply([grep(/confidential/, @all)], [], 'nothing confidential'); is_deeply([grep(/Streisand/, @all)], [], 'Streisand who?'); is_deeply([grep(/\bOct 1993\b/, @all)], [], 'nothing from Oct 1993'); my $t19931002 = qr/ 749520000 /; is_deeply([grep(/$t19931002/, @all)], [], "nothing matches $t19931002"); for my $dir (glob("$ibx->{inboxdir}/git/*.git")) { my ($bn) = ($dir =~ m!([^/]+)\z!); is(xsys(qw(git --git-dir), $dir, qw(fsck --strict --no-progress)), 0, "git fsck is clean in epoch $bn"); } my $thread_b = $ibx->over->get_thread('replace@example.com'); is_deeply([sort map { $_->{mid} } @$thread_b], [sort map { $_->{mid} } @$thread_a], 'threading preserved'); if (my $srch = $ibx->search) { for my $q ('f:streisand', 's:confidential', 'malibu') { my $mset = $srch->mset($q); is($mset->size, 0, "no match for $q"); } my @ok = ('f:redactor', 's:redacted', 'nothing to see'); if ($opt->{pre}) { push @ok, 'm:1@example.com', 'm:2@example.com', 's:message2', 's:message1'; } if ($opt->{post}) { push @ok, 'm:3@example.com', 'm:4@example.com', 's:message3', 's:message4'; } for my $q (@ok) { my $mset = $srch->mset($q); ok($mset->size, "got match for $q"); } } # check overview matches: my %after = map {; delete($_->{blob}) => $_ } @{$ibx->recent}; my @before_blobs = keys %before; foreach my $blob (@before_blobs) { delete $before{$blob} if delete $after{$blob}; } is(scalar keys %before, 1, 'one unique blob from before left'); is(scalar keys %after, 1, 'one unique blob from after left'); foreach my $blob (keys %before) { is($git->check($blob), undef, 'old blob not found'); my $smsg = $before{$blob}; is($smsg->{subject}, 'confidential', 'before subject'); is($smsg->{mid}, 'replace@example.com', 'before MID'); } foreach my $blob (keys %after) { ok($git->check($blob), 'new blob found'); my $smsg = $after{$blob}; is($smsg->{subject}, 'redacted', 'after subject'); is($smsg->{mid}, 'replace@example.com', 'before MID'); } # $git->cleanup; # needed if $im->{parallel}; @warn = (); is($im->replace($orig, $repl), undef, 'no-op replace returns undef'); is($im->purge($orig), undef, 'no-op purge returns undef'); is_deeply(\@warn, [], 'no warnings on noop'); # $im->done; # needed if $im->{parallel} } sub pad_msgs { my ($im, @range) = @_; for my $i (@range) { my $irt; if (ref($i) eq 'HASH') { ($i, $irt) = each %$i; } my $sec = sprintf('%0d', $i); my $mime = PublicInbox::Eml->new(< Date: Fri, 02, Jan 1970 00:00:$sec +0000 Subject: message$i message number$i EOF if (defined($irt)) { $mime->header_set('References', "<$irt>"); } $im->add($mime); } } my $opt = { pre => \&pad_msgs }; test_replace(2, 'basic', {}); test_replace(2, 'basic', $opt); test_replace(2, 'basic', $opt = { %$opt, post => \&pad_msgs }); test_replace(2, 'basic', $opt = { %$opt, rotate_bytes => 1 }); SKIP: { require_mods(qw(Search::Xapian), 8); for my $l (qw(medium)) { test_replace(2, $l, {}); $opt = { pre => \&pad_msgs }; test_replace(2, $l, $opt); test_replace(2, $l, $opt = { %$opt, post => \&pad_msgs }); test_replace(2, $l, $opt = { %$opt, rotate_bytes => 1 }); } }; done_testing(); public-inbox-1.9.0/t/reply.t000066400000000000000000000045741430031475700157150ustar00rootroot00000000000000#!perl -w # Copyright (C) 2017-2021 all contributors # License: AGPL-3+ use strict; use Test::More; use PublicInbox::Config; use PublicInbox::Eml; use_ok 'PublicInbox::Reply'; my @q = ( 'foo@bar', 'foo@bar', 'a b', "'a b'", "a'b", "'a'\\''b'", ); while (@q) { my $input = shift @q; my $expect = shift @q; my $res = PublicInbox::Config::squote_maybe($input); is($res, $expect, "quote $input => $res"); } my $mime = PublicInbox::Eml->new(<<'EOF'); From: from To: to Cc: cc@example.com Message-Id: Subject: hihi EOF my $hdr = $mime->header_obj; my $ibx = { -primary_address => 'primary@example.com' }; my ($arg, $link) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); my $exp = [ '--in-reply-to=blah@example.com', '--to=from@example.com', '--cc=cc@example.com', '--cc=to@example.com', ]; is_deeply($arg, $exp, 'default reply is to :all'); $ibx->{replyto} = ':all'; ($arg, $link) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); is_deeply($arg, $exp, '":all" also works'); $exp = [ '--in-reply-to=blah@example.com', '--to=primary@example.com' ]; $ibx->{replyto} = ':list'; ($arg, $link) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); is_deeply($arg, $exp, '":list" works for centralized lists'); $exp = [ '--in-reply-to=blah@example.com', '--to=primary@example.com', '--cc=cc@example.com', '--cc=to@example.com', ]; $ibx->{replyto} = ':list,Cc,To'; ($arg, $link) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); is_deeply($arg, $exp, '":list,Cc,To" works for kinda centralized lists'); $ibx->{replyto} = 'new@example.com'; ($arg, $link) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); $exp = [ '--in-reply-to=blah@example.com', '--to=new@example.com' ]; is_deeply($arg, $exp, 'explicit address works, too'); $ibx->{replyto} = ':all'; $ibx->{obfuscate} = 1; ($arg, $link) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); $exp = [ '--in-reply-to=blah@example.com', '--to=from@example$(echo .)com', '--cc=cc@example$(echo .)com', '--cc=to@example$(echo .)com', ]; is_deeply($arg, $exp, 'address obfuscation works'); is($link, '', 'no mailto: link given'); $ibx->{replyto} = ':none=dead list'; $ibx->{obfuscate} = 1; ($arg, $link) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); is($$arg, 'dead list', ':none= works'); done_testing(); public-inbox-1.9.0/t/run.perl000077500000000000000000000175541430031475700160720ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # # Parallel test runner which preloads code and reuses worker processes # to give a nice speedup over prove(1). It also generates per-test # .log files (similar to automake tests). # # *.t files run by this should not rely on global state. # # Usage: $PERL -I lib -w t/run.perl -j4 # Or via prove(1): prove -lvw t/run.perl :: -j4 use strict; use v5.10.1; use IO::Handle; # ->autoflush use PublicInbox::TestCommon; use PublicInbox::Spawn; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use Errno qw(EINTR); use Fcntl qw(:seek); use POSIX qw(WNOHANG); use File::Temp (); my $jobs = 1; my $repeat = 1; $| = 1; our $log_suffix = '.log'; my ($shuffle, %pids, @err); GetOptions('j|jobs=i' => \$jobs, 'repeat=i' => \$repeat, 'log=s' => \$log_suffix, 's|shuffle' => \$shuffle, ) or die "Usage: $0 [-j JOBS] [--log=SUFFIX] [--repeat RUNS]"; if (($ENV{TEST_RUN_MODE} // 2) == 0) { die "$0 is not compatible with TEST_RUN_MODE=0\n"; } my @tests = scalar(@ARGV) ? @ARGV : glob('t/*.t'); open my $cwd_fh, '<', '.' or die "open .: $!"; open my $OLDOUT, '>&STDOUT' or die "dup STDOUT: $!"; open my $OLDERR, '>&STDERR' or die "dup STDERR: $!"; $OLDOUT->autoflush(1); $OLDERR->autoflush(1); my ($run_log, $tmp_rl); my $rl = $ENV{TEST_RUN_LOG}; unless ($rl) { $tmp_rl = File::Temp->new(CLEANUP => 1); $rl = $tmp_rl->filename; } open $run_log, '+>>', $rl or die "open $rl: $!"; $run_log->autoflush(1); # one reader, many writers key2sub($_) for @tests; # precache my ($for_destroy, $lei_env, $lei_daemon_pid, $owner_pid); # TEST_LEI_DAEMON_PERSIST is currently broken. I get ECONNRESET from # lei even with high kern.ipc.soacceptqueue=1073741823 or SOMAXCONN, not # sure why. Also, testing our internal inotify usage is unreliable # because lei-daemon uses a single inotify FD for all clients. if ($ENV{TEST_LEI_DAEMON_PERSIST} && !$ENV{TEST_LEI_DAEMON_PERSIST_DIR} && (PublicInbox::Spawn->can('recv_cmd4') || eval { require Socket::MsgHdr })) { $lei_env = {}; ($lei_env->{XDG_RUNTIME_DIR}, $for_destroy) = tmpdir; $ENV{TEST_LEI_DAEMON_PERSIST_DIR} = $lei_env->{XDG_RUNTIME_DIR}; run_script([qw(lei daemon-pid)], $lei_env, { 1 => \$lei_daemon_pid }); chomp $lei_daemon_pid; $lei_daemon_pid =~ /\A[0-9]+\z/ or die "no daemon pid: $lei_daemon_pid"; kill(0, $lei_daemon_pid) or die "kill $lei_daemon_pid: $!"; if (my $t = $ENV{GNU_TAIL}) { system("$t --pid=$lei_daemon_pid -F " . "$lei_env->{XDG_RUNTIME_DIR}/lei/errors.log >&2 &"); } if (my $strace_cmd = $ENV{STRACE_CMD}) { system("$strace_cmd -p $lei_daemon_pid &"); } $owner_pid = $$; } if ($shuffle) { require List::Util; } elsif (open(my $prove_state, '<', '.prove') && eval { require YAML::XS }) { # reuse "prove --state=save" data to start slowest tests, first my $state = YAML::XS::Load(do { local $/; <$prove_state> }); my $t = $state->{tests}; @tests = sort { ($t->{$b}->{elapsed} // 0) <=> ($t->{$a}->{elapsed} // 0) } @tests; } our $tb = Test::More->builder; sub DIE (;$) { print $OLDERR @_; exit(1); } our ($worker, $worker_test); sub test_status () { $? = 255 if $? == 0 && !$tb->is_passing; my $status = $? ? 'not ok' : 'ok'; chdir($cwd_fh) or DIE "fchdir: $!"; if ($log_suffix ne '') { my $log = $worker_test; $log =~ s/\.t\z/$log_suffix/; my $skip = ''; if (open my $fh, '<', $log) { my @not_ok = grep(!/^(?:ok |[ \t]*#)/ms, <$fh>); my $last = $not_ok[-1] // ''; pop @not_ok if $last =~ /^[0-9]+\.\.[0-9]+$/; my $pfx = "# $log: "; print $OLDERR map { $pfx.$_ } @not_ok; seek($fh, 0, SEEK_SET) or die "seek: $!"; # show unique skip texts and the number of times # each text was skipped local $/; my @sk = (<$fh> =~ m/^ok [0-9]+ (# skip [^\n]+)/mgs); if (@sk) { my %nr; my @err = grep { !$nr{$_}++ } @sk; print $OLDERR "$pfx$_ ($nr{$_})\n" for @err; $skip = ' # total skipped: '.scalar(@sk); } } else { print $OLDERR "could not open: $log: $!\n"; } print $OLDOUT "$status $worker_test$skip\n"; } } # Test::Builder or Test2::Hub may call exit() from plan(skip_all => ...) END { test_status() if (defined($worker_test) && $worker == $$) } sub run_test ($) { my ($test) = @_; syswrite($run_log, "$$ $test\n"); my $log_fh; if ($log_suffix ne '') { my $log = $test; $log =~ s/\.[^\.]+\z/$log_suffix/ or DIE "can't log for $test"; open $log_fh, '>', $log or DIE "open $log: $!"; $log_fh->autoflush(1); $tb->output($log_fh); $tb->failure_output($log_fh); $tb->todo_output($log_fh); open STDOUT, '>&', $log_fh or DIE "1>$log: $!"; open STDERR, '>&', $log_fh or DIE "2>$log: $!"; } $worker_test = $test; run_script([$test]); test_status(); $worker_test = undef; push @err, "$test ($?)" if $?; } sub UINT_SIZE () { 4 } # worker processes will SIGUSR1 the producer process when it # sees EOF on the pipe. On FreeBSD 11.2 and Perl 5.30.0, # sys/ioctl.ph gives the wrong value for FIONREAD(). my $producer = $$; my $eof; # we stop respawning if true my $start_worker = sub { my ($j, $rd, $wr, $todo) = @_; my $pid = fork // DIE "fork: $!"; if ($pid == 0) { close $wr if $wr; $SIG{USR1} = undef; # undo parent $SIG{USR1} $worker = $$; while (1) { my $r = sysread($rd, my $buf, UINT_SIZE); if (!defined($r)) { next if $! == EINTR; DIE "sysread: $!"; } last if $r == 0; DIE "short read $r" if $r != UINT_SIZE; my $t = unpack('I', $buf); run_test($todo->[$t]); $tb->reset; } kill 'USR1', $producer if !$eof; # sets $eof in $producer if (@err) { # write to run_log for $sigchld handler syswrite($run_log, "$$ @err\n"); DIE join('', map { "E: $_\n" } @err); } exit(0); } else { $pids{$pid} = $j; } }; # negative $repeat means loop forever: for (my $i = $repeat; $i != 0; $i--) { my @todo = $shuffle ? List::Util::shuffle(@tests) : @tests; # single-producer, multi-consumer queue relying on POSIX pipe semantics # POSIX.1-2008 stipulates a regular file should work, but Linux <3.14 # had broken read(2) semantics according to the read(2) manpage pipe(my ($rd, $wr)) or DIE "pipe: $!"; # fill the queue before forking so children can start earlier my $n = (POSIX::PIPE_BUF / UINT_SIZE); if ($n >= $#todo) { print $wr join('', map { pack('I', $_) } (0..$#todo)) or DIE; undef $wr; } else { # write what we can... $wr->autoflush(1); print $wr join('', map { pack('I', $_) } (0..$n)) or DIE; $n += 1; # and send more ($n..$#todo), later } $eof = undef; local $SIG{USR1} = sub { $eof = 1 }; my $sigchld = sub { my ($sig) = @_; my $flags = $sig ? WNOHANG : 0; while (1) { my $pid = waitpid(-1, $flags) or return; return if $pid < 0; my $j = delete $pids{$pid}; if (!defined($j)) { push @err, "reaped unknown $pid ($?)"; next; } if ($?) { seek($run_log, 0, SEEK_SET); chomp(my @t = grep(/^$pid /, <$run_log>)); $t[0] //= "$pid unknown"; push @err, "job[$j] ($?) PID=$t[-1]"; } # skip_all can exit(0), respawn if needed: if (!$eof) { print $OLDERR "# respawning job[$j]\n"; $start_worker->($j, $rd, $wr, \@todo); } } }; # start the workers to consume the queue for (my $j = 0; $j < $jobs; $j++) { $start_worker->($j, $rd, $wr, \@todo); } if ($wr) { local $SIG{CHLD} = $sigchld; # too many tests to fit in the pipe before starting workers, # send the rest now the workers are running print $wr join('', map { pack('I', $_) } ($n..$#todo)) or DIE; undef $wr; } $sigchld->(0) while scalar(keys(%pids)); DIE join('', map { "E: $_\n" } @err) if @err; } print $OLDOUT "1..".($repeat * scalar(@tests))."\n" if $repeat >= 0; if ($lei_env && $$ == $owner_pid) { my $opt = { 1 => $OLDOUT, 2 => $OLDERR }; my $cur_daemon_pid; run_script([qw(lei daemon-pid)], $lei_env, { 1 => \$cur_daemon_pid }); run_script([qw(lei daemon-kill)], $lei_env, $opt); DIE "lei daemon restarted\n" if $cur_daemon_pid != $lei_daemon_pid; } public-inbox-1.9.0/t/search-amsg.eml000066400000000000000000000010571430031475700172570ustar00rootroot00000000000000Subject: see attachment Message-ID: From: "John Smith" To: list@example.com MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="b" --b Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: quoted-printable Content-Disposition: attachment; filename="attached_fart.txt" inside the attachment= --b Content-Type: text/plain; charset="US-ASCII" Content-Disposition: attachment; filename="part_deux.txt" Content-Transfer-Encoding: quoted-printable inside another= --b-- public-inbox-1.9.0/t/search-thr-index.t000066400000000000000000000054041430031475700177200ustar00rootroot00000000000000#!perl -w # Copyright (C) 2017-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::TestCommon; use PublicInbox::MID qw(mids); use PublicInbox::Eml; require_mods(qw(DBD::SQLite Search::Xapian)); require PublicInbox::SearchIdx; require PublicInbox::Smsg; require PublicInbox::Inbox; use PublicInbox::Import; my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/a.git"; PublicInbox::Import::init_bare($git_dir); my $ibx = PublicInbox::Inbox->new({inboxdir => $git_dir}); my $rw = PublicInbox::SearchIdx->new($ibx, 1); ok($rw, "search indexer created"); my $data = <<'EOF'; Subject: [RFC 00/14] Message-Id: <1-bw@g> Subject: [RFC 09/14] Message-Id: <10-bw@g> In-Reply-To: <1-bw@g> References: <1-bw@g> Subject: [RFC 03/14] Message-Id: <4-bw@g> In-Reply-To: <1-bw@g> References: <1-bw@g> EOF my $num = 0; # nb. using internal API, fragile! my $xdb = $rw->begin_txn_lazy; my @mids; foreach (reverse split(/\n\n/, $data)) { $_ .= "\n"; my $mime = PublicInbox::Eml->new(\$_); $mime->header_set('From' => 'bw@g'); $mime->header_set('To' => 'git@vger.kernel.org'); my $bytes = length($mime->as_string); my $mid = mids($mime->header_obj)->[0]; my $smsg = bless { bytes => $bytes, num => ++$num, mid => $mid, blob => '', }, 'PublicInbox::Smsg'; my $doc_id = $rw->add_message($mime, $smsg); push @mids, $mid; ok($doc_id, 'message added: '. $mid); } my $prev; my %tids; my $dbh = $rw->{oidx}->dbh; foreach my $mid (@mids) { my $msgs = $rw->{oidx}->get_thread($mid); is(3, scalar(@$msgs), "got all messages from $mid"); foreach my $m (@$msgs) { my $tid = $dbh->selectrow_array(<<'', undef, $m->{num}); SELECT tid FROM over WHERE num = ? LIMIT 1 $tids{$tid}++; } } is(scalar keys %tids, 1, 'all messages have the same tid'); $rw->commit_txn_lazy; $xdb = $rw->begin_txn_lazy; { my $mime = PublicInbox::Eml->new(<<''); Subject: [RFC 00/14] Message-Id: <1-bw@g> From: bw@g To: git@vger.kernel.org my $dbh = $rw->{oidx}->dbh; my ($id, $prev); my $reidx = $rw->{oidx}->next_by_mid('1-bw@g', \$id, \$prev); ok(defined $reidx); my $num = $reidx->{num}; my $tid0 = $dbh->selectrow_array(<<'', undef, $num); SELECT tid FROM over WHERE num = ? LIMIT 1 my $bytes = length($mime->as_string); my $mid = mids($mime->header_obj)->[0]; my $smsg = bless { bytes => $bytes, num => $num, mid => $mid, blob => '', }, 'PublicInbox::Smsg'; my $doc_id = $rw->add_message($mime, $smsg); ok($doc_id, 'message reindexed'. $mid); is($doc_id, $num, "article number unchanged: $num"); my $tid1 = $dbh->selectrow_array(<<'', undef, $num); SELECT tid FROM over WHERE num = ? LIMIT 1 is($tid1, $tid0, 'tid unchanged on reindex'); } $rw->commit_txn_lazy; done_testing(); 1; public-inbox-1.9.0/t/search.t000066400000000000000000000512111430031475700160150ustar00rootroot00000000000000# Copyright (C) 2015-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; require_mods(qw(DBD::SQLite Search::Xapian)); require PublicInbox::SearchIdx; require PublicInbox::Inbox; require PublicInbox::InboxWritable; use PublicInbox::Eml; use POSIX qw(strftime); my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/a.git"; my $ibx = PublicInbox::Inbox->new({ inboxdir => $git_dir }); my ($root_id, $last_id); is(0, xsys(qw(git init --shared -q --bare), $git_dir), "git init (main)") or BAIL_OUT("`git init --shared' failed, weird FS or seccomp?"); eval { PublicInbox::Search->new($ibx)->xdb }; ok($@, "exception raised on non-existent DB"); my $rw = PublicInbox::SearchIdx->new($ibx, 1); $ibx->with_umask(sub { $rw->idx_acquire; $rw->idx_release; }); $rw = undef; my $rw_commit = sub { $rw->commit_txn_lazy if $rw; $rw = PublicInbox::SearchIdx->new($ibx, 1); $rw->{qp_flags} = 0; # quiet a warning $rw->begin_txn_lazy; $ibx->search->reopen; }; sub oct_is ($$$) { my ($got, $exp, $msg) = @_; is(sprintf('0%03o', $got), sprintf('0%03o', $exp), $msg); } { # git repository perms oct_is($ibx->_git_config_perm(), &PublicInbox::InboxWritable::PERM_GROUP, 'undefined permission is group'); my @t = ( [ '0644', 0022, '644 => umask(0022)' ], [ '0600', 0077, '600 => umask(0077)' ], [ '0640', 0027, '640 => umask(0027)' ], [ 'group', 0007, 'group => umask(0007)' ], [ 'everybody', 0002, 'everybody => umask(0002)' ], [ 'umask', umask, 'umask => existing umask' ], ); for (@t) { my ($perm, $exp, $msg) = @$_; my $got = PublicInbox::InboxWritable::_umask_for( PublicInbox::InboxWritable->_git_config_perm($perm)); oct_is($got, $exp, $msg); } } { my $crlf_adjust = \&PublicInbox::Smsg::crlf_adjust; is($crlf_adjust->("hi\r\nworld\r\n"), 0, 'no adjustment needed'); is($crlf_adjust->("hi\nworld\n"), 2, 'LF-only counts two CR'); is($crlf_adjust->("hi\r\nworld\n"), 1, 'CRLF/LF-mix 1 counts 1 CR'); is($crlf_adjust->("hi\nworld\r\n"), 1, 'CRLF/LF-mix 2 counts 1 CR'); } $ibx->with_umask(sub { my $root = PublicInbox::Eml->new(<<'EOF'); Date: Fri, 02 Oct 1993 00:00:00 +0000 Subject: Hello world Message-ID: From: John Smith To: list@example.com List-Id: I'm not mad \m/ EOF my $last = PublicInbox::Eml->new(<<'EOF'); Date: Sat, 02 Oct 2010 00:00:00 +0000 Subject: Re: Hello world In-Reply-To: Message-ID: From: John Smith To: list@example.com Cc: foo@example.com List-Id: there's nothing goodbye forever :< EOF my $rv; $rw_commit->(); $root_id = $rw->add_message($root); is($root_id, int($root_id), "root_id is an integer: $root_id"); $last_id = $rw->add_message($last); is($last_id, int($last_id), "last_id is an integer: $last_id"); }); sub filter_mids { my ($msgs) = @_; sort(map { $_->{mid} } @$msgs); } my $query = sub { my ($query_string, $opt) = @_; my $mset = $ibx->search->mset($query_string, $opt); $ibx->search->mset_to_smsg($ibx, $mset); }; { $rw_commit->(); my $found = $query->('m:root@s'); is(scalar(@$found), 1, "message found"); is($found->[0]->{mid}, 'root@s', 'mid set correctly') if @$found; my ($res, @res); my @exp = sort qw(root@s last@s); $res = $query->('s:(Hello world)'); @res = filter_mids($res); is_deeply(\@res, \@exp, 'got expected results for s:() match'); $res = $query->('s:"Hello world"'); @res = filter_mids($res); is_deeply(\@res, \@exp, 'got expected results for s:"" match'); $res = $query->('s:"Hello world"', {limit => 1}); is(scalar @$res, 1, "limit works"); my $first = $res->[0]; $res = $query->('s:"Hello world"', {offset => 1}); is(scalar @$res, 1, "offset works"); my $second = $res->[0]; isnt($first, $second, "offset returned different result from limit"); } # ghost vivication $ibx->with_umask(sub { $rw_commit->(); my $rmid = ''; my $reply_to_ghost = PublicInbox::Eml->new(<<"EOF"); Date: Sat, 02 Oct 2010 00:00:00 +0000 Subject: Re: ghosts Message-ID: In-Reply-To: $rmid From: Time Traveler To: list\@example.com -_- EOF my $rv; my $reply_id = $rw->add_message($reply_to_ghost); is($reply_id, int($reply_id), "reply_id is an integer: $reply_id"); my $was_ghost = PublicInbox::Eml->new(<<"EOF"); Date: Sat, 02 Oct 2010 00:00:01 +0000 Subject: ghosts Message-ID: $rmid From: Laggy Sender To: list\@example.com are real EOF my $ghost_id = $rw->add_message($was_ghost); is($ghost_id, int($ghost_id), "ghost_id is an integer: $ghost_id"); my $msgs = $rw->{oidx}->get_thread('ghost-message@s'); is(scalar(@$msgs), 2, 'got both messages in ghost thread'); foreach (qw(sid tid)) { is($msgs->[0]->{$_}, $msgs->[1]->{$_}, "{$_} match"); } isnt($msgs->[0]->{num}, $msgs->[1]->{num}, "num do not match"); ok($_->{num} > 0, 'positive art num') foreach @$msgs }); # search thread on ghost { $rw_commit->(); # subject my $res = $query->('ghost'); my @exp = sort qw(ghost-message@s ghost-reply@s); my @res = filter_mids($res); is_deeply(\@res, \@exp, 'got expected results for Subject match'); # body $res = $query->('goodbye'); is(scalar(@$res), 1, "goodbye message found"); is($res->[0]->{mid}, 'last@s', 'got goodbye message body') if @$res; # datestamp $res = $query->('dt:20101002000001..20101002000001'); @res = filter_mids($res); is_deeply(\@res, ['ghost-message@s'], 'exact Date: match works'); $res = $query->('dt:20101002000002..20101002000002'); is_deeply($res, [], 'exact Date: match down to the second'); } # long message-id $ibx->with_umask(sub { $rw_commit->(); my $long_mid = 'last' . ('x' x 60). '@s'; my $long = PublicInbox::Eml->new(< In-Reply-To: Message-ID: <$long_mid>, From: "Long I.D." To: list\@example.com wut EOF my $long_id = $rw->add_message($long); is($long_id, int($long_id), "long_id is an integer: $long_id"); $rw_commit->(); my $res; my @res; my $long_reply_mid = 'reply-to-long@1'; my $long_reply = PublicInbox::Eml->new(< In-Reply-To: <$long_mid> From: no1 To: list\@example.com no References EOF ok($rw->add_message($long_reply) > $long_id, "inserted long reply"); $rw_commit->(); my $t = $ibx->over->get_thread('root@s'); is(scalar(@$t), 4, "got all 4 messages in thread"); my @exp = sort($long_reply_mid, 'root@s', 'last@s', $long_mid); @res = filter_mids($t); is_deeply(\@res, \@exp, "get_thread works"); }); # quote prioritization $ibx->with_umask(sub { $rw_commit->(); $rw->add_message(PublicInbox::Eml->new(<<'EOF')); Date: Sat, 02 Oct 2010 00:00:01 +0000 Subject: Hello Message-ID: From: Quoter To: list@example.com > theatre illusions fade EOF $rw->add_message(PublicInbox::Eml->new(<<'EOF')); Date: Sat, 02 Oct 2010 00:00:02 +0000 Subject: Hello Message-ID: From: Non-Quoter To: list@example.com theatre fade EOF $rw_commit->(); my $res = $query->("theatre"); is(scalar(@$res), 2, "got both matches"); if (@$res == 2) { is($res->[0]->{mid}, 'nquote@a', 'non-quoted scores higher'); is($res->[1]->{mid}, 'quote@a', 'quoted result still returned'); } $res = $query->("illusions"); is(scalar(@$res), 1, "got a match for quoted text"); is($res->[0]->{mid}, 'quote@a', "quoted result returned if nothing else") if scalar(@$res); }); # circular references $ibx->with_umask(sub { my $s = 'foo://'. ('Circle' x 15).'/foo'; my $doc_id = $rw->add_message(PublicInbox::Eml->new(< References: In-Reply-To: From: Circle To: list\@example.com LOOP! EOF ok($doc_id > 0, "doc_id defined with circular reference"); $rw_commit->(); my $smsg = $query->('m:circle@a', {limit=>1})->[0]; is(defined($smsg), 1, 'found m:circl@a'); if (defined $smsg) { is($smsg->{references}, '', "no references created"); is($smsg->{subject}, $s, 'long subject not rewritten'); } }); { my $msgs = $query->('d:19931002..20101002'); ok(scalar(@$msgs) > 0, 'got results within range'); $msgs = $query->('d:20101003..'); is(scalar(@$msgs), 0, 'nothing after 20101003'); $msgs = $query->('d:..19931001'); is(scalar(@$msgs), 0, 'nothing before 19931001'); } $ibx->with_umask(sub { my $mime = eml_load 't/utf8.eml'; my $doc_id = $rw->add_message($mime); ok($doc_id > 0, 'message indexed doc_id with UTF-8'); $rw_commit->(); my $msg = $query->('m:testmessage@example.com', {limit => 1})->[0]; is(defined($msg), 1, 'found testmessage@example.com'); if (defined $msg) { is($mime->header('Subject'), $msg->{subject}, 'UTF-8 subject preserved'); } }); # names and addresses { my $mset = $ibx->search->mset('t:list@example.com'); is($mset->size, 9, 'searched To: successfully'); foreach my $m ($mset->items) { my $smsg = $ibx->over->get_art($m->get_docid); like($smsg->{to}, qr/\blist\@example\.com\b/, 'to appears'); my $doc = $m->get_document; my $col = PublicInbox::Search::BYTES(); my $bytes = PublicInbox::Search::int_val($doc, $col); like($bytes, qr/\A[0-9]+\z/, '$bytes stored as digit'); ok($bytes > 0, '$bytes is > 0'); is($bytes, $smsg->{bytes}, 'bytes Xapian value matches Over'); $col = PublicInbox::Search::UID(); my $uid = PublicInbox::Search::int_val($doc, $col); is($uid, $smsg->{num}, 'UID column matches {num}'); is($uid, $m->get_docid, 'UID column matches docid'); } $mset = $ibx->search->mset('tc:list@example.com'); is($mset->size, 9, 'searched To+Cc: successfully'); foreach my $m ($mset->items) { my $smsg = $ibx->over->get_art($m->get_docid); my $tocc = join("\n", $smsg->{to}, $smsg->{cc}); like($tocc, qr/\blist\@example\.com\b/, 'tocc appears'); } foreach my $pfx ('tcf:', 'c:') { my $mset = $ibx->search->mset($pfx . 'foo@example.com'); is($mset->items, 1, "searched $pfx successfully for Cc:"); foreach my $m ($mset->items) { my $smsg = $ibx->over->get_art($m->get_docid); like($smsg->{cc}, qr/\bfoo\@example\.com\b/, 'cc appears'); } } foreach my $pfx ('', 'tcf:', 'f:') { my $res = $query->($pfx . 'Laggy'); is(scalar(@$res), 1, "searched $pfx successfully for From:"); foreach my $smsg (@$res) { like($smsg->{from_name}, qr/Laggy Sender/, "From appears with $pfx"); } } } { $rw_commit->(); my $res = $query->('b:hello'); is(scalar(@$res), 0, 'no match on body search only'); $res = $query->('bs:smith'); is(scalar(@$res), 0, 'no match on body+subject search for From'); $res = $query->('q:theatre'); is(scalar(@$res), 1, 'only one quoted body'); like($res->[0]->{from_name}, qr/\AQuoter/, 'got quoted body') if (scalar(@$res)); $res = $query->('nq:theatre'); is(scalar @$res, 1, 'only one non-quoted body'); like($res->[0]->{from_name}, qr/\ANon-Quoter/, 'got non-quoted body') if (scalar(@$res)); foreach my $pfx (qw(b: bs:)) { $res = $query->($pfx . 'theatre'); is(scalar @$res, 2, "searched both bodies for $pfx"); like($res->[0]->{from_name}, qr/\ANon-Quoter/, "non-quoter first for $pfx") if scalar(@$res); } } $ibx->with_umask(sub { my $amsg = eml_load 't/search-amsg.eml'; my $oid = ('0'x40); my $smsg = bless { blob => $oid }, 'PublicInbox::Smsg'; ok($rw->add_message($amsg, $smsg), 'added attachment'); $rw_commit->(); my $n = $query->('n:attached_fart.txt'); is(scalar @$n, 1, 'got result for n:'); my $res = $query->('part_deux.txt'); is(scalar @$res, 1, 'got result without n:'); is($n->[0]->{mid}, $res->[0]->{mid}, 'same result with and without') if scalar(@$res); my $txt = $query->('"inside another"'); is(scalar @$txt, 1, 'found inside another'); is($txt->[0]->{mid}, $res->[0]->{mid}, 'search inside text attachments works') if scalar(@$txt); my $art; if (scalar(@$n) >= 1) { my $mid = $n->[0]->{mid}; my ($id, $prev); $art = $ibx->over->next_by_mid($mid, \$id, \$prev); ok($art, 'article exists in OVER DB'); } $rw->_msgmap_init; $rw->unindex_eml($oid, $amsg); $rw->commit_txn_lazy; SKIP: { skip('$art not defined', 1) unless defined $art; is($ibx->over->get_art($art->{num}), undef, 'gone from OVER DB'); }; }); my $all_mask = 07777; my $dir_mask = 02770; # FreeBSD and apparently OpenBSD does not allow non-root users to set S_ISGID, # so git doesn't set it, either (see DIR_HAS_BSD_GROUP_SEMANTICS in git.git) if ($^O =~ /(?:free|open)bsd/i) { $all_mask = 0777; $dir_mask = 0770; } foreach my $f ("$git_dir/public-inbox/msgmap.sqlite3", "$git_dir/public-inbox", glob("$git_dir/public-inbox/xapian*/"), glob("$git_dir/public-inbox/xapian*/*")) { my @st = stat($f); my ($bn) = (split(m!/!, $f))[-1]; oct_is($st[2] & $all_mask, -f _ ? 0660 : $dir_mask, "sharedRepository respected for $bn"); } $ibx->with_umask(sub { $rw_commit->(); my $digits = '10010260936330'; my $ua = 'Pine.LNX.4.10'; my $mid = "$ua.$digits.2460-100000\@penguin.transmeta.com"; is($ibx->search->mset("m:$digits")->size, 0, 'no results yet'); my $pine = PublicInbox::Eml->new(< From: torvalds\@transmeta To: list\@example.com EOF my $x = $rw->add_message($pine); $rw->commit_txn_lazy; $ibx->search->reopen; is($ibx->search->mset("m:$digits")->size, 1, 'searching only digit yielded result'); my $wild = $digits; for my $i (1..6) { chop($wild); is($ibx->search->mset("m:$wild*")->size, 1, "searching chopped($i) digit yielded result $wild "); } is($ibx->search->mset('m:Pine m:LNX m:10010260936330')->size, 1); }); { # List-Id searching my $found = $query->('lid:i.m.just.bored'); is_deeply([ filter_mids($found) ], [ 'root@s' ], 'got expected mid on exact lid: search'); $found = $query->('lid:just.bored'); is_deeply($found, [], 'got nothing on lid: search'); $found = $query->('lid:*.just.bored'); is_deeply($found, [], 'got nothing on lid: search'); $found = $query->('l:i.m.just.bored'); is_deeply([ filter_mids($found) ], [ 'root@s' ], 'probabilistic search works on full List-Id contents'); $found = $query->('l:just.bored'); is_deeply([ filter_mids($found) ], [ 'root@s' ], 'probabilistic search works on partial List-Id contents'); $found = $query->('lid:mad'); is_deeply($found, [], 'no match on phrase with lid:'); $found = $query->('lid:bored'); is_deeply($found, [], 'no match on partial List-Id with lid:'); $found = $query->('l:nothing'); is_deeply($found, [], 'matched on phrase with l:'); } $ibx->with_umask(sub { $rw_commit->(); my $doc_id = $rw->add_message(eml_load('t/data/message_embed.eml')); ok($doc_id > 0, 'messages within messages'); $rw->commit_txn_lazy; $ibx->search->reopen; my $n_test_eml = $query->('n:test.eml'); is(scalar(@$n_test_eml), 1, 'got a result'); my $n_embed2x_eml = $query->('n:embed2x.eml'); is_deeply($n_test_eml, $n_embed2x_eml, '.eml filenames searchable'); for my $m (qw(20200418222508.GA13918@dcvr 20200418222020.GA2745@dcvr 20200418214114.7575-1-e@yhbt.net)) { is($query->("m:$m")->[0]->{mid}, '20200418222508.GA13918@dcvr', 'probabilistic m:'.$m); is($query->("mid:$m")->[0]->{mid}, '20200418222508.GA13918@dcvr', 'boolean mid:'.$m); } is($query->('dfpost:4dc62c50')->[0]->{mid}, '20200418222508.GA13918@dcvr', 'diff search reaches inside message/rfc822'); is($query->('s:"mail header experiments"')->[0]->{mid}, '20200418222508.GA13918@dcvr', 'Subject search reaches inside message/rfc822'); $doc_id = $rw->add_message(eml_load('t/data/binary.patch')); $rw->commit_txn_lazy; $ibx->search->reopen; my $res = $query->('HcmV'); is_deeply($res, [], 'no results against trailer'); $res = $query->('IcmZPo000310RR91'); is_deeply($res, [], 'no results against 1-byte binary patch'); $res = $query->('"GIT binary patch"'); is(scalar(@$res), 1, 'got binary result from "GIT binary patch"'); is($res->[0]->{mid}, 'binary-patch-test@example', 'msgid for binary'); my $s = $query->('"literal 1"'); is_deeply($s, $res, 'got binary result from exact literal size'); $s = $query->('"literal 2"'); is_deeply($s, [], 'no results for wrong size'); }); SKIP: { my ($s, $g) = ($ibx->search, $ibx->git); my $q = $s->query_argv_to_string($g, ["quoted phrase"]); is($q, q["quoted phrase"], 'quoted phrase'); $q = $s->query_argv_to_string($g, ['s:pa ce']); is($q, q[s:"pa ce"], 'space with prefix'); $q = $s->query_argv_to_string($g, ["\(s:pa ce", "AND", "foo\)"]); is($q, q[(s:"pa ce" AND foo)], 'space AND foo'); local $ENV{TZ} = 'UTC'; my $now = strftime('%H:%M:%S', gmtime(time)); if ($now =~ /\A23:(?:59|60)/ || $now =~ /\A00:00:0[01]\z/) { skip 'too close to midnight, time is tricky', 6; } $q = $s->query_argv_to_string($g, [qw(d:20101002 blah)]); is($q, 'd:20101002..20101003 blah', 'YYYYMMDD expanded to range'); $q = $s->query_argv_to_string($g, [qw(d:2010-10-02)]); is($q, 'd:20101002..20101003', 'YYYY-MM-DD expanded to range'); $q = $s->query_argv_to_string($g, [qw(rt:2010-10-02.. yy)]); $q =~ /\Art:(\d+)\.\. yy/ or fail("rt: expansion failed: $q"); is(strftime('%Y-%m-%d', gmtime($1//0)), '2010-10-02', 'rt: beg expand'); $q = $s->query_argv_to_string($g, [qw(rt:..2010-10-02 zz)]); $q =~ /\Art:\.\.(\d+) zz/ or fail("rt: expansion failed: $q"); is(strftime('%Y-%m-%d', gmtime($1//0)), '2010-10-02', 'rt: end expand'); $q = $s->query_argv_to_string($g, [qw(something dt:2010-10-02..)]); like($q, qr/\Asomething dt:20101002\d{6}\.\./, 'dt: expansion'); $q = $s->query_argv_to_string($g, [qw(x dt:yesterday.. y)]); my $exp = strftime('%Y%m%d', gmtime(time - 86400)); like($q, qr/x dt:$exp[0-9]{6}\.\. y/, '"yesterday" handled'); $q = $s->query_argv_to_string($g, [qw(x dt:20101002054123)]); is($q, 'x dt:20101002054123..20101003054123', 'single dt: expanded'); $q = $s->query_argv_to_string($g, [qw(x dt:2010-10-02T05:41:23Z)]); is($q, 'x dt:20101002054123..20101003054123', 'ISO8601 dt: expanded'); $q = $s->query_argv_to_string($g, [qw(rt:1970..1971)]); $q =~ /\Art:(\d+)\.\.(\d+)\z/ or fail "YYYY rt: expansion: $q"; my ($beg, $end) = ($1, $2); is(strftime('%Y', gmtime($beg)), 1970, 'rt: starts at 1970'); is(strftime('%Y', gmtime($end)), 1971, 'rt: ends at 1971'); $q = $s->query_argv_to_string($g, [qw(rt:1970-01-01)]); $q =~ /\Art:(\d+)\.\.(\d+)\z/ or fail "YYYY-MM-DD rt: expansion: $q"; ($beg, $end) = ($1, $2); is(strftime('%Y-%m-%d', gmtime($beg)), '1970-01-01', 'rt: date-only w/o range'); is(strftime('%Y-%m-%d', gmtime($end)), '1970-01-02', 'rt: date-only auto-end'); $q = $s->query_argv_to_string($g, [qw{OR (rt:1993-10-02)}]); like($q, qr/\AOR \(rt:749\d{6}\.\.749\d{6}\)\z/, 'trailing parentheses preserved'); my $qs = qq[f:bob rt:1993-10-02..2010-10-02]; $s->query_approxidate($g, $qs); like($qs, qr/\Af:bob rt:749\d{6}\.\.128\d{7}\z/, 'no phrases, no problem'); my $orig = $qs = qq[f:bob "d:1993-10-02..2010-10-02"]; $s->query_approxidate($g, $qs); is($qs, $orig, 'phrase preserved'); $orig = $qs = qq[f:bob "d:1993-10-02..2010-10-02 "] . qq["dt:1993-10-02..2010-10-02 " \x{201c}]; $s->query_approxidate($g, $qs); is($qs, $orig, 'phrase preserved even with escaped ""'); $orig = $qs = qq[f:bob "hello world" d:1993-10-02..2010-10-02]; $s->query_approxidate($g, $qs); is($qs, qq[f:bob "hello world" d:19931002..20101002], 'post-phrase date corrected'); # Xapian uses "" to escape " inside phrases, we don't explictly # handle that, but are able to pass the result through unchanged for my $pair (["\x{201c}", "\x{201d}"], ['"', '"']) { my ($x, $y) = @$pair; $orig = $qs = qq[${x}hello d:1993-10-02.."" world$y]; $s->query_approxidate($g, $qs); is($qs, $orig, 'phrases unchanged \x'.ord($x).'-\x'.ord($y)); $s->query_approxidate($g, my $tmp = "$qs d:..2010-10-02"); is($tmp, "$orig d:..20101002", 'two phrases did not throw off date parsing'); $orig = $qs = qq[${x}hello d:1993-10-02..$y$x world$y]; $s->query_approxidate($g, $qs); is($qs, $orig, 'phrases unchanged \x'.ord($x).'-\x'.ord($y)); $s->query_approxidate($g, $tmp = "$qs d:..2010-10-02"); is($tmp, "$orig d:..20101002", 'two phrases did not throw off date parsing'); } my $x_days_ago = strftime('%Y%m%d', gmtime(time - (5 * 86400))); $orig = $qs = qq[broken d:5.days.ago..]; $s->query_approxidate($g, $qs); like($qs, qr/\Abroken dt:$x_days_ago[0-9]{6}\.\./, 'date.phrase.with.dots'); $orig = $qs = 'd:20101002..now'; $s->query_approxidate($g, $qs); like($qs, qr/\Adt:20101002000000\.\.[0-9]{14}\z/, 'approxidate on range-end only'); $ENV{TEST_EXPENSIVE} or skip 'TEST_EXPENSIVE not set for argv overflow check', 1; my @w; local $SIG{__WARN__} = sub { push @w, @_ }; # for pure Perl version my @fail = map { 'd:1993-10-02..2010-10-02' } (1..(4096 * 32)); eval { $s->query_argv_to_string($g, \@fail) }; ok($@, 'exception raised'); } done_testing(); public-inbox-1.9.0/t/shared_kv.t000066400000000000000000000032051430031475700165160ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::TestCommon; require_mods(qw(DBD::SQLite)); use_ok 'PublicInbox::SharedKV'; my ($tmpdir, $for_destroy) = tmpdir(); local $ENV{TMPDIR} = $tmpdir; my $skv = PublicInbox::SharedKV->new; my $skv_tmpdir = $skv->{"tmp$$.$skv"}; ok(-d $skv_tmpdir, 'created a temporary dir'); $skv->dbh; my $dead = "\xde\xad"; my $beef = "\xbe\xef"; my $cafe = "\xca\xfe"; ok($skv->set($dead, $beef), 'set'); is($skv->get($dead), $beef, 'get'); ok($skv->set($dead, $beef), 'set idempotent'); ok(!$skv->set_maybe($dead, $cafe), 'set_maybe ignores'); ok($skv->set_maybe($cafe, $dead), 'set_maybe sets'); is($skv->xchg($dead, $cafe), $beef, 'xchg'); is($skv->get($dead), $cafe, 'get after xchg'); is($skv->xchg($dead, undef), $cafe, 'xchg to undef'); is($skv->get($dead), undef, 'get after xchg to undef'); is($skv->get($cafe), $dead, 'get after set_maybe'); is($skv->xchg($dead, $cafe), undef, 'xchg from undef'); is($skv->count, 2, 'count works'); my %seen; my $sth = $skv->each_kv_iter; while (my ($k, $v) = $sth->fetchrow_array) { $seen{$k} = $v; } is($seen{$dead}, $cafe, '$dead has expected value'); is($seen{$cafe}, $dead, '$cafe has expected value'); is(scalar keys %seen, 2, 'iterated through all'); undef $skv; ok(!-d $skv_tmpdir, 'temporary dir gone'); $skv = PublicInbox::SharedKV->new("$tmpdir/dir", 'base'); ok(-e "$tmpdir/dir/base.sqlite3", 'file created'); $skv->dbh; ok($skv->set_maybe('02', '2'), "`02' set"); ok($skv->set_maybe('2', '2'), "`2' set (no match on `02')"); done_testing; public-inbox-1.9.0/t/sigfd.t000066400000000000000000000040231430031475700156430ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors use strict; use Test::More; use IO::Handle; use POSIX qw(:signal_h); use Errno qw(ENOSYS); require_ok 'PublicInbox::Sigfd'; use PublicInbox::DS; SKIP: { if ($^O ne 'linux' && !eval { require IO::KQueue }) { skip 'signalfd requires Linux or IO::KQueue to emulate', 10; } my $old = PublicInbox::DS::block_signals(); my $hit = {}; my $sig = {}; local $SIG{HUP} = sub { $hit->{HUP}->{normal}++ }; local $SIG{TERM} = sub { $hit->{TERM}->{normal}++ }; local $SIG{INT} = sub { $hit->{INT}->{normal}++ }; for my $s (qw(HUP TERM INT)) { $sig->{$s} = sub { $hit->{$s}->{sigfd}++ }; } my $sigfd = PublicInbox::Sigfd->new($sig, 0); if ($sigfd) { ok($sigfd, 'Sigfd->new works'); kill('HUP', $$) or die "kill $!"; kill('INT', $$) or die "kill $!"; my $fd = fileno($sigfd->{sock}); ok($fd >= 0, 'fileno(Sigfd->{sock}) works'); my $rvec = ''; vec($rvec, $fd, 1) = 1; is(select($rvec, undef, undef, undef), 1, 'select() works'); ok($sigfd->wait_once, 'wait_once reported success'); for my $s (qw(HUP INT)) { is($hit->{$s}->{sigfd}, 1, "sigfd fired $s"); is($hit->{$s}->{normal}, undef, 'normal $SIG{$s} not fired'); } $sigfd = undef; my $nbsig = PublicInbox::Sigfd->new($sig, 1); ok($nbsig, 'Sigfd->new SFD_NONBLOCK works'); is($nbsig->wait_once, undef, 'nonblocking ->wait_once'); ok($! == Errno::EAGAIN, 'got EAGAIN'); kill('HUP', $$) or die "kill $!"; PublicInbox::DS->SetPostLoopCallback(sub {}); # loop once PublicInbox::DS::event_loop(); is($hit->{HUP}->{sigfd}, 2, 'HUP sigfd fired in event loop') or diag explain($hit); # sometimes fails on FreeBSD 11.x kill('TERM', $$) or die "kill $!"; kill('HUP', $$) or die "kill $!"; PublicInbox::DS::event_loop(); PublicInbox::DS->Reset; is($hit->{TERM}->{sigfd}, 1, 'TERM sigfd fired in event loop'); is($hit->{HUP}->{sigfd}, 3, 'HUP sigfd fired in event loop'); } else { skip('signalfd disabled?', 10); } sigprocmask(SIG_SETMASK, $old) or die "sigprocmask $!"; } done_testing; public-inbox-1.9.0/t/solve/000077500000000000000000000000001430031475700155135ustar00rootroot00000000000000public-inbox-1.9.0/t/solve/0001-simple-mod.patch000066400000000000000000000011251430031475700211570ustar00rootroot00000000000000From: WEB DESIGN EXPERT To: meta@public-inbox.org Subject: [PATCH] TODO: take expert web design advice Date: Mon, 1 Apr 2019 08:15:20 +0000 Message-Id: <20190401081523.16213-1-BOFH@YHBT.net> Content-Type: text/plain; charset=utf-8 --- TODO | 2 ++ Ω | 5 -- 1 file changed, 2 insertions(+) diff --git a/TODO b/TODO index 605013e..69df7d5 100644 --- a/TODO +++ b/TODO @@ -109,3 +109,5 @@ all need to be considered for everything we introduce) * Optional history squashing to reduce commit and intermediate tree objects + + * Make use of and tags public-inbox-1.9.0/t/solve/0002-rename-with-modifications.patch000066400000000000000000000024511430031475700241630ustar00rootroot00000000000000From: POLITICAL CORRECTNESS EXPERT To: meta@public-inbox.org Subject: [PATCH] POLITICALLY CORRECT FILE NAMING Date: Mon, 1 Apr 2019 08:15:20 +0000 Message-Id: <20190401081523.16213-2-BOFH@YHBT.net> HACKING MIGHT GET US REPORTED TO EFF-BEE-EYE AND USE MARKDOWN CUZ MOAR FLAVORS == BETTER --- HACKING => CONTRIBUTING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename HACKING => CONTRIBUTING.md (94%) diff --git a/HACKING b/CONTRIBUTING.md similarity index 94% rename from HACKING rename to CONTRIBUTING.md index 3435775..0a92431 100644 --- a/HACKING +++ b/CONTRIBUTING.md @@ -1,5 +1,5 @@ -hacking public-inbox --------------------- +contributing to public-inbox +---------------------------- Send all patches and "git request-pull"-formatted emails to our self-hosting inbox at meta@public-inbox.org @@ -15,7 +15,7 @@ Please consider our goals in mind: Decentralization, Accessibility, Compatibility, Performance These goals apply to everyone: users viewing over the web or NNTP, -sysadmins running public-inbox, and other hackers working public-inbox. +sysadmins running public-inbox, and other contributors working public-inbox. We will reject any feature which advocates or contributes to any particular instance of a public-inbox becoming a single point of failure. public-inbox-1.9.0/t/solve/bare.patch000066400000000000000000000004301430031475700174420ustar00rootroot00000000000000diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex old mode 100644 new mode 100755 index 15ac20eb..771486c4 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -4 +3,0 @@ -# Basic tool to create a Xapian search index for a public-inbox. public-inbox-1.9.0/t/solver_git.t000066400000000000000000000254661430031475700167420ustar00rootroot00000000000000#!perl -w # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use Cwd qw(abs_path); require_git(2.6); use PublicInbox::ContentHash qw(git_sha); use PublicInbox::Spawn qw(popen_rd); require_mods(qw(DBD::SQLite Search::Xapian Plack::Util)); my $git_dir = xqx([qw(git rev-parse --git-dir)], undef, {2 => \(my $null)}); $? == 0 or plan skip_all => "$0 must be run from a git working tree"; chomp $git_dir; # needed for alternates, and --absolute-git-dir is only in git 2.13+ $git_dir = abs_path($git_dir); use_ok "PublicInbox::$_" for (qw(Inbox V2Writable Git SolverGit WWW)); my $patch2 = eml_load 't/solve/0002-rename-with-modifications.patch'; my $patch2_oid = git_sha(1, $patch2)->hexdigest; my ($tmpdir, $for_destroy) = tmpdir(); my $ibx = create_inbox 'v2', version => 2, indexlevel => 'medium', sub { my ($im) = @_; $im->add(eml_load 't/solve/0001-simple-mod.patch') or BAIL_OUT; $im->add($patch2) or BAIL_OUT; }; my $md = "$tmpdir/md"; File::Path::mkpath([map { $md.$_ } (qw(/ /cur /new /tmp))]); symlink(abs_path('t/solve/0001-simple-mod.patch'), "$md/cur/foo:2,") or xbail "symlink: $!"; my $v1_0_0_tag = 'cb7c42b1e15577ed2215356a2bf925aef59cdd8d'; my $v1_0_0_tag_short = substr($v1_0_0_tag, 0, 16); my $expect = '69df7d565d49fbaaeb0a067910f03dc22cd52bd0'; my $non_existent = 'ee5e32211bf62ab6531bdf39b84b6920d0b6775a'; test_lei({tmpdir => "$tmpdir/blob"}, sub { lei_ok('blob', '--mail', $patch2_oid, '-I', $ibx->{inboxdir}, \'--mail works for existing oid'); is($lei_out, $patch2->as_string, 'blob matches'); ok(!lei('blob', '--mail', '69df7d5', '-I', $ibx->{inboxdir}), "--mail won't run solver"); like($lei_err, qr/\b69df7d5\b/, 'OID in error by git(1)'); lei_ok('blob', '69df7d5', '-I', $ibx->{inboxdir}); is(git_sha(1, \$lei_out)->hexdigest, $expect, 'blob contents output'); my $prev = $lei_out; lei_ok(qw(blob --no-mail 69df7d5 -I), $ibx->{inboxdir}); is($lei_out, $prev, '--no-mail works'); ok(!lei(qw(blob -I), $ibx->{inboxdir}, $non_existent), 'non-existent blob fails'); my $abbrev = substr($non_existent, 0, 7); like($lei_err, qr/could not find $abbrev/, 'failed abbreviation noted'); SKIP: { skip '/.git exists', 1 if -e '/.git'; lei_ok(qw(-C / blob 69df7d5 -I), $ibx->{inboxdir}, "--git-dir=$git_dir"); is($lei_out, $prev, '--git-dir works'); ok(!lei(qw(-C / blob --no-cwd 69df7d5 -I), $ibx->{inboxdir}), '--no-cwd works'); like($lei_err, qr/no --git-dir to try/, 'lack of --git-dir noted'); ok(!lei(qw(-C / blob -I), $ibx->{inboxdir}, $non_existent), 'non-existent blob fails'); like($lei_err, qr/no --git-dir to try/, 'lack of --git-dir noted'); } # fallbacks lei_ok('blob', $v1_0_0_tag, '-I', $ibx->{inboxdir}); lei_ok('blob', $v1_0_0_tag_short, '-I', $ibx->{inboxdir}); }); test_lei({tmpdir => "$tmpdir/rediff"}, sub { lei_ok(qw(rediff -q -U9 t/solve/0001-simple-mod.patch)); like($lei_out, qr!^\Q+++\E b/TODO\n@@ -103,9 \+103,11 @@!sm, 'got more context with -U9'); my (undef, $re) = split(/\n\n/, $lei_out, 2); $re =~ s/^/> /sgm; substr($re, 0, 0, < \$re, %$lei_opt }); my $exp = <<'EOM'; From: me@example.com Subject: Re: awesome advice EOM like($lei_out, qr/\Q$exp\E/, '--drq preserved header'); # n.b. --drq can requote the attribution line ("So-and-so wrote:"), # but it's probably not worth preventing... $exp = <<'EOM'; > --- > TODO | 2 ++ > Ω | 5 -- > 1 file changed, 2 insertions(+) > > diff --git a/TODO b/TODO > index 605013e4904baabecd4a0a55997aebd8e8477a8f..69df7d565d49fbaaeb0a067910f03dc22cd52bd0 100644 > --- a/TODO > +++ b/TODO > @@ -96,16 +96,18 @@ all need to be considered for everything we introduce) EOM $exp =~ s/^>$/> /sgm; # re-add trailing white space like($lei_out, qr/\Q$exp\E/, '--drq diffstat + context'); lei_ok(qw(rediff -q --full-index -U9 t/solve/bare.patch)); $exp = <<'EOM'; diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex old mode 100644 new mode 100755 index 15ac20eb871bf47697377e58a27db23102a38fca..771486c425b315bae70fd8a82d62ab0331e0a827 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -1,13 +1,12 @@ #!perl -w EOM like($lei_out, qr/\Q$exp\E/, 'preserve mode, regen header + context from -U0 patch'); is($lei_err, '', 'no warnings from bare patch'); my $e = { GIT_DIR => "$ENV{HOME}/.local/share/lei/store/ALL.git" }; my @x = xqx([qw(git cat-file --batch-all-objects --batch-check)], $e); is_deeply(\@x, [], 'no objects stored') or diag explain(\@x); }); test_lei({tmpdir => "$tmpdir/index-eml-only"}, sub { lei_ok(qw(index), $md); lei_ok(qw(blob 69df7d5)); # hits LeiSearch->smsg_eml -> lms->local_blob }); my $git = PublicInbox::Git->new($git_dir); $ibx->{-repo_objs} = [ $git ]; my $res; my $solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); open my $log, '+>>', "$tmpdir/solve.log" or die "open: $!"; my $psgi_env = { 'psgi.errors' => \*STDERR, 'psgi.url_scheme' => 'http', 'HTTP_HOST' => 'example.com' }; $solver->solve($psgi_env, $log, '69df7d5', {}); ok($res, 'solved a blob!'); my $wt_git = $res->[0]; is(ref($wt_git), 'PublicInbox::Git', 'got a git object for the blob'); is($res->[1], $expect, 'resolved blob to unabbreviated identifier'); is($res->[2], 'blob', 'type specified'); is($res->[3], 4405, 'size returned'); is(ref($wt_git->cat_file($res->[1])), 'SCALAR', 'wt cat-file works'); is_deeply([$expect, 'blob', 4405], [$wt_git->check($res->[1])], 'wt check works'); my $oid = $expect; for my $i (1..2) { my $more; my $s = PublicInbox::SolverGit->new($ibx, sub { $more = $_[0] }); $s->solve($psgi_env, $log, $oid, {}); is($more->[1], $expect, 'resolved blob to long OID '.$i); chop($oid); } $solver = undef; $res = undef; my $wt_git_dir = $wt_git->{git_dir}; $wt_git = undef; ok(!-d $wt_git_dir, 'no references to WT held'); $solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); $solver->solve($psgi_env, $log, '0'x40, {}); is($res, undef, 'no error on z40'); my $git_v2_20_1_tag = '7a95a1cd084cb665c5c2586a415e42df0213af74'; $solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); $solver->solve($psgi_env, $log, $git_v2_20_1_tag, {}); is($res, undef, 'no error on a tag not in our repo'); $solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); $solver->solve($psgi_env, $log, '0a92431', {}); ok($res, 'resolved without hints'); my $hints = { oid_a => '3435775', path_a => 'HACKING', path_b => 'CONTRIBUTING' }; $solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); $solver->solve($psgi_env, $log, '0a92431', $hints); my $hinted = $res; # don't compare ::Git objects: shift @$res; shift @$hinted; is_deeply($res, $hinted, 'hints work (or did not hurt :P'); my @psgi = qw(HTTP::Request::Common Plack::Test URI::Escape Plack::Builder); SKIP: { require_mods(@psgi, 7 + scalar(@psgi)); use_ok($_) for @psgi; my $binfoo = "$ibx->{inboxdir}/binfoo.git"; my $l = "$ibx->{inboxdir}/inbox.lock"; -f $l or BAIL_OUT "BUG: $l missing: $!"; require_ok 'PublicInbox::ViewVCS'; my $big_size = do { no warnings 'once'; $PublicInbox::ViewVCS::MAX_SIZE + 1; }; my %bin = (big => $big_size, small => 1); my %oid; # (small|big) => OID my $lk = bless { lock_path => $l }, 'PublicInbox::Lock'; my $acq = $lk->lock_for_scope; my $stamp = "$binfoo/stamp"; if (open my $fh, '<', $stamp) { %oid = map { chomp; split(/=/, $_) } (<$fh>); } else { PublicInbox::Import::init_bare($binfoo); my $cmd = [ qw(git hash-object -w --stdin) ]; my $env = { GIT_DIR => $binfoo }; open my $fh, '>', "$stamp.$$" or BAIL_OUT; while (my ($label, $size) = each %bin) { pipe(my ($rin, $win)) or BAIL_OUT; my $rout = popen_rd($cmd , $env, { 0 => $rin }); $rin = undef; print { $win } ("\0" x $size) or BAIL_OUT; close $win or BAIL_OUT; chomp(my $x = <$rout>); close $rout or BAIL_OUT "$?"; print $fh "$label=$x\n" or BAIL_OUT; $oid{$label} = $x; } close $fh or BAIL_OUT; rename("$stamp.$$", $stamp) or BAIL_OUT; } undef $acq; # ensure the PSGI frontend (ViewVCS) works: my $name = $ibx->{name}; my $cfgpfx = "publicinbox.$name"; my $cfgpath = "$tmpdir/httpd-config"; open my $cfgfh, '>', $cfgpath or die; print $cfgfh <{-primary_address} inboxdir = $ibx->{inboxdir} coderepo = public-inbox coderepo = binfoo url = http://example.com/$name [coderepo "public-inbox"] dir = $git_dir cgiturl = http://example.com/public-inbox [coderepo "binfoo"] dir = $binfoo cgiturl = http://example.com/binfoo EOF close $cfgfh or die; my $cfg = PublicInbox::Config->new($cfgpath); my $www = PublicInbox::WWW->new($cfg); my $client = sub { my ($cb) = @_; my $mid = '20190401081523.16213-1-BOFH@YHBT.net'; my @warn; my $res = do { local $SIG{__WARN__} = sub { push @warn, @_ }; $cb->(GET("/$name/$mid/")); }; is_deeply(\@warn, [], 'no warnings from rendering diff'); like($res->content, qr!>Ω!, 'omega escaped'); $res = $cb->(GET("/$name/3435775/s/")); is($res->code, 200, 'success with existing blob'); $res = $cb->(GET("/$name/".('0'x40).'/s/')); is($res->code, 404, 'failure with null OID'); $res = $cb->(GET("/$name/$non_existent/s/")); is($res->code, 404, 'failure with null OID'); $res = $cb->(GET("/$name/$v1_0_0_tag/s/")); is($res->code, 200, 'shows commit (unabbreviated)'); $res = $cb->(GET("/$name/$v1_0_0_tag_short/s/")); is($res->code, 200, 'shows commit (abbreviated)'); while (my ($label, $size) = each %bin) { $res = $cb->(GET("/$name/$oid{$label}/s/")); is($res->code, 200, "$label binary file"); ok(index($res->content, "blob $size bytes") >= 0, "showed $label binary blob size"); $res = $cb->(GET("/$name/$oid{$label}/s/raw")); is($res->code, 200, "$label raw binary download"); is($res->content, "\0" x $size, "$label content matches"); } }; test_psgi(sub { $www->call(@_) }, $client); SKIP: { require_mods(qw(Plack::Test::ExternalServer), 7); my $env = { PI_CONFIG => $cfgpath }; my $sock = tcp_server() or die; my ($out, $err) = map { "$tmpdir/std$_.log" } qw(out err); my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, $env, { 3 => $sock }); my ($h, $p) = tcp_host_port($sock); my $url = "http://$h:$p"; local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = $url; Plack::Test::ExternalServer::test_psgi(client => $client); require_cmd('curl', 1) or skip 'no curl', 1; mkdir "$tmpdir/ext" // xbail "mkdir $!"; test_lei({tmpdir => "$tmpdir/ext"}, sub { my $rurl = "$url/$name"; lei_ok(qw(blob --no-mail 69df7d5 -I), $rurl); is(git_sha(1, \$lei_out)->hexdigest, $expect, 'blob contents output'); ok(!lei(qw(blob -I), $rurl, $non_existent), 'non-existent blob fails'); }); } } done_testing(); public-inbox-1.9.0/t/spamcheck_spamc.t000066400000000000000000000026351430031475700176770ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use IO::File; use Fcntl qw(:DEFAULT SEEK_SET); use PublicInbox::TestCommon; my ($tmpdir, $for_destroy) = tmpdir(); use_ok 'PublicInbox::Spamcheck::Spamc'; my $spamc = PublicInbox::Spamcheck::Spamc->new; $spamc->{checkcmd} = [qw(cat)]; { open my $fh, '+>', "$tmpdir/file" or die "open failed: $!"; ok(!$spamc->spamcheck($fh), 'empty '.ref($fh)); } ok(!$spamc->spamcheck(IO::File->new_tmpfile), 'IO::File->new_tmpfile'); my $dst = ''; my $src = <<'EOF'; Date: Thu, 01 Jan 1970 00:00:00 +0000 To: From: Subject: test Message-ID: EOF ok($spamc->spamcheck(PublicInbox::Eml->new($src), \$dst), 'PublicInbox::Eml works'); is($dst, $src, 'input == output'); $dst = ''; $spamc->{checkcmd} = ['sh', '-c', 'cat; false']; ok(!$spamc->spamcheck(PublicInbox::Eml->new($src), \$dst), 'Failed check works'); is($dst, $src, 'input == output for spammy example'); for my $l (qw(ham spam)) { my $file = "$tmpdir/$l.out"; $spamc->{$l.'cmd'} = ['tee', $file ]; my $method = $l.'learn'; ok($spamc->$method(PublicInbox::Eml->new($src)), "$method OK"); open my $fh, '<', $file or die "failed to open $file: $!"; is(eval { local $/, <$fh> }, $src, "$l command ran alright"); } done_testing(); public-inbox-1.9.0/t/spawn.t000066400000000000000000000132621430031475700157040ustar00rootroot00000000000000# Copyright (C) 2015-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Spawn qw(which spawn popen_rd); use PublicInbox::Sigfd; { my $true = which('true'); ok($true, "'true' command found with which()"); } { my $pid = spawn(['true']); ok($pid, 'spawned process'); is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process'); is($?, 0, 'true exited successfully'); } SKIP: { my $pid = spawn(['true'], undef, { pgid => 0 }); ok($pid, 'spawned process with new pgid'); is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process'); is($?, 0, 'true exited successfully'); pipe(my ($r, $w)) or BAIL_OUT; # Find invalid PID to try to join its process group. my $wrong_pgid = 1; for (my $i=0x7fffffff; $i >= 2; $i--) { if (kill(0, $i) == 0) { $wrong_pgid = $i; last; } } # Test spawn behavior when it can't join the requested process group. $pid = eval { spawn(['true'], undef, { pgid => $wrong_pgid, 2 => $w }) }; close $w; my $err = do { local $/; <$r> }; # diag "$err ($@)"; if (defined $pid) { waitpid($pid, 0) if defined $pid; isnt($?, 0, 'child error (pure-Perl)'); } else { ok($@, 'exception raised'); } } { # ensure waitpid(-1, 0) and SIGCHLD works in spawned process my $script = <<'EOF'; $| = 1; # unbuffer stdout defined(my $pid = fork) or die "fork: $!"; if ($pid == 0) { exit } elsif ($pid > 0) { my $waited = waitpid(-1, 0); $waited == $pid or die "mismatched child $pid != $waited"; $? == 0 or die "child err: $>"; $SIG{CHLD} = sub { print "HI\n"; exit }; print "RDY $$\n"; select(undef, undef, undef, 0.01) while 1; } EOF my $oldset = PublicInbox::DS::block_signals(); my $rd = popen_rd([$^X, '-e', $script]); diag 'waiting for child to reap grandchild...'; chomp(my $line = readline($rd)); my ($rdy, $pid) = split(' ', $line); is($rdy, 'RDY', 'got ready signal, waitpid(-1) works in child'); ok(kill('CHLD', $pid), 'sent SIGCHLD to child'); is(readline($rd), "HI\n", '$SIG{CHLD} works in child'); ok(close $rd, 'popen_rd close works'); PublicInbox::DS::sig_setmask($oldset); } { my ($r, $w); pipe $r, $w or die "pipe failed: $!"; my $pid = spawn(['echo', 'hello world'], undef, { 1 => fileno($w) }); close $w or die "close pipe[1] failed: $!"; is(<$r>, "hello world\n", 'read stdout of spawned from pipe'); is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process'); is($?, 0, 'true exited successfully'); } { my ($r, $w); pipe $r, $w or die "pipe failed: $!"; my $pid = spawn(['sh', '-c', 'echo $HELLO'], { 'HELLO' => 'world' }, { 1 => $w }); close $w or die "close pipe[1] failed: $!"; is(<$r>, "world\n", 'read stdout of spawned from pipe'); is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process'); is($?, 0, 'sh exited successfully'); } { my $fh = popen_rd([qw(echo hello)]); ok(fileno($fh) >= 0, 'tied fileno works'); my $l = <$fh>; is($l, "hello\n", 'tied readline works'); $l = <$fh>; ok(!$l, 'tied readline works for EOF'); } { my $fh = popen_rd([qw(printf foo\nbar)]); ok(fileno($fh) >= 0, 'tied fileno works'); my $tfh = (tied *$fh)->{fh}; is($tfh->blocking(0), 1, '->blocking was true'); is($tfh->blocking, 0, '->blocking is false'); is($tfh->blocking(1), 0, '->blocking was true'); is($tfh->blocking, 1, '->blocking is true'); my @line = <$fh>; is_deeply(\@line, [ "foo\n", 'bar' ], 'wantarray works on readline'); } { my $fh = popen_rd([qw(echo hello)]); my $buf; is(sysread($fh, $buf, 6), 6, 'sysread got 6 bytes'); is($buf, "hello\n", 'tied gets works'); is(sysread($fh, $buf, 6), 0, 'sysread got EOF'); $? = 1; ok(close($fh), 'close succeeds'); is($?, 0, '$? set properly'); } { my $fh = popen_rd([qw(false)]); ok(!close($fh), 'close fails on false'); isnt($?, 0, '$? set properly: '.$?); } { local $ENV{GIT_CONFIG} = '/path/to/this/better/not/exist'; my $fh = popen_rd([qw(env)], { GIT_CONFIG => undef }); ok(!grep(/^GIT_CONFIG=/, <$fh>), 'GIT_CONFIG clobbered'); } { # ->CLOSE vs ->DESTROY waitpid caller distinction my @c; my $fh = popen_rd(['true'], undef, { cb => sub { @c = caller } }); ok(close($fh), '->CLOSE fired and successful'); ok(scalar(@c), 'callback fired by ->CLOSE'); ok(grep(!m[/PublicInbox/DS\.pm\z], @c), 'callback not invoked by DS'); @c = (); $fh = popen_rd(['true'], undef, { cb => sub { @c = caller } }); undef $fh; # ->DESTROY ok(scalar(@c), 'callback fired by ->DESTROY'); ok(grep(!m[/PublicInbox/ProcessPipe\.pm\z], @c), 'callback not invoked by ProcessPipe'); } { # children don't wait on siblings use POSIX qw(_exit); pipe(my ($r, $w)) or BAIL_OUT $!; my $cb = sub { warn "x=$$\n" }; my $fh = popen_rd(['cat'], undef, { 0 => $r, cb => $cb }); my $pp = tied *$fh; my $pid = fork // BAIL_OUT $!; local $SIG{__WARN__} = sub { _exit(1) }; if ($pid == 0) { local $SIG{__DIE__} = sub { _exit(2) }; undef $fh; _exit(0); } waitpid($pid, 0); is($?, 0, 'forked process exited'); my @w; local $SIG{__WARN__} = sub { push @w, @_ }; close $w; close $fh; is($?, 0, 'cat exited'); is_deeply(\@w, [ "x=$$\n" ], 'callback fired from owner'); } SKIP: { eval { require BSD::Resource; defined(BSD::Resource::RLIMIT_CPU()) } or skip 'BSD::Resource::RLIMIT_CPU missing', 3; my ($r, $w); pipe($r, $w) or die "pipe: $!"; my $cmd = ['sh', '-c', 'while true; do :; done']; my $fd = fileno($w); my $opt = { RLIMIT_CPU => [ 1, 1 ], RLIMIT_CORE => [ 0, 0 ], 1 => $fd }; my $pid = spawn($cmd, undef, $opt); close $w or die "close(w): $!"; my $rset = ''; vec($rset, fileno($r), 1) = 1; ok(select($rset, undef, undef, 5), 'child died before timeout'); is(waitpid($pid, 0), $pid, 'XCPU child process reaped'); isnt($?, 0, 'non-zero exit status'); } done_testing(); 1; public-inbox-1.9.0/t/thread-cycle.t000066400000000000000000000110751430031475700171200ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use_ok('PublicInbox::SearchThread'); my $mt = eval { require Mail::Thread; no warnings 'once'; $Mail::Thread::nosubject = 1; $Mail::Thread::noprune = 1; require Email::Simple; # required by Mail::Thread (via Email::Abstract) }; my $make_objs = sub { my @simples; my $n = 0; my @msgs = map { my $msg = $_; $msg->{ds} ||= ++$n; $msg->{references} =~ s/\s+/ /sg if $msg->{references}; $msg->{blob} = '0'x40; # any dummy value will do, here if ($mt) { my $simple = Email::Simple->create(header => [ 'Message-ID' => "<$msg->{mid}>", 'References' => $msg->{references}, ]); push @simples, $simple; } bless $msg, 'PublicInbox::Smsg' } @_; (\@simples, \@msgs); }; my ($simples, $smsgs) = $make_objs->( # data from t/testbox-6 in Mail::Thread 2.55: { mid => '20021124145312.GA1759@nlin.net' }, { mid => 'slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk', references => '<20021124145312.GA1759@nlin.net>', }, { mid => '15842.10677.577458.656565@jupiter.akutech-local.de', references => '<20021124145312.GA1759@nlin.net> ', }, { mid => '20021125171807.GK8236@somanetworks.com', references => '<20021124145312.GA1759@nlin.net> <15842.10677.577458.656565@jupiter.akutech-local.de>', }, { mid => '15843.12163.554914.469248@jupiter.akutech-local.de', references => '<20021124145312.GA1759@nlin.net> <15842.10677.577458.656565@jupiter.akutech-local.de> ', }, { mid => 'E18GPHf-0000zp-00@cloaked.freeserve.co.uk', references => '<20021124145312.GA1759@nlin.net> <15842.10677.577458.656565@jupiter.akutech-local.de>' } ); my $st = thread_to_s($smsgs); SKIP: { skip 'Mail::Thread missing', 1 unless $mt; check_mt($st, $simples, 'Mail::Thread output matches'); } my @backwards = ( { mid => 1, references => '<2> <3> <4>' }, { mid => 4, references => '<2> <3>' }, { mid => 5, references => '<6> <7> <8> <3> <2>' }, { mid => 9, references => '<6> <3>' }, { mid => 10, references => '<8> <7> <6>' }, { mid => 2, references => '<6> <7> <8> <3>' }, { mid => 3, references => '<6> <7> <8>' }, { mid => 6, references => '<8> <7>' }, { mid => 7, references => '<8>' }, { mid => 8, references => '' } ); ($simples, $smsgs) = $make_objs->(@backwards); my $backward = thread_to_s($smsgs); SKIP: { skip 'Mail::Thread missing', 1 unless $mt; check_mt($backward, $simples, 'matches Mail::Thread backwards'); } ($simples, $smsgs) = $make_objs->(reverse @backwards); my $forward = thread_to_s($smsgs); unless ('Mail::Thread sorts by Date') { SKIP: { skip 'Mail::Thread missing', 1 unless $mt; check_mt($forward, $simples, 'matches Mail::Thread forwards'); } } if ('sorting by Date') { is("\n".$backward, "\n".$forward, 'forward and backward matches'); } SKIP: { require_mods 'Devel::Cycle', 1; Devel::Cycle->import('find_cycle'); my @dup = ( { mid => 5, references => '<6>' }, { mid => 5, references => '<6> <1>' }, ); open my $fh, '+>', \(my $out = '') or xbail "open: $!"; (undef, $smsgs) = $make_objs->(@dup); eval 'package EmptyInbox; sub smsg_by_mid { undef }'; my $ctx = { ibx => bless {}, 'EmptyInbox' }; my $rootset = PublicInbox::SearchThread::thread($smsgs, sub { @{$_[0]} = sort { $a->{mid} cmp $b->{mid} } @{$_[0]} }, $ctx); my $oldout = select $fh; find_cycle($rootset); select $oldout; is($out, '', 'nothing from find_cycle'); } # Devel::Cycle check done_testing; sub thread_to_s { my ($msgs) = @_; my $rootset = PublicInbox::SearchThread::thread($msgs, sub { @{$_[0]} = sort { $a->{mid} cmp $b->{mid} } @{$_[0]} }); my $st = ''; my @q = map { (0, $_) } @$rootset; while (@q) { my $level = shift @q; my $node = shift @q or next; $st .= (" "x$level). "$node->{mid}\n"; my $cl = $level + 1; unshift @q, map { ($cl, $_) } @{$node->{children}}; } $st; } sub check_mt { my ($st, $simples, $msg) = @_; my $mt = Mail::Thread->new(@$simples); $mt->thread; $mt->order(sub { sort { $a->messageid cmp $b->messageid } @_ }); my $check = ''; my @q = map { (0, $_) } $mt->rootset; while (@q) { my $level = shift @q; my $node = shift @q or next; $check .= (" "x$level) . $node->messageid . "\n"; unshift @q, $level + 1, $node->child, $level, $node->next; } is("\n".$check, "\n".$st, $msg); } public-inbox-1.9.0/t/thread-index-gap.t000066400000000000000000000034351430031475700176760ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Eml; use PublicInbox::Config; use List::Util qw(shuffle); require_mods(qw(DBD::SQLite)); require_git(2.6); chomp(my @msgs = split(/\n\n/, <<'EOF')); # "git log" order Subject: [bug#45000] [PATCH 1/9] References: <20201202045335.31096-1-j@example.com> Message-Id: <20201202045540.31248-1-j@example.com> Subject: [bug#45000] [PATCH 0/9] Message-Id: <20201202045335.31096-1-j@example.com> Subject: [bug#45000] [PATCH 0/9] References: <20201202045335.31096-1-j@example.com> Message-ID: <86sg8o1mou.fsf@example.com> Subject: [bug#45000] [PATCH 8/9] Message-Id: <20201202045540.31248-8-j@example.com> References: <20201202045540.31248-1-j@example.com> EOF my ($home, $for_destroy) = tmpdir(); for my $msgs (['orig', reverse @msgs], ['shuffle', shuffle(@msgs)]) { my $desc = shift @$msgs; my $n = "index-cap-$desc-basic"; # yes, the shuffle case gets memoized by create_inbox, oh well my $ibx = create_inbox $desc, version => 2, indexlevel => 'basic', tmpdir => "$home/$desc", sub { my ($im) = @_; for my $m (@$msgs) { my $x = "$m\nFrom: x\@example.com\n\n"; $im->add(PublicInbox::Eml->new(\$x)); } }; my $over = $ibx->over; my $dbh = $over->dbh; my $tid = $dbh->selectall_arrayref('SELECT DISTINCT(tid) FROM over'); is(scalar(@$tid), 1, "only one thread initially ($desc)"); $over->dbh_close; my $env = { HOME => $home }; run_script([qw(-index --no-fsync --reindex --rethread), $ibx->{inboxdir}], $env) or BAIL_OUT 'rethread'; $tid = $dbh->selectall_arrayref('SELECT DISTINCT(tid) FROM over'); is(scalar(@$tid), 1, "only one thread after rethread ($desc)"); } done_testing; public-inbox-1.9.0/t/time.t000066400000000000000000000015071430031475700155110ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use POSIX qw(strftime); use PublicInbox::Eml; use PublicInbox::MsgTime qw(msg_datestamp); my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: b@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF my $ts = msg_datestamp($mime->header_obj); is(strftime('%Y-%m-%d %H:%M:%S', gmtime($ts)), '1993-10-02 00:00:00', 'got expected date with 2 digit year'); $mime->header_set(Date => 'Fri, 02 Oct 101 01:02:03 +0000'); $ts = msg_datestamp($mime->header_obj); is(strftime('%Y-%m-%d %H:%M:%S', gmtime($ts)), '2001-10-02 01:02:03', 'got expected date with 3 digit year'); done_testing(); public-inbox-1.9.0/t/uri_imap.t000066400000000000000000000145421430031475700163630ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_mods 'URI::Split'; use_ok 'PublicInbox::URIimap'; is(PublicInbox::URIimap->new('https://example.com/'), undef, 'invalid scheme ignored'); my $uri = PublicInbox::URIimap->new('imaps://EXAMPLE.com/'); is($uri->host, 'EXAMPLE.com', 'host ok'); is($uri->canonical->host, 'example.com', 'host canonicalized'); is($uri->canonical->as_string, 'imaps://example.com/', 'URI canonicalized'); is($uri->port, 993, 'imaps port'); is($uri->auth, undef); is($uri->user, undef); $uri = PublicInbox::URIimap->new('imaps://foo@0/'); is("$uri", $uri->as_string, '"" overload works'); is($uri->host, '0', 'numeric host'); is($uri->user, 'foo', 'user extracted'); $uri = PublicInbox::URIimap->new('imap://0/INBOX.sub#frag')->canonical; is($uri->as_string, 'imap://0/INBOX.sub', 'no fragment'); is($uri->scheme, 'imap'); $uri = PublicInbox::URIimap->new('imaps://;AUTH=ANONYMOUS@0/'); is($uri->auth, 'ANONYMOUS', 'AUTH=ANONYMOUS accepted'); $uri = PublicInbox::URIimap->new('imaps://bar%40example.com;AUTH=99%25@0/'); is($uri->auth, '99%', 'decoded AUTH'); is($uri->user, 'bar@example.com', 'decoded user'); is($uri->mailbox, undef, 'mailbox is undef'); $uri = PublicInbox::URIimap->new('imaps://ipv6@[::1]'); is($uri->host, '::1', 'IPv6 host'); is($uri->mailbox, undef, 'mailbox is undef'); $uri = PublicInbox::URIimap->new('imaps://0:666/INBOX'); is($uri->port, 666, 'port read'); is($uri->mailbox, 'INBOX'); $uri = PublicInbox::URIimap->new('imaps://0/INBOX.sub'); is($uri->mailbox, 'INBOX.sub'); is($uri->scheme, 'imaps'); is(PublicInbox::URIimap->new('imap://0:143/')->canonical->as_string, 'imap://0/'); is(PublicInbox::URIimap->new('imaps://0:993/')->canonical->as_string, 'imaps://0/'); $uri = PublicInbox::URIimap->new('imap://NSA:Hunter2@0/INBOX'); is($uri->user, 'NSA'); is($uri->password, 'Hunter2'); is($uri->uidvalidity, undef, 'no UIDVALIDITY'); $uri = PublicInbox::URIimap->new('imap://0/%'); is($uri->mailbox, '%', "RFC 2192 '%' supported"); $uri = PublicInbox::URIimap->new('imap://0/%25'); $uri = PublicInbox::URIimap->new('imap://0/*'); is($uri->mailbox, '*', "RFC 2192 '*' supported"); $uri = PublicInbox::URIimap->new('imap://0/mmm;UIDVALIDITY=1'); is($uri->mailbox, 'mmm', 'mailbox works with UIDVALIDITY'); is($uri->uidvalidity, 1, 'single-digit UIDVALIDITY'); $uri = PublicInbox::URIimap->new('imap://0/mmm;UIDVALIDITY=21'); is($uri->uidvalidity, 21, 'multi-digit UIDVALIDITY'); $uri = PublicInbox::URIimap->new('imap://0/mmm;UIDVALIDITY=bogus'); is($uri->uidvalidity, undef, 'bogus UIDVALIDITY'); is($uri->uidvalidity(2), 2, 'uid set'); is($$uri, 'imap://0/mmm;UIDVALIDITY=2', 'bogus uidvalidity replaced'); is($uri->uidvalidity(13), 13, 'uid set'); is($$uri, 'imap://0/mmm;UIDVALIDITY=13', 'valid uidvalidity replaced'); $uri = PublicInbox::URIimap->new('imap://0/mmm'); is($uri->uidvalidity(2), 2, 'uid set'); is($$uri, 'imap://0/mmm;UIDVALIDITY=2', 'uidvalidity appended'); is($uri->uid, undef, 'no uid'); is(PublicInbox::URIimap->new('imap://0/x;uidvalidity=1')->canonical->as_string, 'imap://0/x;UIDVALIDITY=1', 'capitalized UIDVALIDITY'); $uri = PublicInbox::URIimap->new('imap://0/mmm/;uid=8'); is($uri->canonical->as_string, 'imap://0/mmm/;UID=8', 'canonicalized UID'); is($uri->mailbox, 'mmm', 'mailbox works with uid'); is($uri->uid, 8, 'uid extracted'); is($uri->uid(9), 9, 'uid set'); is($$uri, 'imap://0/mmm/;UID=9', 'correct uid when stringified'); is($uri->uidvalidity(1), 1, 'set uidvalidity with uid'); is($$uri, 'imap://0/mmm;UIDVALIDITY=1/;UID=9', 'uidvalidity added with uid'); is($uri->uidvalidity(4), 4, 'set uidvalidity with uid'); is($$uri, 'imap://0/mmm;UIDVALIDITY=4/;UID=9', 'uidvalidity replaced with uid'); is($uri->uid(3), 3, 'uid set with uidvalidity'); is($$uri, 'imap://0/mmm;UIDVALIDITY=4/;UID=3', 'uid replaced properly'); my $lc = lc($$uri); is(PublicInbox::URIimap->new($lc)->canonical->as_string, "$$uri", 'canonical uppercased both params'); is($uri->uid(undef), undef, 'uid can be clobbered'); is($$uri, 'imap://0/mmm;UIDVALIDITY=4', 'uid dropped'); $uri->auth('ANONYMOUS'); is($$uri, 'imap://;AUTH=ANONYMOUS@0/mmm;UIDVALIDITY=4', 'AUTH= set'); is($uri->user, undef, 'user is undef w/ AUTH='); is($uri->password, undef, 'password is undef w/ AUTH='); $uri->user('foo'); is($$uri, 'imap://foo;AUTH=ANONYMOUS@0/mmm;UIDVALIDITY=4', 'user set w/AUTH'); is($uri->password, undef, 'password is undef w/ AUTH= & user'); $uri->auth(undef); is($$uri, 'imap://foo@0/mmm;UIDVALIDITY=4', 'user remains set w/o auth'); is($uri->password, undef, 'password is undef w/ user only'); $uri->user('bar'); is($$uri, 'imap://bar@0/mmm;UIDVALIDITY=4', 'user set w/o AUTH'); $uri->auth('NTML'); is($$uri, 'imap://bar;AUTH=NTML@0/mmm;UIDVALIDITY=4', 'auth set w/user'); $uri->auth(undef); $uri->user(undef); is($$uri, 'imap://0/mmm;UIDVALIDITY=4', 'auth and user both cleared'); is($uri->user, undef, 'user is undef'); is($uri->auth, undef, 'auth is undef'); is($uri->password, undef, 'password is undef'); $uri = PublicInbox::URIimap->new('imap://[::1]:36281/'); my $cred = bless { username => $uri->user, password => $uri->password }; is($cred->{username}, undef, 'user is undef in array context'); is($cred->{password}, undef, 'password is undef in array context'); $uri = PublicInbox::URIimap->new('imap://u@example.com/slash/separator'); is($uri->mailbox, 'slash/separator', "`/' separator accepted"); is($uri->uidvalidity(6), 6, "UIDVALIDITY set with `/' separator"); is($$uri, 'imap://u@example.com/slash/separator;UIDVALIDITY=6', "URI correct after adding UIDVALIDITY w/ `/' separator"); $uri = PublicInbox::URIimap->new('imap://u@example.com/a/b;UIDVALIDITY=3'); is($uri->uidvalidity, 3, "UIDVALIDITY w/ `/' separator"); is($uri->mailbox, 'a/b', "mailbox w/ `/' separator + UIDVALIDITY"); is($uri->uidvalidity(4), 4, "UIDVALIDITY set w/ `/' separator"); is($$uri, 'imap://u@example.com/a/b;UIDVALIDITY=4', "URI correct after replacing UIDVALIDITY w/ `/' separator"); is($uri->uid(5), 5, "set /;UID= w/ `/' separator"); $uri = PublicInbox::URIimap->new('imap://u@example.com/a/b/;UID=9'); is($uri->uid, 9, "UID read with `/' separator w/o UIDVALIDITY"); is($uri->uid(8), 8, "UID set with `/' separator w/o UIDVALIDITY"); is($$uri, 'imap://u@example.com/a/b/;UID=8', "URI correct after replacing UID w/ `/' separator"); done_testing; public-inbox-1.9.0/t/uri_nntps.t000066400000000000000000000033141430031475700165720ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_mods 'URI'; use_ok 'PublicInbox::URInntps'; my $uri = PublicInbox::URInntps->new('nntp://EXAMPLE.com/inbox.test'); isnt(ref($uri), 'PublicInbox::URInntps', 'URI fallback'); is($uri->scheme, 'nntp', 'NNTP fallback ->scheme'); $uri = PublicInbox::URInntps->new('nntps://EXAMPLE.com/inbox.test'); is($uri->host, 'EXAMPLE.com', 'host matches'); is($uri->canonical->host, 'example.com', 'host canonicalized'); is($uri->canonical->as_string, 'nntps://example.com/inbox.test', 'URI canonicalized'); is($uri->port, 563, 'nntps port'); is($uri->userinfo, undef, 'no userinfo'); is($uri->scheme, 'nntps', '->scheme works'); is($uri->group, 'inbox.test', '->group works'); $uri = PublicInbox::URInntps->new('nntps://foo@0/'); is("$uri", $uri->as_string, '"" overload works'); is($uri->host, '0', 'numeric host'); is($uri->userinfo, 'foo', 'user extracted'); $uri = PublicInbox::URInntps->new('nntps://ipv6@[::1]'); is($uri->host, '::1', 'IPv6 host'); is($uri->group, '', '->group is empty'); $uri = PublicInbox::URInntps->new('nntps://0:666/INBOX.test'); is($uri->port, 666, 'port read'); is($uri->group, 'INBOX.test', 'group read after port'); is(PublicInbox::URInntps->new('nntps://0:563/')->canonical->as_string, 'nntps://0/', 'default port stripped'); $uri = PublicInbox::URInntps->new('nntps://NSA:Hunter2@0/inbox'); is($uri->userinfo, 'NSA:Hunter2', 'userinfo accepted w/ pass'); $uri = PublicInbox::URInntps->new('nntps://NSA:Hunter2@0/inbox.test/9-10'); is_deeply([$uri->group], [ 'inbox.test', 9, 10 ], 'ranges work'); done_testing; public-inbox-1.9.0/t/utf8.eml000066400000000000000000000005611430031475700157520ustar00rootroot00000000000000Date: Thu, 01 Jan 1970 00:00:00 +0000 To: =?utf-8?Q?El=C3=A9anor?= From: =?utf-8?Q?El=C3=A9anor?= Subject: Testing for =?utf-8?Q?El=C3=A9anor?= Message-ID: MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline Content-Transfer-Encoding: 8bit This is a test message for Eléanor public-inbox-1.9.0/t/v1-add-remove-add.t000066400000000000000000000023241430031475700176460ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Import; use PublicInbox::TestCommon; use PublicInbox::Eml; require_mods(qw(DBD::SQLite Search::Xapian)); require PublicInbox::SearchIdx; my ($inboxdir, $for_destroy) = tmpdir(); my $ibx = { inboxdir => $inboxdir, name => 'test-add-remove-add', -primary_address => 'test@example.com', }; $ibx = PublicInbox::Inbox->new($ibx); my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: test@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); $im->init_bare; ok($im->add($mime), 'message added'); ok($im->remove($mime), 'message removed'); ok($im->add($mime), 'message added again'); $im->done; my $rw = PublicInbox::SearchIdx->new($ibx, 1); $rw->index_sync; my $msgs = $ibx->recent({limit => 10}); is($msgs->[0]->{mid}, 'a-mid@b', 'message exists in history'); is(scalar @$msgs, 1, 'only one message in history'); is($ibx->mm->num_for('a-mid@b'), 2, 'exists with second article number'); done_testing(); public-inbox-1.9.0/t/v1reindex.t000066400000000000000000000357671430031475700164770ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::ContentHash qw(content_digest); use File::Path qw(remove_tree); use PublicInbox::TestCommon; use PublicInbox::Eml; require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::SearchIdx'; use_ok 'PublicInbox::Import'; use_ok 'PublicInbox::OverIdx'; my ($inboxdir, $for_destroy) = tmpdir(); my $ibx_config = { inboxdir => $inboxdir, name => 'test-v1reindex', -primary_address => 'test@example.com', indexlevel => 'full', -no_fsync => 1, }; my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: test@example.com Subject: this is a subject Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF my $minmax; my $msgmap; my ($mark1, $mark2, $mark3, $mark4); { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); $im->init_bare; foreach my $i (1..10) { $mime->header_set('Message-Id', "<$i\@example.com>"); ok($im->add($mime), "message $i added"); if ($i == 4) { $mark1 = $im->get_mark($im->{tip}); $im->remove($mime); $mark2 = $im->get_mark($im->{tip}); } } if ('test remove later') { $mark3 = $im->get_mark($im->{tip}); $mime->header_set('Message-Id', "<5\@example.com>"); $im->remove($mime); $mark4 = $im->get_mark($im->{tip}); } $im->done; my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync() }; is($@, '', 'no error from indexing'); $minmax = [ $ibx->mm->minmax ]; ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined'); is_deeply($minmax, [ 1, 10 ], 'minmax as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = @$minmax; $msgmap = $ibx->mm->msg_range(\$min, $max); is_deeply($msgmap, [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected'); } { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing'); $im->done; is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } my $xap = "$inboxdir/public-inbox/xapian".PublicInbox::Search::SCHEMA_VERSION(); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed'); { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is(scalar(@warn), 0, 'no warnings from reindexing'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = @$minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; $config{indexlevel} = 'medium'; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my $mset = $ibx->search->mset('hello world'); isnt($mset->size, 0, 'got Xapian search results'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; $config{indexlevel} = 'basic'; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); isnt($ibx->search, 'no search for basic'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } # upgrade existing basic to medium # note: changing indexlevels is not yet supported in v2, # and may not be without more effort # no removals { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; $config{indexlevel} = 'medium'; my $ibx = PublicInbox::Inbox->new(\%config); my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from indexing'); is_deeply(\@warn, [], 'no warnings'); my $mset = $ibx->search->reopen->mset('hello world'); isnt($mset->size, 0, 'search OK after basic -> medium'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } # An incremental indexing test ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); # mark1 4 simple additions in the same index_sync eval { $rw->index_sync({ref => $mark1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 4, 'max as expected'); is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [4, '4@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); # mark2 A delete separated form and add in the same index_sync eval { $rw->index_sync({ref => $mark2}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 3, 'max as expected'); is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); # mark3 adds following the delete at mark2 eval { $rw->index_sync({ref => $mark3}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [5, '5@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); # mark4 A delete of an older message eval { $rw->index_sync({ref => $mark4}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } # Another incremental indexing test ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); # mark2 an add and it's delete in the same index_sync eval { $rw->index_sync({ref => $mark2}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 3, 'max as expected'); is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); # mark3 adds following the delete at mark2 eval { $rw->index_sync({ref => $mark3}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [5, '5@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); # mark4 A delete of an older message eval { $rw->index_sync({ref => $mark4}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my $ibx = PublicInbox::Inbox->new({ %$ibx_config }); my $f = $ibx->over->{dbh}->sqlite_db_filename; my $over = PublicInbox::OverIdx->new($f); my $dbh = $over->dbh; my $non_ghost_tids = sub { $dbh->selectall_arrayref(<<''); SELECT tid FROM over WHERE num > 0 ORDER BY tid ASC }; my $before = $non_ghost_tids->(); # mess up threading: my $tid = PublicInbox::OverIdx::get_counter($dbh, 'thread'); my $nr = $dbh->do('UPDATE over SET tid = ?', undef, $tid); my $rw = PublicInbox::SearchIdx->new($ibx, 1); my @pr; my $pr = sub { push @pr, @_ }; $rw->index_sync({reindex => 1, rethread => 1, -progress => $pr }); my @n = $dbh->selectrow_array(<(); ok($after->[0]->[0] > $before->[-1]->[0], 'all tids greater than before'); is(scalar @$after, scalar @$before, 'thread count unchanged'); is_deeply([], \@warn, 'no warnings'); # diag "@pr"; # XXX do we care? } done_testing(); public-inbox-1.9.0/t/v2-add-remove-add.t000066400000000000000000000021331430031475700176450ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::TestCommon; require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::V2Writable'; my ($inboxdir, $for_destroy) = tmpdir(); my $ibx = { inboxdir => "$inboxdir/v2", name => 'test-v2writable', version => 2, -no_fsync => 1, -primary_address => 'test@example.com', }; $ibx = PublicInbox::Inbox->new($ibx); my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: test@example.com Subject: this is a subject Date: Fri, 02 Oct 1993 00:00:00 +0000 Message-ID: hello world EOF my $im = PublicInbox::V2Writable->new($ibx, 1); $im->{parallel} = 0; ok($im->add($mime), 'message added'); ok($im->remove($mime), 'message removed'); ok($im->add($mime), 'message added again'); $im->done; my $msgs = $ibx->recent({limit => 1000}); is($msgs->[0]->{mid}, 'a-mid@b', 'message exists in history'); is(scalar @$msgs, 1, 'only one message in history'); done_testing(); public-inbox-1.9.0/t/v2dupindex.t000066400000000000000000000042371430031475700166460ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # we can index a message from a mirror which bypasses dedupe. use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Import; use PublicInbox::Git; require_git(2.6); require_mods(qw(DBD::SQLite)); my ($tmpdir, $for_destroy) = tmpdir(); my $inboxdir = "$tmpdir/test"; my $ibx = create_inbox('test', indexlevel => 'basic', version => 2, tmpdir => $inboxdir, sub { my ($im, $ibx) = @_; $im->add(eml_load('t/plack-qp.eml')); $im->add(eml_load('t/mda-mime.eml')); $im->done; # bypass duplicate filters (->header_set is optional) my $git0 = PublicInbox::Git->new("$ibx->{inboxdir}/git/0.git"); $_[0] = undef; $im = PublicInbox::Import->new($git0, undef, undef, $ibx); $im->{path_type} = 'v2'; $im->{lock_path} = undef; my $eml = eml_load('t/plack-qp.eml'); $eml->header_set('X-This-Is-Not-Checked-By-ContentHash', 'blah'); $im->add($eml) or BAIL_OUT 'add seen message directly'; $im->add(eml_load('t/mda-mime.eml')) or BAIL_OUT 'add another seen message directly'; $im->add(eml_load('t/iso-2202-jp.eml')) or BAIL_OUT 'add another new message'; $im->done; # mimic a fresh clone by dropping indices my $dir = $ibx->{inboxdir}; my @sqlite = (glob("$dir/*sqlite3*"), glob("$dir/xap*/*sqlite3*")); unlink(@sqlite) == scalar(@sqlite) or BAIL_OUT 'did not unlink SQLite indices'; my @shards = glob("$dir/xap*/?"); scalar(@shards) == 0 or BAIL_OUT 'Xapian shards created unexpectedly'; open my $fh, '>', "$dir/empty" or BAIL_OUT; rmdir($_) for glob("$dir/xap*"); }); my $env = { PI_CONFIG => "$inboxdir/empty" }; my $rdr = { 2 => \(my $err = '') }; ok(run_script([qw(-index -Lbasic), $inboxdir ], $env, $rdr), '-indexed'); my @n = $ibx->over->dbh->selectrow_array('SELECT COUNT(*) FROM over'); is_deeply(\@n, [ 3 ], 'identical message not re-indexed'); my $mm = $ibx->mm->{dbh}->selectall_arrayref(<<''); SELECT num,mid FROM msgmap ORDER BY num ASC is_deeply($mm, [ [ 1, 'qp@example.com' ], [ 2, 'multipart-html-sucks@11' ], [ 3, '199707281508.AAA24167@hoyogw.example' ] ], 'msgmap omits redundant message'); done_testing; public-inbox-1.9.0/t/v2index-late-dupe.t000066400000000000000000000024771430031475700200170ustar00rootroot00000000000000# Copyright (C) all contributors # License: AGPL-3.0+ # # this simulates a mirror path: git fetch && -index use strict; use v5.10.1; use PublicInbox::TestCommon; use Test::More; # redundant, used for bisect require_mods 'v2'; require PublicInbox::Import; require PublicInbox::Inbox; require PublicInbox::Git; my ($tmpdir, $for_destroy) = tmpdir(); my $inboxdir = "$tmpdir/i"; local $ENV{HOME} = $tmpdir; PublicInbox::Import::init_bare(my $e0 = "$inboxdir/git/0.git"); open my $fh, '>', "$inboxdir/inbox.lock" or xbail $!; my $git = PublicInbox::Git->new($e0); my $im = PublicInbox::Import->new($git, qw(i i@example.com)); $im->{lock_path} = undef; $im->{path_type} = 'v2'; my $eml = eml_load('t/plack-qp.eml'); ok($im->add($eml), 'add original'); $im->done; run_script([qw(-index -Lbasic), $inboxdir]); is($?, 0, 'basic index'); my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir }); my $orig = $ibx->over->get_art(1); my @mid = $eml->header_raw('Message-ID'); $eml->header_set('Message-ID', @mid, ''); ok($im->add($eml), 'add another'); $im->done; run_script([qw(-index -Lbasic), $inboxdir]); is($?, 0, 'basic index again'); my $after = $ibx->over->get_art(1); is_deeply($after, $orig, 'original unchanged') or note explain([$orig,$after]); done_testing; public-inbox-1.9.0/t/v2mda.t000066400000000000000000000062611430031475700155660ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use v5.10.1; use strict; use Test::More; use Fcntl qw(SEEK_SET); use Cwd; use PublicInbox::TestCommon; use PublicInbox::Eml; require_git(2.6); my $V = 2; require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::V2Writable'; my ($tmpdir, $for_destroy) = tmpdir(); my $ibx = { inboxdir => "$tmpdir/inbox", name => 'test-v2writable', address => [ 'test@example.com' ], }; my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: test@example.com Subject: this is a subject Date: Fri, 02 Oct 1993 00:00:00 +0000 Message-ID: List-ID: hello world EOF my $main_bin = getcwd()."/t/main-bin"; my $fail_bin = getcwd()."/t/fail-bin"; local $ENV{PI_DIR} = "$tmpdir/foo"; my $fail_path = "$fail_bin:blib/script:$ENV{PATH}"; local $ENV{PATH} = "$main_bin:blib/script:$ENV{PATH}"; my $faildir = "$tmpdir/fail"; local $ENV{PI_EMERGENCY} = $faildir; ok(mkdir $faildir); my @cmd = (qw(-init), "-V$V", $ibx->{name}, $ibx->{inboxdir}, 'http://localhost/test', $ibx->{address}->[0]); ok(run_script(\@cmd), 'initialized v2 inbox'); my $rdr = { 0 => \($mime->as_string) }; local $ENV{ORIGINAL_RECIPIENT} = 'test@example.com'; ok(run_script(['-mda'], undef, $rdr), 'mda delivered a message'); $ibx = PublicInbox::Inbox->new($ibx); my $msgs = $ibx->over->recent; is(scalar(@$msgs), 1, 'only got one message'); my $eml = $ibx->smsg_eml($msgs->[0]); is($eml->as_string, $mime->as_string, 'injected message'); { my @new = glob("$faildir/new/*"); is_deeply(\@new, [], 'nothing in faildir'); local $ENV{PATH} = $fail_path; $mime->header_set('Message-ID', ''); $rdr->{0} = \($mime->as_string); ok(run_script(['-mda'], undef, $rdr), 'mda did not die on "spam"'); @new = glob("$faildir/new/*"); is(scalar(@new), 1, 'got a message in faildir'); $msgs = $ibx->over->recent; is(scalar(@$msgs), 1, 'no new message'); my $config = "$ENV{PI_DIR}/config"; ok(-f $config, 'config exists'); my $k = 'publicinboxmda.spamcheck'; is(xsys('git', 'config', "--file=$config", $k, 'none'), 0, 'disabled spamcheck for mda'); ok(run_script(['-mda'], undef, $rdr), 'mda did not die'); my @again = glob("$faildir/new/*"); is_deeply(\@again, \@new, 'no new message in faildir'); $msgs = $ibx->over->recent; is(scalar(@$msgs), 2, 'new message added OK'); } { my $patch = 't/data/0001.patch'; open my $fh, '<', $patch or die "failed to open $patch: $!\n"; $rdr->{0} = \(do { local $/; <$fh> }); ok(run_script(['-mda'], undef, $rdr), 'mda delivered a patch'); my $post = $ibx->search->reopen->mset('dfpost:6e006fd7'); is($post->size, 1, 'got one result for dfpost'); my $pre = $ibx->search->mset('dfpre:090d998'); is($pre->size, 1, 'got one result for dfpre'); $pre = $ibx->search->mset_to_smsg($ibx, $pre); $post = $ibx->search->mset_to_smsg($ibx, $post); is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message in both cases'); # git patch-id --stable search->mset("patchid:$patchid"); is($mset->size, 1, 'patchid search works'); } done_testing(); public-inbox-1.9.0/t/v2mirror.t000066400000000000000000000333551430031475700163430ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use File::Path qw(remove_tree make_path); use Cwd qw(abs_path); use Carp (); use PublicInbox::Spawn qw(which); require_git(2.6); require_cmd('curl'); local $ENV{HOME} = abs_path('t'); use IO::Uncompress::Gunzip qw(gunzip $GunzipError); # Integration tests for HTTP cloning + mirroring require_mods(qw(Plack::Util Plack::Builder HTTP::Date HTTP::Status Search::Xapian DBD::SQLite)); use_ok 'PublicInbox::V2Writable'; use PublicInbox::InboxWritable; use PublicInbox::Eml; use PublicInbox::Config; # FIXME: too much setup my ($tmpdir, $for_destroy) = tmpdir(); my $pi_config = "$tmpdir/config"; { open my $fh, '>', $pi_config or die "open($pi_config): $!"; print $fh <<"" or die "print $pi_config: $!"; [publicinbox "v2"] ; using "mainrepo" rather than "inboxdir" for v1.1.0-pre1 WWW compat below mainrepo = $tmpdir/in address = test\@example.com close $fh or die "close($pi_config): $!"; } local $ENV{PI_CONFIG} = $pi_config; my $cfg = PublicInbox::Config->new($pi_config); my $ibx = $cfg->lookup('test@example.com'); ok($ibx, 'inbox found'); $ibx->{version} = 2; $ibx->{-no_fsync} = 1; my $v2w = PublicInbox::V2Writable->new($ibx, 1); ok $v2w, 'v2w loaded'; $v2w->{parallel} = 0; my $mime = PublicInbox::Eml->new(<<''); From: Me To: You Subject: a Date: Thu, 01 Jan 1970 00:00:00 +0000 my $old_rotate_bytes = $v2w->{rotate_bytes}; $v2w->{rotate_bytes} = 500; # force rotating for my $i (1..9) { $mime->header_set('Message-ID', "<$i\@example.com>"); $mime->header_set('Subject', "subject = $i"); ok($v2w->add($mime), "add msg $i OK"); } my $epoch_max = $v2w->{epoch_max}; ok($epoch_max > 0, "multiple epochs"); $v2w->done; { my $smsg = $ibx->over->get_art(1); like($smsg->{lines}, qr/\A[0-9]+\z/, 'lines is a digit'); like($smsg->{bytes}, qr/\A[0-9]+\z/, 'bytes is a digit'); } $ibx->cleanup; local $ENV{TEST_IPV4_ONLY} = 1; # plackup (below) doesn't do IPv6 my $rdr = { 3 => tcp_server() }; my @cmd = ('-httpd', '-W0', "--stdout=$tmpdir/out", "--stderr=$tmpdir/err"); my $td = start_script(\@cmd, undef, $rdr); my ($host, $port) = tcp_host_port(delete $rdr->{3}); @cmd = (qw(-clone -q), "http://$host:$port/v2/", "$tmpdir/m"); run_script(\@cmd) or xbail '-clone'; for my $i (0..$epoch_max) { ok(-d "$tmpdir/m/git/$i.git", "epoch $i cloned"); } @cmd = ("-init", '-j1', '-V2', 'm', "$tmpdir/m", 'http://example.com/m', 'alt@example.com'); ok(run_script(\@cmd), 'initialized public-inbox -V2'); my @shards = glob("$tmpdir/m/xap*/?"); is(scalar(@shards), 1, 'got a single shard on init'); ok(run_script([qw(-index -j0), "$tmpdir/m"]), 'indexed'); my $mibx = { inboxdir => "$tmpdir/m", address => 'alt@example.com' }; $mibx = PublicInbox::Inbox->new($mibx); is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'index synched minmax'); for my $i (10..15) { $mime->header_set('Message-ID', "<$i\@example.com>"); $mime->header_set('Subject', "subject = $i"); ok($v2w->add($mime), "add msg $i OK"); } $v2w->done; $ibx->cleanup; my @new_epochs; my $fetch_each_epoch = sub { my %before = map { $_ => 1 } glob("$tmpdir/m/git/*"); run_script([qw(-fetch --exit-code -q)], undef, {-C => "$tmpdir/m"}) or xbail('-fetch fail ', [ xqx([which('find'), "$tmpdir/m", qw(-type f -ls) ]) ], Carp::longmess()); is($?, 0, '--exit-code 0 after fetch updated'); my @after = grep { !$before{$_} } glob("$tmpdir/m/git/*"); push @new_epochs, @after; }; $fetch_each_epoch->(); my $mset = $mibx->search->reopen->mset('m:15@example.com'); is(scalar($mset->items), 0, 'new message not found in mirror, yet'); ok(run_script([qw(-index -j0), "$tmpdir/m"]), 'index updated'); is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'index synched minmax'); $mset = $mibx->search->reopen->mset('m:15@example.com'); is(scalar($mset->items), 1, 'found message in mirror'); # purge: $mime->header_set('Message-ID', '<10@example.com>'); $mime->header_set('Subject', 'subject = 10'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; ok($v2w->purge($mime), 'purge a message'); my $warn = join('', @warn); like($warn, qr/purge rewriting/); my @subj = ($warn =~ m/^# subject .*$/mg); is_deeply(\@subj, ["# subject = 10"], "only rewrote one"); } $v2w->done; my $msgs = $mibx->over->get_thread('10@example.com'); my $to_purge = $msgs->[0]->{blob}; like($to_purge, qr/\A[a-f0-9]{40,}\z/, 'read blob to be purged'); $mset = $ibx->search->reopen->mset('m:10@example.com'); is(scalar($mset->items), 0, 'purged message gone from origin'); $fetch_each_epoch->(); { $ibx->cleanup; PublicInbox::InboxWritable::cleanup($mibx); $v2w->done; my $cmd = [ qw(-index --prune -j0), "$tmpdir/m" ]; my ($out, $err) = ('', ''); my $opt = { 1 => \$out, 2 => \$err }; ok(run_script($cmd, undef, $opt), '-index --prune'); like($err, qr/discontiguous range/, 'warned about discontiguous range'); unlike($err, qr/fatal/, 'no scary fatal error shown'); } $mset = $mibx->search->reopen->mset('m:10@example.com'); is(scalar($mset->items), 0, 'purged message not found in mirror'); is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'minmax still synced'); for my $i ((1..9),(11..15)) { $mset = $mibx->search->mset("m:$i\@example.com"); is(scalar($mset->items), 1, "$i\@example.com remains visible"); } is($mibx->git->check($to_purge), undef, 'unindex+prune successful in mirror'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; $v2w->index_sync; is_deeply(\@warn, [], 'no warnings from index_sync after purge'); } # deletes happen in a different fetch window { $mset = $mibx->search->reopen->mset('m:1@example.com'); is(scalar($mset->items), 1, '1@example.com visible in mirror'); $mime->header_set('Message-ID', '<1@example.com>'); $mime->header_set('Subject', 'subject = 1'); ok($v2w->remove($mime), 'removed <1@example.com> from source'); $v2w->done; $ibx->cleanup; $fetch_each_epoch->(); PublicInbox::InboxWritable::cleanup($mibx); my $cmd = [ qw(-index -j0), "$tmpdir/m" ]; my ($out, $err) = ('', ''); my $opt = { 1 => \$out, 2 => \$err }; ok(run_script($cmd, undef, $opt), 'index ran'); is($err, '', 'no errors reported by index'); $mset = $mibx->search->reopen->mset('m:1@example.com'); is(scalar($mset->items), 0, '1@example.com no longer visible in mirror'); } if ('sequential-shard') { $mset = $mibx->search->mset('m:15@example.com'); is(scalar($mset->items), 1, 'large message not indexed'); remove_tree(glob("$tmpdir/m/xap*"), glob("$tmpdir/m/msgmap.*")); my $cmd = [ qw(-index -j9 --sequential-shard), "$tmpdir/m" ]; ok(run_script($cmd), '--sequential-shard works'); my @shards = glob("$tmpdir/m/xap*/?"); is(scalar(@shards), 8, 'got expected shard count'); PublicInbox::InboxWritable::cleanup($mibx); $mset = $mibx->search->mset('m:15@example.com'); is(scalar($mset->items), 1, 'search works after --sequential-shard'); } if ('max size') { $mime->header_set('Message-ID', '<2big@a>'); my $max = '2k'; $mime->body_str_set("z\n" x 1024); ok($v2w->add($mime), "add big message"); $v2w->done; $ibx->cleanup; $fetch_each_epoch->(); PublicInbox::InboxWritable::cleanup($mibx); my $cmd = [qw(-index -j0), "$tmpdir/m", "--max-size=$max" ]; my $opt = { 2 => \(my $err) }; ok(run_script($cmd, undef, $opt), 'indexed with --max-size'); like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping message'); $mset = $mibx->search->reopen->mset('m:2big@a'); is(scalar($mset->items), 0, 'large message not indexed'); { open my $fh, '>>', $pi_config or die; print $fh <search->reopen->mset('m:2big@a'); is(scalar($mset->items), 0, 'large message not re-indexed'); } ok(scalar(@new_epochs), 'new epochs were created and fetched'); for my $d (@new_epochs) { is(xqx(['git', "--git-dir=$d", 'config', qw(include.path)]), "../../all.git/config\n", 'include.path set'); } if ('test read-only epoch dirs') { my @git = ('git', "--git-dir=$new_epochs[0]"); my $get_objs = [@git, qw(cat-file --buffer --batch-check --batch-all-objects)]; my $before = [sort xqx($get_objs)]; remove_tree(map { "$new_epochs[0]/$_" } qw(objects refs/heads)); chmod(0555, $new_epochs[0]) or xbail "chmod: $!"; # force a refetch unlink("$tmpdir/m/manifest.js.gz") or xbail "unlink: $!"; run_script([qw(-fetch -q)], undef, {-C => "$tmpdir/m"}) or xbail '-fetch failed'; ok(!-d "$new_epochs[0]/objects", 'no objects after fetch to R/O dir'); chmod(0755, $new_epochs[0]) or xbail "chmod: $!"; mkdir("$new_epochs[0]/objects") or xbail "mkdir: $!"; mkdir("$new_epochs[0]/refs/heads") or xbail "mkdir: $!"; my $err = ''; run_script([qw(-fetch -q)], undef, {-C => "$tmpdir/m", 2 => \$err}) or xbail '-fetch failed '.$err; is_deeply([ sort xqx($get_objs) ], $before, 'fetch restored objects once GIT_DIR became writable'); } { my $dst = "$tmpdir/partial"; run_script([qw(-clone -q --epoch=~0), "http://$host:$port/v2/", $dst]); is($?, 0, 'no error from partial clone'); my @g = glob("$dst/git/*.git"); my @w = grep { -w $_ } @g; my @r = grep { ! -w $_ } @g; if ($> == 0) { @w = grep { (stat($_))[2] & 0200 } @g; @r = grep { !((stat($_))[2] & 0200) } @g; } is(scalar(@w), 1, 'one writable directory'); my ($w) = ($w[0] =~ m!/([0-9]+)\.git\z!); is((grep { m!/([0-9]+)\.git\z! or xbail "no digit in $_"; $w > ($1 + 0) } @r), scalar(@r), 'writable epoch # exceeds read-only ones'); run_script([qw(-fetch -q)], undef, { -C => $dst }); is($?, 0, 'no error from partial fetch'); remove_tree($dst); run_script([qw(-clone -q --epoch=~1..), "http://$host:$port/v2/", $dst]); my @g2 = glob("$dst/git/*.git") ; is_deeply(\@g2, \@g, 'cloned again'); is(scalar(grep { (stat($_))[2] & 0200 } @g2), scalar(@w) + 1, 'got one more cloned epoch'); # make 0.git writable and fetch into it, relies on culled manifest chmod(0755, $g2[0]) or xbail "chmod: $!"; my @before = glob("$g2[0]/objects/*/*"); run_script([qw(-fetch -q)], undef, { -C => $dst }); is($?, 0, 'no error from partial fetch'); my @after = glob("$g2[0]/objects/*/*"); ok(scalar(@before) < scalar(@after), 'fetched after chmod 0755 0.git'); # ensure culled manifest is maintained after fetch gunzip("$dst/manifest.js.gz" => \(my $m), MultiStream => 1) or xbail "gunzip: $GunzipError"; $m = PublicInbox::Config->json->decode($m); for my $k (keys %$m) { # /$name/git/$N.git my ($nr) = ($k =~ m!/git/([0-9]+)\.git\z!); ok(-w "$dst/git/$nr.git", "writable $nr.git in manifest"); } for my $ro (grep { !-w $_ } @g2) { my ($nr) = ($ro =~ m!/git/([0-9]+)\.git\z!); is(grep(m!/git/$nr\.git\z!, keys %$m), 0, "read-only $nr.git not in manifest") or xbail([sort keys %$m]); } } my $err = ''; my $oldrev = '0b3e19584c90d958a723ac2d3dec3f84f5513688~1'; # 3e0e596105198cfa (wwwlisting: allow hiding entries from manifest, 2019-06-09) $oldrev = xqx([qw(git rev-parse), $oldrev], undef, { 2 => \$err }); SKIP: { skip("no detected public-inbox GIT_DIR ($err)", 1) if $?; require_mods('Email::MIME', 1); # for legacy revision # using plackup to test old PublicInbox::WWW since -httpd from # back then relied on some packages we no longer depend on my $plackup = which('plackup') or skip('no plackup in path', 1); require PublicInbox::Lock; chomp $oldrev; my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!); my $wt = "t/data-gen/$base.pre-manifest-$oldrev"; my $lk = bless { lock_path => __FILE__ }, 'PublicInbox::Lock'; $lk->lock_acquire; my $psgi = "$wt/app.psgi"; if (!-f $psgi) { # checkout a pre-manifest.js.gz version my $t = File::Temp->new(TEMPLATE => 'g-XXXX', TMPDIR => 1); my $env = { GIT_INDEX_FILE => $t->filename }; xsys([qw(git read-tree), $oldrev], $env) and xbail 'read-tree'; xsys([qw(git checkout-index -a), "--prefix=$wt/"], $env) and xbail 'checkout-index'; my $f = "$wt/app.psgi.tmp.$$"; open my $fh, '>', $f or xbail $!; print $fh <<'EOM' or xbail $!; use Plack::Builder; use PublicInbox::WWW; my $www = PublicInbox::WWW->new; builder { enable 'Head'; sub { $www->call(@_) } } EOM close $fh or xbail $!; rename($f, $psgi) or xbail $!; } $lk->lock_release; $rdr->{run_mode} = 0; $rdr->{-C} = $wt; my $cmd = [$plackup, qw(-Enone -Ilib), "--host=$host", "--port=$port"]; $td->join('TERM'); open $rdr->{2}, '>>', "$tmpdir/plackup.err.log" or xbail "open: $!"; open $rdr->{1}, '>>&', $rdr->{2} or xbail "open: $!"; my $env = { PERL5LIB => 'lib', PERL_INLINE_DIRECTORY => undef }; $td = start_script($cmd, $env, $rdr); # wait for plackup socket()+bind()+listen() my %opt = ( Proto => 'tcp', Type => Socket::SOCK_STREAM(), PeerAddr => "$host:$port" ); for (0..50) { tick(); last if IO::Socket::INET->new(%opt); } my $dst = "$tmpdir/scrape"; @cmd = (qw(-clone -q), "http://$host:$port/v2", $dst); run_script(\@cmd, undef, { 2 => \($err = '') }); is($?, 0, 'scraping clone on old PublicInbox::WWW') or diag $err; my @g_all = glob("$dst/git/*.git"); ok(scalar(@g_all) > 1, 'cloned multiple epochs'); remove_tree($dst); @cmd = (qw(-clone -q --epoch=~0), "http://$host:$port/v2", $dst); run_script(\@cmd, undef, { 2 => \($err = '') }); is($?, 0, 'partial scraping clone on old PublicInbox::WWW'); my @g_last = grep { (stat($_))[2] & 0200 } glob("$dst/git/*.git"); is_deeply(\@g_last, [ $g_all[-1] ], 'partial clone of ~0 worked'); chmod(0755, $g_all[0]) or xbail "chmod $!"; my @before = glob("$g_all[0]/objects/*/*"); run_script([qw(-fetch -v)], undef, { -C => $dst, 2 => \($err = '') }); is($?, 0, 'scraping fetch on old PublicInbox::WWW') or diag $err; my @after = glob("$g_all[0]/objects/*/*"); ok(scalar(@before) < scalar(@after), 'fetched 0.git after enabling write-bit'); $td->join('TERM'); } done_testing; public-inbox-1.9.0/t/v2reindex.t000066400000000000000000000431461430031475700164660ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Eml; use PublicInbox::ContentHash qw(content_digest); use File::Path qw(remove_tree); require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::V2Writable'; use_ok 'PublicInbox::OverIdx'; my ($inboxdir, $for_destroy) = tmpdir(); my $ibx_config = { inboxdir => $inboxdir, name => 'test-v2writable', version => 2, -primary_address => 'test@example.com', indexlevel => 'full', -no_fsync => 1, }; my $agpl = do { open my $fh, '<', 'COPYING' or die "can't open COPYING: $!"; local $/; <$fh>; }; my $phrase = q("defending all users' freedom"); my $mime = PublicInbox::Eml->new(<<'EOF'.$agpl); From: a@example.com To: test@example.com Subject: this is a subject Date: Fri, 02 Oct 1993 00:00:00 +0000 EOF my $minmax; my $msgmap; my ($mark1, $mark2, $mark3, $mark4); { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx, {nproc => 1}); my $im0 = $im->importer(0); foreach my $i (1..10) { $mime->header_set('Message-Id', "<$i\@example.com>"); ok($im->add($mime), "message $i added"); if ($i == 4) { $mark1 = $im0->get_mark($im0->{tip}); $im->remove($mime); $mark2 = $im0->get_mark($im0->{tip}); } } if ('test remove later') { $mark3 = $im0->get_mark($im0->{tip}); $mime->header_set('Message-Id', "<5\@example.com>"); $im->remove($mime); $mark4 = $im0->get_mark($im0->{tip}); } $im->done; $minmax = [ $ibx->mm->minmax ]; ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined'); is_deeply($minmax, [ 1, 10 ], 'minmax as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = @$minmax; $msgmap = $ibx->mm->msg_range(\$min, $max); is_deeply($msgmap, [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected'); } { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx, 1); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing'); $im->done; delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } my $xap = "$inboxdir/xap".PublicInbox::Search::SCHEMA_VERSION(); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed'); { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx, 1); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx, 1); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is(scalar(@warn), 0, 'no warnings from reindexing'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } my %sizes; ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx, 1); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my $mset = $ibx->search->mset($phrase); isnt($mset->size, 0, "phrase search succeeds on indexlevel=full"); for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; $config{indexlevel} = 'medium'; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); if (0) { # not sure why, but Xapian seems to fallback to terms and # phrase searches still work delete $ibx->{search}; my $mset = $ibx->search->mset($phrase); is($mset->size, 0, 'phrase search does not work on medium'); } my $words = $phrase; $words =~ tr/"'//d; my $mset = $ibx->search->mset($words); isnt($mset->size, 0, "normal search works on indexlevel=medium"); for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } ok($sizes{full} > $sizes{medium}, 'medium is smaller than full'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; $config{indexlevel} = 'basic'; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); isnt($ibx->search, 'no search for basic'); for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } ok($sizes{medium} > $sizes{basic}, 'basic is smaller than medium'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } # An incremental indexing test ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); # mark1 4 simple additions in the same index_sync $ibx->{ref_head} = $mark1; my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync() }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 4, 'max as expected'); is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [4, '4@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); # mark2 A delete separated from an add in the same index_sync $ibx->{ref_head} = $mark2; my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync() }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 3, 'max as expected'); is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); # mark3 adds following the delete at mark2 $ibx->{ref_head} = $mark3; my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync() }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [5, '5@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); # mark4 A delete of an older message $ibx->{ref_head} = $mark4; my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync() }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } # Another incremental indexing test ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); # mark2 an add and it's delete in the same index_sync $ibx->{ref_head} = $mark2; my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync() }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 3, 'max as expected'); is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); # mark3 adds following the delete at mark2 $ibx->{ref_head} = $mark3; my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync() }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [5, '5@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); # mark4 A delete of an older message $ibx->{ref_head} = $mark4; my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync() }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } my $check_rethread = sub { my ($desc) = @_; my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $f = $ibx->over->{dbh}->sqlite_db_filename; my $over = PublicInbox::OverIdx->new($f); my $dbh = $over->dbh; my $non_ghost_tids = sub { $dbh->selectall_arrayref(<<''); SELECT tid FROM over WHERE num > 0 ORDER BY tid ASC }; my $before = $non_ghost_tids->(); # mess up threading: my $tid = PublicInbox::OverIdx::get_counter($dbh, 'thread'); my $nr = $dbh->do('UPDATE over SET tid = ?', undef, $tid); diag "messing up all threads with tid=$tid"; my $v2w = PublicInbox::V2Writable->new($ibx); my @pr; my $pr = sub { push @pr, @_ }; $v2w->index_sync({reindex => 1, rethread => 1, -progress => $pr}); # diag "@pr"; # nobody cares is_deeply(\@warn, [], 'no warnings on reindex + rethread'); my @n = $dbh->selectrow_array(<(); ok($after->[0]->[0] > $before->[-1]->[0], 'all tids greater than before'); is(scalar @$after, scalar @$before, 'thread count unchanged'); }; $check_rethread->('no-monster'); # A real example from linux-renesas-soc on lore where a 3-headed monster # of a message has 3 sets of common headers. Another normal message # previously existed with a single Message-ID that conflicts with one # of the Message-IDs in the 3-headed monster. { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; $config{indexlevel} = 'medium'; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx); my $m3 = PublicInbox::Eml->new(<<'EOF'); Date: Tue, 24 May 2016 14:34:22 -0700 (PDT) Message-Id: <20160524.143422.552507610109476444.d@example.com> To: t@example.com Cc: c@example.com Subject: Re: [PATCH v2 2/2] uno From: In-Reply-To: <1463825855-7363-2-git-send-email-y@example.com> References: <1463825855-7363-1-git-send-email-y@example.com> <1463825855-7363-2-git-send-email-y@example.com> Date: Wed, 25 May 2016 10:01:51 +0900 From: h@example.com To: g@example.com Cc: m@example.com Subject: Re: [PATCH] dos Message-ID: <20160525010150.GD7292@example.com> References: <1463498133-23918-1-git-send-email-g+r@example.com> In-Reply-To: <1463498133-23918-1-git-send-email-g+r@example.com> From: s@example.com To: h@example.com Cc: m@example.com Subject: [PATCH 12/13] tres Date: Wed, 01 Jun 2016 01:32:35 +0300 Message-ID: <1923946.Jvi0TDUXFC@wasted.example.com> In-Reply-To: <13205049.n7pM8utpHF@wasted.example.com> References: <13205049.n7pM8utpHF@wasted.example.com> Somehow we got a message with 3 sets of headers into one message, could've been something broken on the archiver side. EOF my $m1 = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: t@example.com Subject: [PATCH 12/13] Date: Wed, 01 Jun 2016 01:32:35 +0300 Message-ID: <1923946.Jvi0TDUXFC@wasted.example.com> In-Reply-To: <13205049.n7pM8utpHF@wasted.example.com> References: <13205049.n7pM8utpHF@wasted.example.com> This is probably one of the original messages EOF $im->add($m1); $im->add($m3); $im->done; remove_tree($xap); eval { $im->index_sync() }; is($@, '', 'no error from initial indexing'); is_deeply(\@warn, [], 'no warnings from initial index'); eval { $im->index_sync({reindex=>1}) }; is($@, '', 'no error from reindexing after reused Message-ID (x3)'); is_deeply(\@warn, [], 'no warnings on reindex'); my %uniq; for my $s (qw(uno dos tres)) { my $mset = $ibx->search->mset("s:$s"); my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); is(scalar(@$msgs), 1, "only one result for `$s'"); $uniq{$msgs->[0]->{num}}++; } is_deeply([values %uniq], [3], 'search on different subjects'); } # XXX: not deterministic when dealing with ambiguous messages, oh well $check_rethread->('3-headed-monster once'); $check_rethread->('3-headed-monster twice'); my $rdr = { 2 => \(my $err = '') }; my $env = { PI_CONFIG => '/dev/null' }; ok(run_script([qw(-index --reindex --xapian-only), $inboxdir], $env, $rdr), '--xapian-only works'); is($err, '', 'no errors from --xapian-only'); undef $for_destroy; SKIP: { skip 'only testing lsof(8) output on Linux', 1 if $^O ne 'linux'; my $lsof = require_cmd('lsof', 1) or skip 'no lsof in PATH', 1; my $rdr = { 2 => \(my $null_err) }; my @d = grep(m!/xap[0-9]+/!, xqx([$lsof, '-p', $$], undef, $rdr)); is_deeply(\@d, [], 'no deleted index files') or diag explain(\@d); } done_testing(); public-inbox-1.9.0/t/v2writable.t000066400000000000000000000254151430031475700166400ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::ContentHash qw(content_digest content_hash); use PublicInbox::TestCommon; use Cwd qw(abs_path); require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); local $ENV{HOME} = abs_path('t'); use_ok 'PublicInbox::V2Writable'; umask 007; my ($inboxdir, $for_destroy) = tmpdir(); my $ibx = { inboxdir => $inboxdir, name => 'test-v2writable', version => 2, -no_fsync => 1, -primary_address => 'test@example.com', }; $ibx = PublicInbox::Inbox->new($ibx); my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: test@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF my $im = PublicInbox::V2Writable->new($ibx, {nproc => 1}); is($im->{shards}, 1, 'one shard when forced'); ok($im->add($mime), 'ordinary message added'); foreach my $f ("$inboxdir/msgmap.sqlite3", glob("$inboxdir/xap*/*"), glob("$inboxdir/xap*/*/*")) { my @st = stat($f); my ($bn) = (split(m!/!, $f))[-1]; is($st[2] & 07777, -f _ ? 0660 : 0770, "default sharedRepository respected for $bn"); } my $git0; if ('ensure git configs are correct') { my @cmd = (qw(git config), "--file=$inboxdir/all.git/config", qw(core.sharedRepository 0644)); is(xsys(@cmd), 0, "set sharedRepository in all.git"); $git0 = PublicInbox::Git->new("$inboxdir/git/0.git"); chomp(my $v = $git0->qx(qw(config core.sharedRepository))); is($v, '0644', 'child repo inherited core.sharedRepository'); chomp($v = $git0->qx(qw(config --bool repack.writeBitmaps))); is($v, 'true', 'child repo inherited repack.writeBitmaps'); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; is($im->add($mime), undef, 'obvious duplicate rejected'); is(scalar(@warn), 0, 'no warning about resent message'); @warn = (); $mime->header_set('Message-Id', '', ''); is($im->add($mime), undef, 'secondary MID ignored if first matches'); my $sec = PublicInbox::Eml->new($mime->as_string); $sec->header_set('Date'); $sec->header_set('Message-Id', '', ''); ok($im->add($sec), 'secondary MID used if data is different'); like(join(' ', @warn), qr/mismatched/, 'warned about mismatch'); like(join(' ', @warn), qr/alternative/, 'warned about alternative'); is_deeply([ '', '' ], [ $sec->header_obj->header_raw('Message-Id') ], 'no new Message-Id added'); my $sane_mid = qr/\A<[\w\-\.]+\@\w+>\z/; @warn = (); $mime->header_set('Message-Id', ''); $mime->body_set('different'); ok($im->add($mime), 'reused mid ok'); like(join(' ', @warn), qr/reused/, 'warned about reused MID'); my @mids = $mime->header_obj->header_raw('Message-Id'); is($mids[0], '', 'original mid not changed'); like($mids[1], $sane_mid, 'new MID added'); is(scalar(@mids), 2, 'only one new MID added'); @warn = (); $mime->header_set('Message-Id', ''); $mime->body_set('this one needs a random mid'); my $hdr = $mime->header_obj; my $gen = PublicInbox::Import::digest2mid(content_digest($mime), $hdr); unlike($gen, qr![\+/=]!, 'no URL-unfriendly chars in Message-Id'); my $fake = PublicInbox::Eml->new($mime->as_string); $fake->header_set('Message-Id', "<$gen>"); ok($im->add($fake), 'fake added easily'); is_deeply(\@warn, [], 'no warnings from a faker'); ok($im->add($mime), 'random MID made'); like(join(' ', @warn), qr/using random/, 'warned about using random'); @mids = $mime->header_obj->header_raw('Message-Id'); is($mids[0], '', 'original mid not changed'); like($mids[1], $sane_mid, 'new MID added'); is(scalar(@mids), 2, 'only one new MID added'); @warn = (); $mime->header_set('Message-Id'); ok($im->add($mime), 'random MID made for MID free message'); @mids = $mime->header_obj->header_raw('Message-Id'); like($mids[0], $sane_mid, 'mid was generated'); is(scalar(@mids), 1, 'new generated'); @warn = (); $mime->header_set('Message-Id', ''); ok($im->add($mime), 'message added with space in Message-Id'); is_deeply([], \@warn); } { $mime->header_set('Message-Id', '', ''); $mime->header_set('X-Alt-Message-Id', ''); $mime->header_set('References', ''); ok($im->add($mime), 'message with multiple Message-ID'); $im->done; my $total = $ibx->over->dbh->selectrow_array(<<''); SELECT COUNT(*) FROM over WHERE num > 0 is($ibx->mm->num_highwater, $total, 'got expected highwater value'); my $mset1 = $ibx->search->reopen->mset('m:abcde@1'); is($mset1->size, 1, 'message found by first MID'); my $mset2 = $ibx->search->mset('m:abcde@2'); is($mset2->size, 1, 'message found by second MID'); is((($mset1->items)[0])->get_docid, (($mset2->items)[0])->get_docid, 'same document') if ($mset1->size); my $alt = $ibx->search->mset('m:alt-id-for-nntp'); is($alt->size, 1, 'message found by alt MID (NNTP)'); is((($alt->items)[0])->get_docid, (($mset1->items)[0])->get_docid, 'same document') if ($mset1->size); $mime->header_set('X-Alt-Message-Id'); my %uniq; for my $mid (qw(abcde@1 abcde@2 alt-id-for-nntp)) { my $msgs = $ibx->over->get_thread($mid); my $key = join(' ', sort(map { $_->{num} } @$msgs)); $uniq{$key}++; } is(scalar(keys(%uniq)), 1, 'all alt Message-ID queries give same smsg'); is_deeply([values(%uniq)], [3], '3 queries, 3 results'); } { use Net::NNTP; my $err = "$inboxdir/stderr.log"; my $out = "$inboxdir/stdout.log"; my $group = 'inbox.comp.test.v2writable'; my $pi_config = "$inboxdir/pi_config"; open my $fh, '>', $pi_config or die "open: $!\n"; print $fh < $pi_config }; my $td = start_script($cmd, $env, { 3 => $sock }); my $host_port = tcp_host_port($sock); my $n = Net::NNTP->new($host_port); $n->group($group); my $x = $n->xover('1-'); my %uniq; foreach my $num (sort { $a <=> $b } keys %$x) { my $mid = $x->{$num}->[3]; is($uniq{$mid}++, 0, "MID for $num is unique in XOVER"); is_deeply($n->xhdr('Message-ID', $num), { $num => $mid }, "XHDR lookup OK on num $num"); # FIXME PublicInbox::NNTP (server) doesn't handle spaces in # Message-ID, but neither does Net::NNTP (client) next if $mid =~ / /; is_deeply($n->xhdr('Message-ID', $mid), { $mid => $mid }, "XHDR lookup OK on MID $mid ($num)"); } my %nn; foreach my $mid (@{$n->newnews(0, $group)}) { is($nn{$mid}++, 0, "MID is unique in NEWNEWS"); } is_deeply([sort keys %nn], [sort keys %uniq]); my %lg; foreach my $num (@{$n->listgroup($group)}) { is($lg{$num}++, 0, "num is unique in LISTGROUP"); } is_deeply([sort keys %lg], [sort keys %$x], 'XOVER and LISTGROUPS return the same article numbers'); my $xref = $n->xhdr('Xref', '1-'); is_deeply([sort keys %lg], [sort keys %$xref], 'Xref range OK'); my $mids = $n->xhdr('Message-ID', '1-'); is_deeply([sort keys %lg], [sort keys %$xref], 'Message-ID range OK'); my $rover = $n->xrover('1-'); is_deeply([sort keys %lg], [sort keys %$rover], 'XROVER range OK'); }; { my @log = qw(log --no-decorate --no-abbrev --no-notes --no-color); my @before = $git0->qx(@log, qw(--pretty=oneline)); my $before = $git0->qx(@log, qw(--pretty=raw --raw -r)); $im = PublicInbox::V2Writable->new($ibx, {nproc => 2}); is($im->{shards}, 1, 'detected single shard from previous'); my ($mark, $rm_mime, $smsg) = $im->remove($mime, 'test removal'); is(content_hash($rm_mime), content_hash($mime), 'removed object returned matches'); ok(defined($mark), 'mark set'); $im->done; my @after = $git0->qx(@log, qw(--pretty=oneline)); my $tip = shift @after; like($tip, qr/\A[a-f0-9]+ test removal\n\z/s, 'commit message propagated to git'); is_deeply(\@after, \@before, 'only one commit written to git'); my $mid = $smsg->{mid}; is($ibx->mm->num_for($mid), undef, 'no longer in Msgmap by mid'); my $num = $smsg->{num}; like($num, qr/\A\d+\z/, 'numeric number in return message'); is($ibx->mm->mid_for($num), undef, 'no longer in Msgmap by num'); my $mset = $ibx->search->reopen->mset('m:'.$mid); is($mset->size, 0, 'no longer found in Xapian'); my @log1 = (@log, qw(-1 --pretty=raw --raw -r --no-renames)); is($ibx->over->get_art($num), undef, 'removal propagated to Over DB'); my $after = $git0->qx(@log1); if ($after =~ m!( [a-f0-9]+ )A\td$!m) { my $oid = $1; ok(index($before, $oid) > 0, 'no new blob introduced'); } else { fail('failed to extract blob from log output'); } is($im->remove($mime, 'test removal'), undef, 'remove is idempotent'); $im->done; is($git0->qx(@log1), $after, 'no git history made with idempotent remove'); eval { $im->done }; ok(!$@, '->done is idempotent'); } { ok($im->add($mime), 'add message to be purged'); local $SIG{__WARN__} = sub {}; ok(my $cmt = $im->purge($mime), 'purged message'); like($cmt->[0], qr/\A[a-f0-9]{40,}\z/, 'purge returned current commit'); $im->done; # again is($im->purge($mime), undef, 'no-op returns undef'); } { my $x = 'x'x250; my $y = 'y'x250; local $SIG{__WARN__} = sub {}; $mime->header_set('Subject', 'long mid'); $mime->header_set('Message-ID', "<$x>"); ok($im->add($mime), 'add excessively long Message-ID'); $mime->header_set('Message-ID', "<$y>"); $mime->header_set('References', "<$x>"); ok($im->add($mime), 'add excessively long References'); $im->done; my $msgs = $ibx->over->get_thread('x'x244); is(2, scalar(@$msgs), 'got both messages'); is($msgs->[0]->{mid}, 'x'x244, 'stored truncated mid'); is($msgs->[1]->{references}, '<'.('x'x244).'>', 'stored truncated ref'); is($msgs->[1]->{mid}, 'y'x244, 'stored truncated mid(2)'); } my $tmp = { inboxdir => "$inboxdir/non-existent/subdir", name => 'nope', version => 2, -primary_address => 'test@example.com', }; eval { my $nope = PublicInbox::V2Writable->new($tmp); $nope->add($mime); }; ok($@, 'V2Writable fails on non-existent dir'); { my $v2w = PublicInbox::V2Writable->new($tmp, 1); ok($v2w, 'creat flag works'); $v2w->{parallel} = 0; $v2w->init_inbox(0); my $alt = "$tmp->{inboxdir}/all.git/objects/info/alternates"; open my $fh, '>>', $alt or die $!; print $fh "$inboxdir/all.git/objects\n" or die $!; chmod(0664, $fh) or die "fchmod: $!"; close $fh or die $!; open $fh, '<', $alt or die $!; my $before = do { local $/; <$fh> }; ok($v2w->{mg}->add_epoch(3), 'init a new epoch'); open $fh, '<', $alt or die $!; my $after = do { local $/; <$fh> }; ok(index($after, $before) > 0, 'old contents preserved after adding epoch'); like($after, qr!\A[^\n]+?/3\.git/objects\n!s, 'first line is newest epoch'); my $mode = (stat($alt))[2] & 07777; is($mode, 0664, sprintf('0%03o', $mode).' is 0664'); } done_testing(); public-inbox-1.9.0/t/view.t000066400000000000000000000020261430031475700155220ustar00rootroot00000000000000# Copyright (C) 2013-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::TestCommon; require_mods('Plack::Util'); use_ok 'PublicInbox::View'; # this only tests View.pm internals which are subject to change, # see t/plack.t for tests against the PSGI interface. my $cols = PublicInbox::View::COLS(); my @addr; until (length(join(', ', @addr)) > ($cols * 2)) { push @addr, '"l, f" '; my $n = int(rand(20)) + 1; push @addr, ('x'x$n).'@x'; } my $orig = join(', ', @addr); my $res = PublicInbox::View::fold_addresses($orig.''); isnt($res, $orig, 'folded result'); unlike($res, qr/l,\n\tf/s, '"last, first" no broken'); my @nospc = ($res, $orig); s/\s+//g for @nospc; is($nospc[0], $nospc[1], 'no addresses lost in translation'); my $tws = PublicInbox::View::fold_addresses($orig.' '); # (Email::Simple drops leading whitespace, but not trailing) $tws =~ s/ \z//; is($tws, $res, 'not thrown off by trailing whitespace'); done_testing(); public-inbox-1.9.0/t/watch_filter_rubylang.t000066400000000000000000000053611430031475700211330ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use PublicInbox::TestCommon; use Test::More; use PublicInbox::Eml; use PublicInbox::Config; require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::Watch'; use_ok 'PublicInbox::Emergency'; my ($tmpdir, $for_destroy) = tmpdir(); local $ENV{PI_CONFIG} = "$tmpdir/pi_config"; my @v = qw(V1); SKIP: { if (require_git(2.6, 1)) { use_ok 'PublicInbox::V2Writable'; push @v, 'V2'; } else { skip 'git 2.6+ needed for V2', 40; } } for my $v (@v) { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my $cfgpfx = "publicinbox.$v"; my $inboxdir = "$tmpdir/$v"; my $maildir = "$tmpdir/md-$v"; my $spamdir = "$tmpdir/spam-$v"; my $addr = "test-$v\@example.com"; my @cmd = ('-init', '-Lfull', "-$v", $v, $inboxdir, "http://example.com/$v", $addr); ok(run_script(\@cmd), 'public-inbox init'); PublicInbox::Emergency->new($spamdir); for my $i (1..15) { my $msg = < Date: Sat, 05 Jan 2019 04:19:17 +0000 something EOF PublicInbox::Emergency->new($maildir)->prepare(\$msg); } my $spam = < Date: Sat, 05 Jan 2019 04:19:17 +0000 spam EOF PublicInbox::Emergency->new($maildir)->prepare(\"$spam"); my $orig = <new(\$orig); my $ibx = $cfg->lookup_name($v); $ibx->{-no_fsync} = 1; ok($ibx, 'found inbox by name'); my $w = PublicInbox::Watch->new($cfg); for my $i (1..2) { $w->scan('full'); } # make sure all serials are searchable: for my $i (1..15) { my $mset = $ibx->search->mset("alerts:$i"); is($mset->size, 1, "got one result for alerts:$i"); my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i"); } is($ibx->search->mset('b:spam')->size, 1, 'got spam message'); my $nr = unlink <$maildir/new/*>; is(16, $nr); { PublicInbox::Emergency->new($spamdir)->prepare(\$spam); my @new = glob("$spamdir/new/*"); my @p = split(m!/+!, $new[0]); ok(link($new[0], "$spamdir/cur/".$p[-1].":2,S")); is(unlink($new[0]), 1); } $w->scan('full'); $cfg = PublicInbox::Config->new(\$orig); $ibx = $cfg->lookup_name($v); $ibx->{-no_fsync} = 1; is($ibx->search->reopen->mset('b:spam')->size, 0, 'spam removed'); is_deeply([], \@warn, 'no warnings'); } done_testing(); public-inbox-1.9.0/t/watch_imap.t000066400000000000000000000013371430031475700166700ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::Config; # see t/imapd*.t for tests against a live IMAP server use_ok 'PublicInbox::Watch'; my $cfg = PublicInbox::Config->new(\<new($cfg); is($watch->{imap}->{'imap://example.com/INBOX.a'}->[0]->{name}, 'i', 'watched an inbox'); is($watch->{imap}->{'imap://example.com/INBOX.spam'}, 'watchspam', 'watched spam folder'); done_testing; public-inbox-1.9.0/t/watch_maildir.t000066400000000000000000000160201430031475700173560ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::Eml; use Cwd; use PublicInbox::Config; use PublicInbox::TestCommon; use PublicInbox::Import; my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/test.git"; my $maildir = "$tmpdir/md"; my $spamdir = "$tmpdir/spam"; use_ok 'PublicInbox::Watch'; use_ok 'PublicInbox::Emergency'; my $cfgpfx = "publicinbox.test"; my $addr = 'test-public@example.com'; my $default_branch = PublicInbox::Import::default_branch; PublicInbox::Import::init_bare($git_dir); my $msg = < Date: Sat, 18 Jun 2016 00:00:00 +0000 something EOF PublicInbox::Emergency->new($maildir)->prepare(\$msg); ok(POSIX::mkfifo("$maildir/cur/fifo", 0777), 'create FIFO to ensure we do not get stuck on it :P'); my $sem = PublicInbox::Emergency->new($spamdir); # create dirs { my @w; local $SIG{__WARN__} = sub { push @w, @_ }; my $cfg = PublicInbox::Config->new(\<new($cfg); is(scalar grep(/is a spam folder/, @w), 1, 'got warning about spam'); is_deeply($wm->{mdmap}, { "$spamdir/cur" => 'watchspam' }, 'only got the spam folder to watch'); } my $cfg_path = "$tmpdir/config"; { open my $fh, '>', $cfg_path or BAIL_OUT $!; print $fh <new($cfg_path); PublicInbox::Watch->new($cfg)->scan('full'); my $git = PublicInbox::Git->new($git_dir); my @list = $git->qx('rev-list', $default_branch); is(scalar @list, 1, 'one revision in rev-list'); my $write_spam = sub { is(scalar glob("$spamdir/new/*"), undef, 'no spam existing'); $sem->prepare(\$msg); $sem->commit; my @new = glob("$spamdir/new/*"); is(scalar @new, 1); my @p = split(m!/+!, $new[0]); ok(link($new[0], "$spamdir/cur/".$p[-1].":2,S")); is(unlink($new[0]), 1); }; $write_spam->(); is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam'); PublicInbox::Watch->new($cfg)->scan('full'); @list = $git->qx('rev-list', $default_branch); is(scalar @list, 2, 'two revisions in rev-list'); @list = $git->qx('ls-tree', '-r', '--name-only', $default_branch); is(scalar @list, 0, 'tree is empty'); is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); # check with scrubbing { $msg .= qq(-- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo\@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html\n); PublicInbox::Emergency->new($maildir)->prepare(\$msg); PublicInbox::Watch->new($cfg)->scan('full'); @list = $git->qx('ls-tree', '-r', '--name-only', $default_branch); is(scalar @list, 1, 'tree has one file'); my $mref = $git->cat_file('HEAD:'.$list[0]); like($$mref, qr/something\n\z/s, 'message scrubbed on import'); is(unlink(glob("$maildir/new/*")), 1, 'unlinked spam'); $write_spam->(); PublicInbox::Watch->new($cfg)->scan('full'); @list = $git->qx('ls-tree', '-r', '--name-only', $default_branch); is(scalar @list, 0, 'tree is empty'); @list = $git->qx('rev-list', $default_branch); is(scalar @list, 4, 'four revisions in rev-list'); is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); } { my $fail_bin = getcwd()."/t/fail-bin"; ok(-x "$fail_bin/spamc", "mock spamc exists"); my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc ham mock local $ENV{PATH} = $fail_path; PublicInbox::Emergency->new($maildir)->prepare(\$msg); $cfg->{'publicinboxwatch.spamcheck'} = 'spamc'; { local $SIG{__WARN__} = sub {}; # quiet spam check warning PublicInbox::Watch->new($cfg)->scan('full'); } @list = $git->qx('ls-tree', '-r', '--name-only', $default_branch); is(scalar @list, 0, 'tree has no files spamc checked'); is(unlink(glob("$maildir/new/*")), 1); } { my $main_bin = getcwd()."/t/main-bin"; ok(-x "$main_bin/spamc", "mock spamc exists"); my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock local $ENV{PATH} = $main_path; PublicInbox::Emergency->new($maildir)->prepare(\$msg); $cfg->{'publicinboxwatch.spamcheck'} = 'spamc'; @list = $git->qx('ls-tree', '-r', '--name-only', $default_branch); PublicInbox::Watch->new($cfg)->scan('full'); @list = $git->qx('ls-tree', '-r', '--name-only', $default_branch); is(scalar @list, 1, 'tree has one file after spamc checked'); # XXX: workaround some weird caching/memoization in cat-file, # shouldn't be an issue in real-world use, though... $git = PublicInbox::Git->new($git_dir); my $mref = $git->cat_file($default_branch.':'.$list[0]); like($$mref, qr/something\n\z/s, 'message scrubbed on import'); } # end-to-end test which actually uses inotify/kevent { my $env = { PI_CONFIG => $cfg_path }; $git->cleanup; # n.b. --no-scan is only intended for testing atm my $wm = start_script([qw(-watch --no-scan)], $env); my $eml = eml_load('t/data/0001.patch'); $eml->header_set('Cc', $addr); my $em = PublicInbox::Emergency->new($maildir); $em->prepare(\($eml->as_string)); use_ok 'PublicInbox::InboxIdle'; use_ok 'PublicInbox::DS'; my $delivered = 0; my $cb = sub { my ($ibx) = @_; diag "message delivered to `$ibx->{name}'"; $delivered++; }; PublicInbox::DS->Reset; my $ii = PublicInbox::InboxIdle->new($cfg); my $obj = bless \$cb, 'PublicInbox::TestCommon::InboxWakeup'; $cfg->each_inbox(sub { $_[0]->subscribe_unlock('ident', $obj) }); PublicInbox::DS->SetPostLoopCallback(sub { $delivered == 0 }); # wait for -watch to setup inotify watches my $sleep = 1; if (eval { require Linux::Inotify2 } && -d "/proc/$wm->{pid}/fd") { my $end = time + 2; my (@ino, @ino_info); do { @ino = grep { (readlink($_)//'') =~ /\binotify\b/ } glob("/proc/$wm->{pid}/fd/*"); } until (@ino || time > $end || !tick); if (scalar(@ino) == 1) { my $ino_fd = (split('/', $ino[0]))[-1]; my $ino_fdinfo = "/proc/$wm->{pid}/fdinfo/$ino_fd"; while (time < $end && open(my $fh, '<', $ino_fdinfo)) { @ino_info = grep(/^inotify wd:/, <$fh>); last if @ino_info >= 3; tick; } $sleep = undef if @ino_info >= 3; } } if ($sleep) { diag "waiting ${sleep}s for -watch to start up"; sleep $sleep; } $em->commit; # wake -watch up diag 'waiting for -watch to import new message'; PublicInbox::DS::event_loop(); $wm->kill; $wm->join; $ii->close; PublicInbox::DS->Reset; my $head = $git->qx(qw(cat-file commit HEAD)); my $subj = $eml->header('Subject'); like($head, qr/^\Q$subj\E/sm, 'new commit made'); } sub is_maildir { my ($dir) = @_; PublicInbox::Watch::is_maildir($dir); } is(is_maildir('maildir:/hello//world'), '/hello/world', 'extra slash gone'); is(is_maildir('maildir:/hello/world/'), '/hello/world', 'trailing slash gone'); is(is_maildir('faildir:/hello/world/'), undef, 'non-maildir rejected'); done_testing; public-inbox-1.9.0/t/watch_maildir_v2.t000066400000000000000000000153431430031475700177740ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::Eml; use Cwd; use PublicInbox::Config; use PublicInbox::TestCommon; use PublicInbox::Import; require_git(2.6); require_mods(qw(Search::Xapian DBD::SQLite)); require PublicInbox::V2Writable; my ($tmpdir, $for_destroy) = tmpdir(); my $inboxdir = "$tmpdir/v2"; my $maildir = "$tmpdir/md"; my $spamdir = "$tmpdir/spam"; use_ok 'PublicInbox::Watch'; use_ok 'PublicInbox::Emergency'; my $cfgpfx = "publicinbox.test"; my $addr = 'test-public@example.com'; my @cmd = ('-init', '-V2', 'test', $inboxdir, 'http://example.com/v2list', $addr); local $ENV{PI_CONFIG} = "$tmpdir/pi_config"; ok(run_script(\@cmd), 'public-inbox init OK'); my $msg = < Date: Sat, 18 Jun 2016 00:00:00 +0000 something EOF PublicInbox::Emergency->new($maildir)->prepare(\$msg); ok(POSIX::mkfifo("$maildir/cur/fifo", 0777), 'create FIFO to ensure we do not get stuck on it :P'); my $sem = PublicInbox::Emergency->new($spamdir); # create dirs my $orig = <new(\$orig); my $ibx = $cfg->lookup_name('test'); ok($ibx, 'found inbox by name'); $ibx->{-no_fsync} = 1; PublicInbox::Watch->new($cfg)->scan('full'); my $total = scalar @{$ibx->over->recent}; is($total, 1, 'got one revision'); # my $git = PublicInbox::Git->new("$inboxdir/git/0.git"); # my @list = $git->qx(qw(rev-list refs/heads/master)); # is(scalar @list, 1, 'one revision in rev-list'); my $write_spam = sub { is(scalar glob("$spamdir/new/*"), undef, 'no spam existing'); $sem->prepare(\$msg); $sem->commit; my @new = glob("$spamdir/new/*"); is(scalar @new, 1); my @p = split(m!/+!, $new[0]); ok(link($new[0], "$spamdir/cur/".$p[-1].":2,S")); is(unlink($new[0]), 1); }; $write_spam->(); is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam'); PublicInbox::Watch->new($cfg)->scan('full'); is_deeply($ibx->over->recent, [], 'deleted file'); is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); # check with scrubbing { $msg .= qq(-- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo\@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html\n); PublicInbox::Emergency->new($maildir)->prepare(\$msg); PublicInbox::Watch->new($cfg)->scan('full'); my $msgs = $ibx->over->recent; is(scalar(@$msgs), 1, 'got one file back'); my $mref = $ibx->msg_by_smsg($msgs->[0]); like($$mref, qr/something\n\z/s, 'message scrubbed on import'); is(unlink(glob("$maildir/new/*")), 1, 'unlinked spam'); $write_spam->(); PublicInbox::Watch->new($cfg)->scan('full'); $msgs = $ibx->over->recent; is(scalar(@$msgs), 0, 'inbox is empty again'); is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); } { my $fail_bin = getcwd()."/t/fail-bin"; ok(-x "$fail_bin/spamc", "mock spamc exists"); my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc ham mock local $ENV{PATH} = $fail_path; PublicInbox::Emergency->new($maildir)->prepare(\$msg); $cfg->{'publicinboxwatch.spamcheck'} = 'spamc'; { local $SIG{__WARN__} = sub {}; # quiet spam check warning PublicInbox::Watch->new($cfg)->scan('full'); } my $msgs = $ibx->over->recent; is(scalar(@$msgs), 0, 'inbox is still empty'); is(unlink(glob("$maildir/new/*")), 1); } { my $main_bin = getcwd()."/t/main-bin"; ok(-x "$main_bin/spamc", "mock spamc exists"); my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock local $ENV{PATH} = $main_path; PublicInbox::Emergency->new($maildir)->prepare(\$msg); $cfg->{'publicinboxwatch.spamcheck'} = 'spamc'; PublicInbox::Watch->new($cfg)->scan('full'); my $msgs = $ibx->over->recent; is(scalar(@$msgs), 1, 'inbox has one mail after spamc OK-ed a message'); my $mref = $ibx->msg_by_smsg($msgs->[0]); like($$mref, qr/something\n\z/s, 'message scrubbed on import'); delete $cfg->{'publicinboxwatch.spamcheck'}; } { my $patch = 't/data/0001.patch'; open my $fh, '<', $patch or die "failed to open $patch: $!\n"; $msg = do { local $/; <$fh> }; PublicInbox::Emergency->new($maildir)->prepare(\$msg); PublicInbox::Watch->new($cfg)->scan('full'); my $post = $ibx->search->reopen->mset('dfpost:6e006fd7'); is($post->size, 1, 'diff postimage found'); my $pre = $ibx->search->mset('dfpre:090d998b6c2c'); is($pre->size, 1, 'diff preimage found'); $pre = $ibx->search->mset_to_smsg($ibx, $pre); $post = $ibx->search->mset_to_smsg($ibx, $post); is(scalar(@$pre), 1, 'diff preimage found'); is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message'); } # multiple inboxes in the same maildir { my $v1repo = "$tmpdir/v1"; my $v1pfx = "publicinbox.v1"; my $v1addr = 'v1-public@example.com'; PublicInbox::Import::init_bare($v1repo); my $raw = <new(\$raw); my $both = < Date: Sat, 18 Jun 2016 00:00:00 +0000 both EOF PublicInbox::Emergency->new($maildir)->prepare(\$both); PublicInbox::Watch->new($cfg)->scan('full'); my $mset = $ibx->search->reopen->mset('m:both@b.com'); my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); my $v1 = $cfg->lookup_name('v1'); my $msg = $v1->git->cat_file($msgs->[0]->{blob}); is($both, $$msg, 'got original message back from v1'); $msg = $ibx->git->cat_file($msgs->[0]->{blob}); is($both, $$msg, 'got original message back from v2'); } { my $want = <<'EOF'; From: List-Id: Message-ID: EOF my $do_not_want = <<'EOF'; From: List-Id: X-Mailing-List: no@example.com Message-ID: EOF my $raw = $orig."$cfgpfx.listid=i.want.you.to.want.me\n"; PublicInbox::Emergency->new($maildir)->prepare(\$want); PublicInbox::Emergency->new($maildir)->prepare(\$do_not_want); my $cfg = PublicInbox::Config->new(\$raw); PublicInbox::Watch->new($cfg)->scan('full'); $ibx = $cfg->lookup_name('test'); my $num = $ibx->mm->num_for('do.want@example.com'); ok(defined $num, 'List-ID matched for watch'); $num = $ibx->mm->num_for('do.not.want@example.com'); is($num, undef, 'unaccepted List-ID matched for watch'); $raw = $orig."$cfgpfx.watchheader=X-Mailing-List:no\@example.com\n"; $cfg = PublicInbox::Config->new(\$raw); PublicInbox::Watch->new($cfg)->scan('full'); $ibx = $cfg->lookup_name('test'); $num = $ibx->mm->num_for('do.not.want@example.com'); ok(defined $num, 'X-Mailing-List matched'); } done_testing; public-inbox-1.9.0/t/watch_multiple_headers.t000066400000000000000000000036701430031475700212720ustar00rootroot00000000000000# Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::Config; use PublicInbox::TestCommon; require_git(2.6); require_mods(qw(Search::Xapian DBD::SQLite)); my ($tmpdir, $for_destroy) = tmpdir(); my $inboxdir = "$tmpdir/v2"; my $maildir = "$tmpdir/md"; use_ok 'PublicInbox::Watch'; use_ok 'PublicInbox::Emergency'; my $cfgpfx = "publicinbox.test"; my $addr = 'test-public@example.com'; my @cmd = ('-init', '-V2', 'test', $inboxdir, 'http://example.com/list', $addr); local $ENV{PI_CONFIG} = "$tmpdir/pi_config"; ok(run_script(\@cmd), 'public-inbox init OK'); my $msg_to = < Date: Sat, 18 Apr 2020 00:00:00 +0000 content1 EOF my $msg_cc = < Date: Sat, 18 Apr 2020 00:01:00 +0000 content2 EOF my $msg_none = < Date: Sat, 18 Apr 2020 00:02:00 +0000 content3 EOF PublicInbox::Emergency->new($maildir)->prepare(\$msg_to); PublicInbox::Emergency->new($maildir)->prepare(\$msg_cc); PublicInbox::Emergency->new($maildir)->prepare(\$msg_none); my $raw = <new(\$raw); PublicInbox::Watch->new($cfg)->scan('full'); my $ibx = $cfg->lookup_name('test'); ok($ibx, 'found inbox by name'); my $num = $ibx->mm->num_for('to@a.com'); ok(defined $num, 'Matched for address in To:'); $num = $ibx->mm->num_for('cc@a.com'); ok(defined $num, 'Matched for address in Cc:'); $num = $ibx->mm->num_for('none@a.com'); is($num, undef, 'No match without address in To: or Cc:'); done_testing; public-inbox-1.9.0/t/www_altid.t000066400000000000000000000047441430031475700165620ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Config; use PublicInbox::Spawn qw(spawn); require_cmd('sqlite3'); require_mods(qw(DBD::SQLite HTTP::Request::Common Plack::Test URI::Escape Plack::Builder IO::Uncompress::Gunzip)); use_ok($_) for qw(Plack::Test HTTP::Request::Common); require_ok 'PublicInbox::Msgmap'; require_ok 'PublicInbox::AltId'; require_ok 'PublicInbox::WWW'; my ($tmpdir, $for_destroy) = tmpdir(); my $aid = 'xyz'; my $cfgpath; my $ibx = create_inbox 'test', indexlevel => 'basic', sub { my ($im, $ibx) = @_; $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT; From: a@example.com Message-Id: EOF # $im->done; my $spec = "serial:$aid:file=blah.sqlite3"; my $altid = PublicInbox::AltId->new($ibx, $spec, 1); $altid->mm_alt->mid_set(1, 'a@example.com'); $cfgpath = "$ibx->{inboxdir}/cfg"; open my $fh, '>', $cfgpath or BAIL_OUT "open $cfgpath: $!"; print $fh <{inboxdir} address = $ibx->{-primary_address} altid = $spec url = http://example.com/test EOF close $fh or BAIL_OUT $!; }; $cfgpath //= "$ibx->{inboxdir}/cfg"; my $cfg = PublicInbox::Config->new($cfgpath); my $www = PublicInbox::WWW->new($cfg); my $cmpfile = "$tmpdir/cmp.sqlite3"; my $client = sub { my ($cb) = @_; my $res = $cb->(POST("/test/$aid.sql.gz")); is($res->code, 200, 'retrieved gzipped dump'); IO::Uncompress::Gunzip::gunzip(\($res->content) => \(my $buf)); pipe(my ($r, $w)) or die; my $cmd = ['sqlite3', $cmpfile]; my $pid = spawn($cmd, undef, { 0 => $r }); print $w $buf or die; close $w or die; is(waitpid($pid, 0), $pid, 'sqlite3 exited'); is($?, 0, 'sqlite3 loaded dump'); my $mm_cmp = PublicInbox::Msgmap->new_file($cmpfile); is($mm_cmp->mid_for(1), 'a@example.com', 'sqlite3 dump valid'); $mm_cmp = undef; unlink $cmpfile or die; }; test_psgi(sub { $www->call(@_) }, $client); SKIP: { require_mods(qw(Plack::Test::ExternalServer), 4); my $env = { PI_CONFIG => $cfgpath }; my $sock = tcp_server() or die; my ($out, $err) = map { "$tmpdir/std$_.log" } qw(out err); my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, $env, { 3 => $sock }); my ($h, $p) = tcp_host_port($sock); local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; Plack::Test::ExternalServer::test_psgi(client => $client); } done_testing; public-inbox-1.9.0/t/www_listing.t000066400000000000000000000150001430031475700171210ustar00rootroot00000000000000#!perl -w # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # manifest.js.gz generation and grok-pull integration test use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Import; use IO::Uncompress::Gunzip qw(gunzip); require_mods(qw(json URI::Escape Plack::Builder Digest::SHA HTTP::Tiny)); require PublicInbox::WwwListing; require PublicInbox::ManifestJsGz; use PublicInbox::Config; my $json = PublicInbox::Config::json(); use_ok 'PublicInbox::Git'; my ($tmpdir, $for_destroy) = tmpdir(); my $bare = PublicInbox::Git->new("$tmpdir/bare.git"); PublicInbox::Import::init_bare($bare->{git_dir}); is($bare->manifest_entry, undef, 'empty repo has no manifest entry'); { my $fi_data = './t/git.fast-import-data'; open my $fh, '<', $fi_data or die "open $fi_data: $!"; my $env = { GIT_DIR => $bare->{git_dir} }; is(xsys([qw(git fast-import --quiet)], $env, { 0 => $fh }), 0, 'fast-import'); } like($bare->manifest_entry->{fingerprint}, qr/\A[a-f0-9]{40}\z/, 'got fingerprint with non-empty repo'); sub tiny_test { my ($json, $host, $port, $html) = @_; my ($tmp, $res); my $http = HTTP::Tiny->new; if ($html) { $res = $http->get("http://$host:$port/"); is($res->{status}, 200, 'got HTML listing'); like($res->{content}, qr!!si, 'listing looks like HTML'); $res = $http->get("http://$host:$port/", {'Accept-Encoding'=>'gzip'}); is($res->{status}, 200, 'got gzipped HTML listing'); gunzip(\(delete $res->{content}) => \$tmp); like($tmp, qr!!si, 'unzipped listing looks like HTML'); } $res = $http->get("http://$host:$port/manifest.js.gz"); is($res->{status}, 200, 'got manifest'); gunzip(\(delete $res->{content}) => \$tmp); unlike($tmp, qr/"modified":\s*"/, 'modified is an integer'); my $manifest = $json->decode($tmp); ok(my $clone = $manifest->{'/alt'}, '/alt in manifest'); is($clone->{owner}, "lorelei \x{100}", 'owner set'); is($clone->{reference}, '/bare', 'reference detected'); is($clone->{description}, "we're \x{100}ll clones", 'description read'); ok(my $bare = $manifest->{'/bare'}, '/bare in manifest'); is($bare->{description}, 'Unnamed repository', 'missing $GIT_DIR/description fallback'); like($bare->{fingerprint}, qr/\A[a-f0-9]{40}\z/, 'fingerprint'); is($clone->{fingerprint}, $bare->{fingerprint}, 'fingerprint matches'); is(HTTP::Date::time2str($bare->{modified}), $res->{headers}->{'last-modified'}, 'modified field and Last-Modified header match'); ok(my $v2epoch0 = $manifest->{'/v2/git/0.git'}, 'v2 epoch 0 appeared'); like($v2epoch0->{description}, qr/ \[epoch 0\]\z/, 'epoch 0 in description'); ok(my $v2epoch1 = $manifest->{'/v2/git/1.git'}, 'v2 epoch 1 appeared'); like($v2epoch1->{description}, qr/ \[epoch 1\]\z/, 'epoch 1 in description'); $res = $http->get("http://$host:$port/alt/description"); is($res->{content}, "we're \xc4\x80ll clones\n", 'UTF-8 description') or diag explain($res); } my $td; SKIP: { my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $alt = "$tmpdir/alt.git"; my $cfgfile = "$tmpdir/config"; my $v2 = "$tmpdir/v2"; my $sock = tcp_server(); my ($host, $port) = tcp_host_port($sock); my @clone = qw(git clone -q -s --bare); is(xsys(@clone, $bare->{git_dir}, $alt), 0, 'clone shared repo'); PublicInbox::Import::init_bare("$v2/all.git"); for my $i (0..2) { is(xsys(@clone, $alt, "$v2/git/$i.git"), 0, "clone epoch $i") } ok(open(my $fh, '>', "$v2/inbox.lock"), 'mock a v2 inbox'); open $fh, '>', "$alt/description" or xbail "open $alt/description $!"; print $fh "we're \xc4\x80ll clones\n" or xbail "print $!"; close $fh or xbail "write: $alt/description $!"; is(xsys('git', "--git-dir=$alt", qw(config gitweb.owner), "lorelei \xc4\x80"), 0, 'set gitweb user'); open $fh, '>', $cfgfile or xbail "open $cfgfile: $!"; $fh->autoflush(1); print $fh <<"" or xbail "print $!"; [publicinbox "bare"] inboxdir = $bare->{git_dir} url = http://$host/bare address = bare\@example.com [publicinbox "alt"] inboxdir = $alt url = http://$host/alt address = alt\@example.com [publicinbox "v2"] inboxdir = $v2 url = http://$host/v2 address = v2\@example.com my $env = { PI_CONFIG => $cfgfile }; my $cmd = [ '-httpd', '-W0', "--stdout=$out", "--stderr=$err" ]; $td = start_script($cmd, $env, { 3 => $sock }); # default publicinboxGrokManifest match=domain default tiny_test($json, $host, $port); undef $td; print $fh <<"" or xbail "print $!"; [publicinbox] wwwlisting = all close $fh or xbail "close $!"; $td = start_script($cmd, $env, { 3 => $sock }); tiny_test($json, $host, $port, 1); undef $sock; skip 'TEST_GROK unset', 12 unless $ENV{TEST_GROK}; my $grok_pull = require_cmd('grok-pull', 1) or skip('grok-pull not available', 12); my ($grok_version) = (xqx([$grok_pull, "--version"]) =~ /(\d+)\.(?:\d+)(?:\.(\d+))?/); $grok_version >= 2 or skip('grok-pull v2 or later not available', 12); my $grok_loglevel = $ENV{TEST_GROK_LOGLEVEL} // 'info'; ok(mkdir("$tmpdir/mirror"), 'prepare grok mirror dest'); my $tail = tail_f("$tmpdir/grok.log"); open $fh, '>', "$tmpdir/repos.conf" or xbail $!; print $fh <<"" or xbail $!; [core] toplevel = $tmpdir/mirror manifest = $tmpdir/local-manifest.js.gz log = $tmpdir/grok.log loglevel = $grok_loglevel [remote] site = http://$host:$port manifest = \${site}/manifest.js.gz [pull] [fsck] close $fh or xbail $!; xsys($grok_pull, '-c', "$tmpdir/repos.conf"); is($? >> 8, 0, 'grok-pull exit code as expected'); for (qw(alt bare v2/git/0.git v2/git/1.git v2/git/2.git)) { ok(-d "$tmpdir/mirror/$_", "grok-pull created $_"); } # support per-inbox manifests, handy for v2: # /$INBOX/v2/manifest.js.gz open $fh, '>', "$tmpdir/per-inbox.conf" or xbail $!; print $fh <<"" or xbail $!; [core] toplevel = $tmpdir/per-inbox manifest = $tmpdir/per-inbox-manifest.js.gz log = $tmpdir/grok.log loglevel = $grok_loglevel [remote] site = http://$host:$port manifest = \${site}/v2/manifest.js.gz [pull] [fsck] close $fh or xbail $!; ok(mkdir("$tmpdir/per-inbox"), 'prepare single-v2-inbox mirror'); xsys($grok_pull, '-c', "$tmpdir/per-inbox.conf"); is($? >> 8, 0, 'grok-pull exit code as expected'); for (qw(v2/git/0.git v2/git/1.git v2/git/2.git)) { ok(-d "$tmpdir/per-inbox/$_", "grok-pull created $_"); } $td->kill; $td->join; is($?, 0, 'no error in exited process'); open $fh, '<', $err or BAIL_OUT("open $err failed: $!"); my $eout = do { local $/; <$fh> }; unlike($eout, qr/wide/i, 'no Wide character warnings'); unlike($eout, qr/uninitialized/i, 'no uninitialized warnings'); } done_testing(); public-inbox-1.9.0/t/www_static.t000066400000000000000000000065051430031475700167510ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; my ($tmpdir, $for_destroy) = tmpdir(); my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape); require_mods(@mods, 'IO::Uncompress::Gunzip'); use_ok $_ foreach @mods; use_ok 'PublicInbox::WwwStatic'; my $app = sub { my $ws = PublicInbox::WwwStatic->new(docroot => $tmpdir, @_); sub { $ws->call(shift) }; }; test_psgi($app->(), sub { my $cb = shift; my $res = $cb->(GET('/')); is($res->code, 404, '404 on "/" by default'); open my $fh, '>', "$tmpdir/index.html" or die; print $fh 'hi' or die; close $fh or die; $res = $cb->(GET('/')); is($res->code, 200, '200 with index.html'); is($res->content, 'hi', 'default index.html returned'); $res = $cb->(HEAD('/')); is($res->code, 200, '200 on HEAD /'); is($res->content, '', 'no content'); is($res->header('Content-Length'), '2', 'content-length set'); like($res->header('Content-Type'), qr!^text/html\b!, 'content-type is html'); }); test_psgi($app->(autoindex => 1, index => []), sub { my $cb = shift; my $res = $cb->(GET('/')); my $updir = 'href="../">../'; is($res->code, 200, '200 with autoindex default'); my $ls = $res->content; like($ls, qr/index\.html/, 'got listing with index.html'); ok(index($ls, $updir) < 0, 'no updir at /'); mkdir("$tmpdir/dir") or die; rename("$tmpdir/index.html", "$tmpdir/dir/index.html") or die; $res = $cb->(GET('/dir/')); is($res->code, 200, '200 with autoindex for dir/'); $ls = $res->content; ok(index($ls, $updir) > 0, 'updir at /dir/'); for my $up (qw(/../ .. /dir/.. /dir/../)) { is($cb->(GET($up))->code, 403, "`$up' traversal rejected"); } $res = $cb->(GET('/dir')); is($res->code, 302, '302 w/o slash'); like($res->header('Location'), qr!://[^/]+/dir/\z!, 'redirected w/ slash'); rename("$tmpdir/dir/index.html", "$tmpdir/dir/foo") or die; link("$tmpdir/dir/foo", "$tmpdir/dir/foo.gz") or die; $res = $cb->(GET('/dir/')); unlike($res->content, qr/>foo\.gzcontent, qr/>foo(GET('/dir/foo/bar')); is($res->code, 404, 'using file as dir fails'); unlink("$tmpdir/dir/foo") or die; $res = $cb->(GET('/dir/')); like($res->content, qr/>foo\.gz', "$tmpdir/dir/foo" or die; print $fh "uncompressed\n" or die; close $fh or die; utime(0, 0, "$tmpdir/dir/foo") or die; $res = $cb->(GET('/dir/')); my $html = $res->content; like($html, qr/>foofoo\.gz(GET('/dir/foo')); is($res->content, "uncompressed\n", 'got uncompressed on mtime mismatch'); utime(0, 0, "$tmpdir/dir/foo.gz") or die; my $get = GET('/dir/foo'); $get->header('Accept-Encoding' => 'gzip'); $res = $cb->($get); is($res->content, "hi", 'got compressed on mtime match'); $get = GET('/dir/'); $get->header('Accept-Encoding' => 'gzip'); $res = $cb->($get); my $in = $res->content; my $out = ''; IO::Uncompress::Gunzip::gunzip(\$in => \$out); like($out, qr/\A/, 'got HTML start after gunzip'); like($out, qr{$}, 'got HTML end after gunzip'); }); done_testing(); public-inbox-1.9.0/t/x-unknown-alpine.eml000066400000000000000000000012731430031475700202770ustar00rootroot00000000000000Date: Sat, 13 Aug 2016 12:14:15 +0200 (CEST) From: Alpine User To: Subject: charset=X-UNKNOWN test Message-ID: User-Agent: Alpine 2.20 (DEB 67 2015-01-07) MIME-Version: 1.0 Content-Type: multipart/mixed; BOUNDARY="8323329-703494712-1471083256=:4924" This message is in MIME format. The first part should be readable text, while the remaining parts are likely unreadable without MIME-aware tools. --8323329-703494712-1471083256=:4924 Content-Type: text/plain; charset=X-UNKNOWN Content-Transfer-Encoding: QUOTED-PRINTABLE =09https://example.com/ =E2=80=A2 bullet point --8323329-703494712-1471083256=:4924-- public-inbox-1.9.0/t/xcpdb-reshard.t000066400000000000000000000046341430031475700173050ustar00rootroot00000000000000#!perl -w # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_mods(qw(DBD::SQLite Search::Xapian)); require_git('2.6'); use PublicInbox::Eml; require PublicInbox::Search; my ($tmpdir, $for_destroy) = tmpdir(); my $nproc = 8; my $ndoc = 13; my $ibx = create_inbox 'test', version => 2, indexlevel => 'medium', tmpdir => "$tmpdir/testbox", nproc => $nproc, sub { my ($im, $ibx) = @_; my $eml = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: test@example.com Subject: this is a subject Date: Fri, 02 Oct 1993 00:00:00 +0000 EOF for my $i (1..$ndoc) { $eml->header_set('Message-ID', ""); ok($im->add($eml), "message $i added"); } open my $fh, '>', "$ibx->{inboxdir}/empty" or BAIL_OUT "open $!"; }; my $env = { PI_CONFIG => "$ibx->{inboxdir}/empty" }; my @shards = grep(m!/\d+\z!, glob("$ibx->{inboxdir}/xap*/*")); is(scalar(@shards), $nproc - 1, 'got expected shards'); my $orig = $ibx->over->query_xover(1, $ndoc); my %nums = map {; "$_->{num}" => 1 } @$orig; my @xcpdb = qw(-xcpdb -q); my $XapianDatabase = do { no warnings 'once'; $PublicInbox::Search::X{Database}; }; # ensure we can go up or down in shards, or stay the same: for my $R (qw(2 4 1 3 3)) { delete $ibx->{search}; # release old handles my $cmd = [@xcpdb, "-R$R", $ibx->{inboxdir}]; push @$cmd, '--compact' if $R == 1 && have_xapian_compact; ok(run_script($cmd, $env), "xcpdb -R$R"); my @new_shards = grep(m!/\d+\z!, glob("$ibx->{inboxdir}/xap*/*")); is(scalar(@new_shards), $R, 'resharded to two shards'); my $mset = $ibx->search->mset('s:this'); my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); is(scalar(@$msgs), $ndoc, 'got expected docs after resharding'); my %by_mid = map {; "$_->{mid}" => $_ } @$msgs; ok($by_mid{"m$_\@example.com"}, "$_ exists") for (1..$ndoc); delete $ibx->{search}; # release old handles # ensure docids in Xapian match NNTP article numbers my $tot = 0; my %tmp = %nums; foreach my $d (@new_shards) { my $xdb = $XapianDatabase->new($d); $tot += $xdb->get_doccount; my $it = $xdb->postlist_begin(''); my $end = $xdb->postlist_end(''); for (; $it != $end; $it++) { my $docid = $it->get_docid; if ($xdb->get_document($docid)) { ok(delete($tmp{$docid}), "saw #$docid"); } } } is(scalar keys %tmp, 0, 'all docids seen'); } done_testing; public-inbox-1.9.0/version-gen.perl000066400000000000000000000014101430031475700172340ustar00rootroot00000000000000#!perl -w use v5.10.1; my $v = $ENV{VERSION} // die 'VERSION unset'; my $f = './lib/PublicInbox.pm'; if (-d ($ENV{GIT_DIR} // '.git') || -f '.git') { chomp(my $gv = `git describe --match "v[0-9]*" HEAD`); if ($? == 0) { substr($gv, 0, 1, ''); # remove "v" system(qw(git update-index -q --refresh)); if (my @n = `git diff-index --name-only HEAD --`) { $gv .= '-dirty'; } $v = $gv; } } $v =~ tr/-/./; if (-f $f && do $f && (eval('$PublicInbox::VERSION') // 'undef') eq $v) { exit } my $tmp = "$f.tmp.$$"; open my $fh, '>', $tmp or die "open($tmp): $!"; print $fh < # License: AGPL-3.0+ use strict; use Test::More; use Benchmark qw(:all); use PublicInbox::Inbox; use PublicInbox::View; use PublicInbox::TestCommon; use PublicInbox::Eml; use Digest::MD5; use PublicInbox::MsgIter; require_mods(qw(Data::Dumper Email::MIME)); Data::Dumper->import('Dumper'); require PublicInbox::MIME; require_git(2.19); my ($tmpdir, $for_destroy) = tmpdir(); my $inboxdir = $ENV{GIANT_INBOX_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered); my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'cmp' }); my $git = $ibx->git; my $fh = $git->popen(@cat); vec(my $vec = '', fileno($fh), 1) = 1; select($vec, undef, undef, 60) or die "timed out waiting for --batch-check"; my $n = 0; my $m = 0; my $dig_cls = 'Digest::MD5'; sub h ($) { s/\s+\z//s; # E::M leaves trailing white space s/\s+/ /sg; "$_[0]: $_"; } my $cmp = sub { my ($p, $cmp_arg) = @_; my $part = shift @$p; push @$cmp_arg, '---'.join(', ', @$p).'---'; my $ct = $part->content_type // 'text/plain'; $ct =~ s/[ \t]+.*\z//s; my ($s, $err); eval { push @$cmp_arg, map { h 'f' } $part->header('From'); push @$cmp_arg, map { h 't' } $part->header('To'); push @$cmp_arg, map { h 'cc' } $part->header('Cc'); push @$cmp_arg, map { h 'mid' } $part->header('Message-ID'); push @$cmp_arg, map { h 'refs' } $part->header('References'); push @$cmp_arg, map { h 'irt' } $part->header('In-Reply-To'); push @$cmp_arg, map { h 's' } $part->header('Subject'); push @$cmp_arg, map { h 'cd' } $part->header('Content-Description'); ($s, $err) = msg_part_text($part, $ct); if (defined $s) { $s =~ s/\s+\z//s; push @$cmp_arg, "S: ".$s; } else { $part = $part->body; push @$cmp_arg, "T: $ct"; if ($part =~ /[^\p{XPosixPrint}\s]/s) { # binary my $dig = $dig_cls->new; $dig->add($part); push @$cmp_arg, "M: ".$dig->hexdigest; push @$cmp_arg, "B: ".length($part); } else { $part =~ s/\s+\z//s; push @$cmp_arg, "X: ".$part; } } }; if ($@) { $err //= ''; push @$cmp_arg, "E: $@ ($err)"; } }; my $ndiff = 0; my $git_cb = sub { my ($bref, $oid) = @_; local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ }; ++$m; PublicInbox::MIME->new($$bref)->each_part($cmp, my $m_ctx = [], 1); PublicInbox::Eml->new($$bref)->each_part($cmp, my $e_ctx = [], 1); if (join("\0", @$e_ctx) ne join("\0", @$m_ctx)) { ++$ndiff; open my $fh, '>', "$tmpdir/mime" or die $!; print $fh Dumper($m_ctx) or die $!; close $fh or die $!; open $fh, '>', "$tmpdir/eml" or die $!; print $fh Dumper($e_ctx) or die $!; close $fh or die $!; diag "$inboxdir $oid differ"; # using `git diff', diff(1) may not be installed diag xqx([qw(git diff), "$tmpdir/mime", "$tmpdir/eml"]); } }; my $t = timeit(1, sub { while (<$fh>) { my ($oid, $type) = split / /; next if $type ne 'blob'; ++$n; $git->cat_async($oid, $git_cb); } $git->async_wait_all; }); is($m, $n, "$inboxdir rendered all $m <=> $n messages"); is($ndiff, 0, "$inboxdir $ndiff differences"); done_testing(); public-inbox-1.9.0/xt/cmp-msgview.t000066400000000000000000000060131430031475700171760ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use Benchmark qw(:all); use PublicInbox::Inbox; use PublicInbox::View; use PublicInbox::TestCommon; use PublicInbox::Eml; use Digest::MD5; require_git(2.19); require_mods qw(Data::Dumper Email::MIME Plack::Util); Data::Dumper->import('Dumper'); require PublicInbox::MIME; my ($tmpdir, $for_destroy) = tmpdir(); my $inboxdir = $ENV{GIANT_INBOX_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered); my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'perf' }); my $git = $ibx->git; my $fh = $git->popen(@cat); vec(my $vec = '', fileno($fh), 1) = 1; select($vec, undef, undef, 60) or die "timed out waiting for --batch-check"; my $mime_ctx = { env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' }, ibx => $ibx, www => Plack::Util::inline_object(style => sub {''}), obuf => \(my $mime_buf = ''), mhref => '../', }; my $eml_ctx = { %$mime_ctx, obuf => \(my $eml_buf = '') }; my $n = 0; my $m = 0; my $ndiff_html = 0; my $dig_cls = 'Digest::MD5'; my $digest_attach = sub { # ensure ->body (not ->body_raw) matches my ($p, $cmp_arg) = @_; my $part = shift @$p; my $dig = $cmp_arg->[0] //= $dig_cls->new; $dig->add($part->body_raw); push @$cmp_arg, join(', ', @$p); }; my $git_cb = sub { my ($bref, $oid) = @_; local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ }; ++$m; my $mime = PublicInbox::MIME->new($$bref); PublicInbox::View::multipart_text_as_html($mime, $mime_ctx); my $eml = PublicInbox::Eml->new($$bref); PublicInbox::View::multipart_text_as_html($eml, $eml_ctx); if ($eml_buf ne $mime_buf) { ++$ndiff_html; open my $fh, '>', "$tmpdir/mime" or die $!; print $fh $mime_buf or die $!; close $fh or die $!; open $fh, '>', "$tmpdir/eml" or die $!; print $fh $eml_buf or die $!; close $fh or die $!; # using `git diff', diff(1) may not be installed diag "$inboxdir $oid differs"; diag xqx([qw(git diff), "$tmpdir/mime", "$tmpdir/eml"]); } $eml_buf = $mime_buf = ''; # don't tolerate differences in attachment downloads $mime = PublicInbox::MIME->new($$bref); $mime->each_part($digest_attach, my $mime_cmp = [], 1); $eml = PublicInbox::Eml->new($$bref); $eml->each_part($digest_attach, my $eml_cmp = [], 1); $mime_cmp->[0] = $mime_cmp->[0]->hexdigest; $eml_cmp->[0] = $eml_cmp->[0]->hexdigest; # don't have millions of "ok" lines if (join("\0", @$eml_cmp) ne join("\0", @$mime_cmp)) { diag Dumper([ $oid, eml => $eml_cmp, mime =>$mime_cmp ]); is_deeply($eml_cmp, $mime_cmp, "$inboxdir $oid match"); } }; my $t = timeit(1, sub { while (<$fh>) { my ($oid, $type) = split / /; next if $type ne 'blob'; ++$n; $git->cat_async($oid, $git_cb); } $git->async_wait_all; }); is($m, $n, 'rendered all messages'); # we'll tolerate minor differences in HTML rendering diag "$ndiff_html HTML differences"; done_testing(); public-inbox-1.9.0/xt/create-many-inboxes.t000066400000000000000000000052471430031475700206220ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Eml; use PublicInbox::IPC; use File::Path qw(mkpath); use IO::Handle (); # autoflush use POSIX qw(_exit); use Cwd qw(getcwd abs_path); use File::Spec; my $many_root = $ENV{TEST_MANY_ROOT} or plan skip_all => 'TEST_MANY_ROOT not defined'; my $cwd = getcwd(); mkpath($many_root); -d $many_root or BAIL_OUT "$many_root: $!"; $many_root = abs_path($many_root); $many_root =~ m!\A\Q$cwd\E/! and BAIL_OUT "$many_root must not be in $cwd"; require_git 2.6; require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::V2Writable'; my $nr_inbox = $ENV{NR_INBOX} // 10; my $nproc = $ENV{NPROC} || PublicInbox::IPC::detect_nproc() || 2; my $indexlevel = $ENV{TEST_INDEXLEVEL} // 'basic'; diag "NR_INBOX=$nr_inbox NPROC=$nproc TEST_INDEXLEVEL=$indexlevel"; diag "TEST_MANY_ROOT=$many_root"; my $level_cfg = $indexlevel eq 'full' ? '' : "\tindexlevel = $indexlevel\n"; my $pfx = "$many_root/$nr_inbox-$indexlevel"; mkpath($pfx); open my $cfg_fh, '>>', "$pfx/config" or BAIL_OUT $!; $cfg_fh->autoflush(1); my $v2_init_add = sub { my ($i) = @_; my $ibx = PublicInbox::Inbox->new({ inboxdir => "$pfx/test-$i", name => "test-$i", newsgroup => "inbox.comp.test.foo.test-$i", address => [ "test-$i\@example.com" ], url => [ "//example.com/test-$i" ], version => 2, -no_fsync => 1, }); $ibx->{indexlevel} = $indexlevel if $level_cfg ne ''; my $entry = <{name}"] address = $ibx->{-primary_address} url = $ibx->{url}->[0] newsgroup = $ibx->{newsgroup} inboxdir = $ibx->{inboxdir} EOF $entry .= $level_cfg; print $cfg_fh $entry or die $!; my $v2w = PublicInbox::V2Writable->new($ibx, { nproc => 0 }); $v2w->init_inbox(0); $v2w->add(PublicInbox::Eml->new(< To: test-$i\@example.com Message-ID: <20101002-000000-$i\@example.com> Subject: hello world $i hi EOM $v2w->done; }; my @children; for my $i (1..$nproc) { my ($r, $w); pipe($r, $w) or BAIL_OUT $!; my $pid = fork // BAIL_OUT "fork: $!"; if ($pid == 0) { close $w; while (my $i = <$r>) { chomp $i; $v2_init_add->($i); } _exit(0); } close $r or BAIL_OUT $!; push @children, [ $w, $pid ]; $w->autoflush(1); } for my $i (0..$nr_inbox) { print { $children[$i % @children]->[0] } "$i\n" or BAIL_OUT $!; } for my $c (@children) { close $c->[0] or BAIL_OUT "close $!"; } my $i = 0; for my $c (@children) { my $pid = waitpid($c->[1], 0); is($?, 0, ++$i.' exited ok'); } ok(close($cfg_fh), 'config written'); done_testing; public-inbox-1.9.0/xt/eml_check_limits.t000066400000000000000000000050311430031475700202320ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Eml; use PublicInbox::Inbox; use List::Util qw(max); use Benchmark qw(:all :hireswallclock); use PublicInbox::Spawn qw(popen_rd); use Carp (); require_git(2.19); # for --unordered require_mods(qw(BSD::Resource)); BSD::Resource->import(qw(getrusage)); my $cls = $ENV{TEST_CLASS}; if ($cls) { diag "TEST_CLASS=$cls"; require_mods($cls); } $cls //= 'PublicInbox::Eml'; my $inboxdir = $ENV{GIANT_INBOX_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; local $PublicInbox::Eml::mime_nesting_limit = 0x7fffffff; local $PublicInbox::Eml::mime_parts_limit = 0x7fffffff; local $PublicInbox::Eml::header_size_limit = 0x7fffffff; my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'x' }); my $git = $ibx->git; my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered); my $fh = $git->popen(@cat); my ($m, $n); my $max_nest = [ 0, '' ]; # [ bytes, blob oid ] my $max_idx = [ 0, '' ]; my $max_parts = [ 0, '' ]; my $max_size = [ 0, '' ]; my $max_hdr = [ 0, '' ]; my $info = [ 0, '' ]; my $each_part_cb = sub { my ($p) = @_; my ($part, $depth, $idx) = @$p; $max_nest = [ $depth, $info->[1] ] if $depth > $max_nest->[0]; my $max = max(split(/\./, $idx)); $max_idx = [ $max, $info->[1] ] if $max > $max_idx->[0]; ++$info->[0]; }; my ($bref, $oid, $size); local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ }; my $cat_cb = sub { ($bref, $oid, undef, $size) = @_; ++$m; $info = [ 0, $oid ]; my $eml = $cls->new($bref); my $hdr_len = length($eml->header_obj->as_string); $max_hdr = [ $hdr_len, $oid ] if $hdr_len > $max_hdr->[0]; $eml->each_part($each_part_cb, $info, 1); $max_parts = $info if $info->[0] > $max_parts->[0]; $max_size = [ $size, $oid ] if $size > $max_size->[0]; }; my $t = timeit(1, sub { my ($blob, $type); while (<$fh>) { ($blob, $type) = split / /; next if $type ne 'blob'; ++$n; $git->cat_async($blob, $cat_cb); } $git->async_wait_all; }); is($m, $n, 'scanned all messages'); diag "$$ $inboxdir took ".timestr($t)." for $n <=> $m messages"; diag "$$ max_nest $max_nest->[0] @ $max_nest->[1]"; diag "$$ max_idx $max_idx->[0] @ $max_idx->[1]"; diag "$$ max_parts $max_parts->[0] @ $max_parts->[1]"; diag "$$ max_size $max_size->[0] @ $max_size->[1]"; diag "$$ max_hdr $max_hdr->[0] @ $max_hdr->[1]"; diag "$$ RSS ".getrusage()->maxrss. ' k'; done_testing; public-inbox-1.9.0/xt/eml_octet-stream.t000066400000000000000000000041741430031475700202120ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Git; use PublicInbox::Eml; use PublicInbox::MsgIter qw(msg_part_text); use PublicInbox::LeiToMail; my $eml2mboxcl2 = PublicInbox::LeiToMail->can('eml2mboxcl2'); my $git_dir = $ENV{GIANT_GIT_DIR}; plan 'skip_all' => "GIANT_GIT_DIR not defined for $0" unless defined($git_dir); use Data::Dumper; $Data::Dumper::Useqq = 1; my $mboxfh; if (my $out = $ENV{DEBUG_MBOXCL2}) { BAIL_OUT("$out exists") if -s $out; open $mboxfh, '>', $out or BAIL_OUT "open $out: $!"; } else { diag "DEBUG_MBOXCL2 unset, not saving debug output"; } my $git = PublicInbox::Git->new($git_dir); my @cat = qw(cat-file --buffer --batch-check --batch-all-objects); if (require_git(2.19, 1)) { push @cat, '--unordered'; } else { warn "git <2.19, cat-file lacks --unordered, locality suffers\n"; } my ($errs, $ok, $tot); $errs = $ok = $tot = 0; my $ep = sub { # eml->each_part callback my ($part, $level, @ex) = @{$_[0]}; ++$tot; my $ct = $part->content_type // return; $ct =~ m!\bapplication/octet-stream\b!i or return; my ($s, $err) = msg_part_text($part, $ct); if (defined $s) { ++$ok; } else { warn "binary $err\n"; ++$errs; my $x = eval { $part->body }; if ($@) { warn "decode totally failed: $@"; } else { my ($bad) = ($x =~ m/([\p{XPosixPrint}\s]{0,10} [^\p{XPosixPrint}\s]+ [\p{XPosixPrint}\s]{0,10})/sx); warn Dumper([$bad]); } push @{$_[1]}, $err; # $fail } }; my $cb = sub { my ($bref, $oid) = @_; my $eml = PublicInbox::Eml->new($bref); local $SIG{__WARN__} = sub { diag("$oid ", @_) }; $eml->each_part($ep, my $fail = []); if (@$fail && $mboxfh) { diag "@$fail"; print $mboxfh ${$eml2mboxcl2->($eml, { blob => $oid })} or BAIL_OUT "print: $!"; } }; my $cat = $git->popen(@cat); while (<$cat>) { my ($oid, $type, $size) = split(/ /); $git->cat_async($oid, $cb) if $size && $type eq 'blob'; } $git->async_wait_all; note "$errs errors"; note "$ok/$tot messages had text as application/octet-stream"; ok 1; done_testing; public-inbox-1.9.0/xt/git-http-backend.t000066400000000000000000000100201430031475700200560ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # # Ensure buffering behavior in -httpd doesn't cause runaway memory use # or data corruption use strict; use warnings; use Test::More; use POSIX qw(setsid); use PublicInbox::TestCommon; use PublicInbox::Spawn qw(which); my $git_dir = $ENV{GIANT_GIT_DIR}; plan 'skip_all' => 'GIANT_GIT_DIR not defined' unless $git_dir; require_mods(qw(BSD::Resource Plack::Util Plack::Builder HTTP::Date HTTP::Status Net::HTTP)); my $psgi = "./t/git-http-backend.psgi"; my ($tmpdir, $for_destroy) = tmpdir(); my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $sock = tcp_server(); my ($host, $port) = tcp_host_port($sock); my $td; my $get_maxrss = sub { my $http = Net::HTTP->new(Host => "$host:$port"); ok($http, 'Net::HTTP object created for maxrss'); $http->write_request(GET => '/'); my ($code, $mess, %h) = $http->read_response_headers; is($code, 200, 'success reading maxrss'); my $n = $http->read_entity_body(my $buf, 256); ok(defined $n, 'read response body'); like($buf, qr/\A\d+\n\z/, 'got memory response'); ok(int($buf) > 0, 'got non-zero memory response'); int($buf); }; { my $cmd = [ '-httpd', '-W0', "--stdout=$out", "--stderr=$err", $psgi ]; $td = start_script($cmd, undef, { 3 => $sock }); } my $mem_a = $get_maxrss->(); SKIP: { my $max = 0; my $pack; my $glob = "$git_dir/objects/pack/pack-*.pack"; foreach my $f (glob($glob)) { my $n = -s $f; if ($n > $max) { $max = $n; $pack = $f; } } skip "no packs found in $git_dir" unless defined $pack; if ($pack !~ m!(/objects/pack/pack-[a-f0-9]{40}.pack)\z!) { skip "bad pack name: $pack"; } my $url = $1; my $http = Net::HTTP->new(Host => "$host:$port"); ok($http, 'Net::HTTP object created'); $http->write_request(GET => $url); my ($code, $mess, %h) = $http->read_response_headers; is(200, $code, 'got 200 success for pack'); is($max, $h{'Content-Length'}, 'got expected Content-Length for pack'); # no $http->read_entity_body, here, since we want to force buffering foreach my $i (1..3) { sleep 1; my $diff = $get_maxrss->() - $mem_a; note "${diff}K memory increase after $i seconds"; ok($diff < 1024, 'no bloating caused by slow dumb client'); } } SKIP: { # make sure Last-Modified + If-Modified-Since works with curl my $nr = 6; skip 'no description', $nr unless -f "$git_dir/description"; my $mtime = (stat(_))[9]; my $curl = which('curl'); skip 'curl(1) not found', $nr unless $curl; my $url = "http://$host:$port/description"; my $dst = "$tmpdir/desc"; is(xsys($curl, qw(-RsSf), '-o', $dst, $url), 0, 'curl -R'); is((stat($dst))[9], $mtime, 'curl used remote mtime'); is(xsys($curl, qw(-sSf), '-z', $dst, '-o', "$dst.2", $url), 0, 'curl -z noop'); ok(!-e "$dst.2", 'no modification, nothing retrieved'); utime(0, 0, $dst) or die "utime failed: $!"; is(xsys($curl, qw(-sSfR), '-z', $dst, '-o', "$dst.2", $url), 0, 'curl -z updates'); ok(-e "$dst.2", 'faked modification, got new file retrieved'); } { my $c = fork; if ($c == 0) { setsid(); exec qw(git clone -q --mirror), "http://$host:$port/", "$tmpdir/mirror.git"; die "Failed start git clone: $!\n"; } select(undef, undef, undef, 0.1); foreach my $i (1..10) { is(1, kill('STOP', -$c), 'signaled clone STOP'); sleep 1; ok(kill('CONT', -$c), 'continued clone'); my $diff = $get_maxrss->() - $mem_a; note "${diff}K memory increase after $i seconds"; ok($diff < 2048, 'no bloating caused by slow smart client'); } ok(kill('CONT', -$c), 'continued clone'); is($c, waitpid($c, 0), 'reaped wayward slow clone'); is($?, 0, 'clone did not error out'); note 'clone done, fsck-ing clone result...'; is(0, system("git", "--git-dir=$tmpdir/mirror.git", qw(fsck --no-progress)), 'fsck did not report corruption'); my $diff = $get_maxrss->() - $mem_a; note "${diff}K memory increase after smart clone"; ok($diff < 2048, 'no bloating caused by slow smart client'); } { ok($td->kill, 'killed httpd'); $td->join; } done_testing(); public-inbox-1.9.0/xt/git_async_cmp.t000066400000000000000000000030761430031475700175650ustar00rootroot00000000000000#!perl -w # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use Benchmark qw(:all); use Digest::SHA; use PublicInbox::TestCommon; my $git_dir = $ENV{GIANT_GIT_DIR}; plan 'skip_all' => "GIANT_GIT_DIR not defined for $0" unless defined($git_dir); use_ok 'PublicInbox::Git'; my $git = PublicInbox::Git->new($git_dir); my @cat = qw(cat-file --buffer --batch-check --batch-all-objects); if (require_git(2.19, 1)) { push @cat, '--unordered'; } else { warn "git <2.19, cat-file lacks --unordered, locality suffers\n"; } my @dig; my $nr = $ENV{NR} || 1; diag "NR=$nr"; my $async = timeit($nr, sub { my $dig = Digest::SHA->new(1); my $cb = sub { my ($bref) = @_; $dig->add($$bref); }; my $cat = $git->popen(@cat); while (<$cat>) { my ($oid, undef, undef) = split(/ /); $git->cat_async($oid, $cb); } close $cat or die "cat: $?"; $git->async_wait_all; push @dig, ['async', $dig->hexdigest ]; }); my $sync = timeit($nr, sub { my $dig = Digest::SHA->new(1); my $cat = $git->popen(@cat); while (<$cat>) { my ($oid, undef, undef) = split(/ /); my $bref = $git->cat_file($oid); $dig->add($$bref); } close $cat or die "cat: $?"; push @dig, ['sync', $dig->hexdigest ]; }); ok(scalar(@dig) >= 2, 'got some digests'); my $ref = shift @dig; my $exp = $ref->[1]; isnt($exp, Digest::SHA->new(1)->hexdigest, 'not empty'); foreach (@dig) { is($_->[1], $exp, "digest matches $_->[0] <=> $ref->[0]"); } diag "sync=".timestr($sync); diag "async=".timestr($async); done_testing; 1; public-inbox-1.9.0/xt/httpd-async-stream.t000066400000000000000000000061061430031475700204720ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # Expensive test to validate compression and TLS. use strict; use Test::More; use PublicInbox::TestCommon; use PublicInbox::DS qw(now); use PublicInbox::Spawn qw(which popen_rd); use Digest::MD5; use POSIX qw(_exit); my $inboxdir = $ENV{GIANT_INBOX_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; my $curl = which('curl') or plan skip_all => "curl(1) missing for $0"; my ($tmpdir, $for_destroy) = tmpdir(); require_mods(qw(DBD::SQLite)); my $JOBS = $ENV{TEST_JOBS} // 4; my $endpoint = $ENV{TEST_ENDPOINT} // 'all.mbox.gz'; my $curl_opt = $ENV{TEST_CURL_OPT} // ''; diag "TEST_JOBS=$JOBS TEST_ENDPOINT=$endpoint TEST_CURL_OPT=$curl_opt"; # we set Host: to ensure stable results across test runs my @CURL_OPT = (qw(-HHost:example.com -sSf), split(' ', $curl_opt)); my $make_local_server = sub { my $pi_config = "$tmpdir/config"; open my $fh, '>', $pi_config or die "open($pi_config): $!"; print $fh <<"" or die "print $pi_config: $!"; [publicinbox "test"] inboxdir = $inboxdir address = test\@example.com close $fh or die "close($pi_config): $!"; my ($out, $err) = ("$tmpdir/out", "$tmpdir/err"); for ($out, $err) { open my $fh, '>', $_ or die "truncate: $!"; } my $http = tcp_server(); my $rdr = { 3 => $http }; # not using multiple workers, here, since we want to increase # the chance of tripping concurrency bugs within PublicInbox/HTTP*.pm my $cmd = [ '-httpd', "--stdout=$out", "--stderr=$err", '-W0' ]; my $host_port = tcp_host_port($http); push @$cmd, "-lhttp://$host_port"; my $url = "$host_port/test/$endpoint"; print STDERR "# CMD ". join(' ', @$cmd). "\n"; my $env = { PI_CONFIG => $pi_config }; (start_script($cmd, $env, $rdr), $url); }; my ($td, $url) = $make_local_server->(); my $do_get_all = sub { my ($job) = @_; local $SIG{__DIE__} = sub { print STDERR $job, ': ', @_; _exit(1) }; my $dig = Digest::MD5->new; my ($buf, $nr); my $bytes = 0; my $t0 = now(); my ($rd, $pid) = popen_rd([$curl, @CURL_OPT, $url]); while (1) { $nr = sysread($rd, $buf, 65536); last if !$nr; $dig->add($buf); $bytes += $nr; } my $res = $dig->hexdigest; my $elapsed = sprintf('%0.3f', now() - $t0); close $rd or die "close curl failed: $!\n"; waitpid($pid, 0) == $pid or die "waitpid failed: $!\n"; $? == 0 or die "curl failed: $?\n"; print STDERR "# $job $$ ($?) $res (${elapsed}s) $bytes bytes\n"; $res; }; my (%pids, %res); for my $job (1..$JOBS) { pipe(my ($r, $w)) or die; my $pid = fork; if ($pid == 0) { close $r or die; my $res = $do_get_all->($job); print $w $res or die; close $w or die; _exit(0); } close $w or die; $pids{$pid} = [ $job, $r ]; } while (scalar keys %pids) { my $pid = waitpid(-1, 0) or next; my $child = delete $pids{$pid} or next; my ($job, $rpipe) = @$child; is($?, 0, "$job done"); my $sum = do { local $/; <$rpipe> }; push @{$res{$sum}}, $job; } is(scalar keys %res, 1, 'all got the same result'); $td->kill; $td->join; is($?, 0, 'no error on -httpd exit'); done_testing; public-inbox-1.9.0/xt/imapd-mbsync-oimap.t000066400000000000000000000065421430031475700204350ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # ensure mbsync and offlineimap compatibility use strict; use v5.10.1; use File::Path qw(mkpath); use PublicInbox::TestCommon; use PublicInbox::Spawn qw(which spawn); require_mods(qw(-imapd)); my $inboxdir = $ENV{GIANT_INBOX_DIR}; (defined($inboxdir) && -d $inboxdir) or plan skip_all => "GIANT_INBOX_DIR not defined for $0"; plan skip_all => "bad characters in $inboxdir" if $inboxdir =~ m![^\w\.\-/]!; my ($tmpdir, $for_destroy) = tmpdir(); my $cfg = "$tmpdir/cfg"; my $newsgroup = 'inbox.test'; my $mailbox = "$newsgroup.0"; { open my $fh, '>', $cfg or BAIL_OUT "open: $!"; print $fh < $cfg }; my $td = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT "-imapd: $?"; { my $c = tcp_connect($sock); like(readline($c), qr/CAPABILITY /, 'got greeting'); } my $host_port = tcp_host_port($sock); my ($host, $port) = ($sock->sockhost, $sock->sockport); my %pids; SKIP: { mkpath([map { "$tmpdir/oimapdir/$_" } qw(cur new tmp)]); my $oimap = which('offlineimap') or skip 'no offlineimap(1)', 1; open my $fh, '>', "$tmpdir/.offlineimaprc" or BAIL_OUT "open: $!"; print $fh < usecompression = no EOF close $fh or BAIL_OUT "close: $!"; my $cmd = [ $oimap, qw(-o -q -u quiet) ]; my $pid = spawn($cmd, { HOME => $tmpdir }, { 1 => 2 }); $pids{$pid} = $cmd; } SKIP: { mkpath([map { "$tmpdir/mbsyncdir/test/$_" } qw(cur new tmp)]); my $mbsync = which('mbsync') or skip 'no mbsync(1)', 1; open my $fh, '>', "$tmpdir/.mbsyncrc" or BAIL_OUT "open: $!"; print $fh < $tmpdir }, { 1 => 2 }); $pids{$pid} = $cmd; } while (scalar keys %pids) { my $pid = waitpid(-1, 0) or next; my $cmd = delete $pids{$pid} or next; is($?, 0, join(' ', @$cmd, 'done')); } my $sec = $ENV{TEST_PERSIST} // 0; diag "TEST_PERSIST=$sec"; if ($sec) { diag "sleeping ${sec}s, imap://$host_port/$mailbox available"; diag "tmpdir=$tmpdir (Maildirs available)"; diag "stdout=$out"; diag "stderr=$err"; diag "pid=$td->{pid}"; sleep $sec; } $td->kill; $td->join; is($?, 0, 'no error on -imapd exit'); done_testing; public-inbox-1.9.0/xt/imapd-validate.t000066400000000000000000000123501430031475700176220ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # Expensive test to validate compression and TLS. use strict; use v5.10.1; use Symbol qw(gensym); use PublicInbox::DS qw(now); use POSIX qw(_exit); use PublicInbox::TestCommon; my $inbox_dir = $ENV{GIANT_INBOX_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inbox_dir; # how many emails to read into memory at once per-process my $BATCH = $ENV{TEST_BATCH} // 100; my $REPEAT = $ENV{TEST_REPEAT} // 1; diag "TEST_BATCH=$BATCH TEST_REPEAT=$REPEAT"; require_mods(qw(Mail::IMAPClient -imapd)); my $imap_client = 'Mail::IMAPClient'; my $can_compress = $imap_client->can('compress'); if ($can_compress) { # hope this gets fixed upstream, soon require PublicInbox::IMAPClient; $imap_client = 'PublicInbox::IMAPClient'; } my $test_tls = $ENV{TEST_SKIP_TLS} ? 0 : eval { require IO::Socket::SSL }; my ($cert, $key) = qw(certs/server-cert.pem certs/server-key.pem); if ($test_tls && !-r $key || !-r $cert) { plan skip_all => "certs/ missing for $0, run $^X ./certs/create-certs.perl"; } my ($tmpdir, $for_destroy) = tmpdir(); my %OPT = qw(User u Password p); my (%STARTTLS_OPT, %IMAPS_OPT, $td, $newsgroup, $mailbox, $make_local_server); if (($ENV{IMAP_TEST_URL} // '') =~ m!\Aimap://([^/]+)/(.+)\z!) { ($OPT{Server}, $mailbox) = ($1, $2); $OPT{Server} =~ s/:([0-9]+)\z// and $OPT{Port} = $1 + 0; %STARTTLS_OPT = %OPT; %IMAPS_OPT = (%OPT, Port => 993) if $OPT{Port} == 143; } else { require_mods(qw(DBD::SQLite)); $make_local_server->(); $mailbox = "$newsgroup.0"; } my %opts = (imap => \%OPT, 'imap+compress' => { %OPT, Compress => 1 }); my $uid_max = do { my $mic = $imap_client->new(%OPT) or BAIL_OUT "new $!"; $mic->examine($mailbox) or BAIL_OUT "examine: $!"; my $next = $mic->uidnext($mailbox) or BAIL_OUT "uidnext: $!"; $next - 1; }; if (scalar keys %STARTTLS_OPT) { $opts{starttls} = \%STARTTLS_OPT; $opts{'starttls+compress'} = { %STARTTLS_OPT, Compress => 1 }; } if (scalar keys %IMAPS_OPT) { $opts{imaps} = \%IMAPS_OPT; $opts{'imaps+compress'} = { %IMAPS_OPT, Compress => 1 }; } my $do_get_all = sub { my ($desc, $opt) = @_; local $SIG{__DIE__} = sub { print STDERR $desc, ': ', @_; _exit(1) }; my $t0 = now(); my $dig = Digest::SHA->new(1); my $mic = $imap_client->new(%$opt); $mic->examine($mailbox) or die "examine: $!"; my $uid_base = 1; my $bytes = 0; my $nr = 0; until ($uid_base > $uid_max) { my $end = $uid_base + $BATCH; my $ret = $mic->fetch_hash("$uid_base:$end", 'BODY[]') or last; for my $uid ($uid_base..$end) { $dig->add($uid); my $h = delete $ret->{$uid} or next; my $body = delete $h->{'BODY[]'} or die "no BODY[] for UID=$uid"; $dig->add($body); $bytes += length($body); ++$nr; } $uid_base = $end + 1; } $mic->logout or die "logout failed: $!"; my $elapsed = sprintf('%0.3f', now() - $t0); my $res = $dig->hexdigest; print STDERR "# $desc $res (${elapsed}s) $bytes bytes, NR=$nr\n"; $res; }; my (%pids, %res); for (1..$REPEAT) { while (my ($desc, $opt) = each %opts) { pipe(my ($r, $w)) or die; my $pid = fork; if ($pid == 0) { close $r or die; my $res = $do_get_all->($desc, $opt); print $w $res or die; close $w or die; _exit(0); } close $w or die; $pids{$pid} = [ $desc, $r ]; } } while (scalar keys %pids) { my $pid = waitpid(-1, 0) or next; my $child = delete $pids{$pid} or next; my ($desc, $rpipe) = @$child; is($?, 0, "$desc done"); my $sum = do { local $/; <$rpipe> }; push @{$res{$sum}}, $desc; } is(scalar keys %res, 1, 'all got the same result'); $td->kill; $td->join; is($?, 0, 'no error on -imapd exit'); done_testing; BEGIN { $make_local_server = sub { require PublicInbox::Inbox; $newsgroup = 'inbox.test'; my $ibx = { inboxdir => $inbox_dir, newsgroup => $newsgroup }; $ibx = PublicInbox::Inbox->new($ibx); my $pi_config = "$tmpdir/config"; { open my $fh, '>', $pi_config or die "open($pi_config): $!"; print $fh <<"" or die "print $pi_config: $!"; [publicinbox "test"] newsgroup = $newsgroup inboxdir = $inbox_dir address = test\@example.com close $fh or die "close($pi_config): $!"; } my ($out, $err) = ("$tmpdir/out", "$tmpdir/err"); for ($out, $err) { open my $fh, '>', $_ or die "truncate: $!"; } my $imap = tcp_server(); my $rdr = { 3 => $imap }; $OPT{Server} = $imap->sockhost; $OPT{Port} = $imap->sockport; # not using multiple workers, here, since we want to increase # the chance of tripping concurrency bugs within PublicInbox/IMAP*.pm my $cmd = [ '-imapd', "--stdout=$out", "--stderr=$err", '-W0' ]; push @$cmd, '-limap://'.tcp_host_port($imap); if ($test_tls) { my $imaps = tcp_server(); $rdr->{4} = $imaps; push @$cmd, '-limaps://'.tcp_host_port($imaps); push @$cmd, "--cert=$cert", "--key=$key"; my $tls_opt = [ SSL_hostname => 'server.local', SSL_verifycn_name => 'server.local', SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_PEER(), SSL_ca_file => 'certs/test-ca.pem', ]; %STARTTLS_OPT = (%OPT, Starttls => $tls_opt); %IMAPS_OPT = (%OPT, Ssl => $tls_opt, Server => $imaps->sockhost, Port => $imaps->sockport ); } print STDERR "# CMD ". join(' ', @$cmd). "\n"; my $env = { PI_CONFIG => $pi_config }; $td = start_script($cmd, $env, $rdr); }; } # BEGIN public-inbox-1.9.0/xt/lei-auth-fail.t000066400000000000000000000016471430031475700173710ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; require_mods(qw(Mail::IMAPClient lei)); # TODO: mock IMAP server which fails at authentication so we don't # have to make external connections to test this: my $imap_fail = $ENV{TEST_LEI_IMAP_FAIL_URL} // 'imaps://AzureDiamond:Hunter2@public-inbox.org:994/INBOX'; my ($ro_home, $cfg_path) = setup_public_inboxes; test_lei(sub { for my $pfx ([qw(q z:0.. --only), "$ro_home/t1", '-o'], [qw(convert -o mboxrd:/dev/stdout)], [qw(convert t/utf8.eml -o), $imap_fail], ['import'], [qw(tag +L:INBOX)]) { ok(!lei(@$pfx, $imap_fail), "IMAP auth failure on @$pfx"); like($lei_err, qr!\bE:.*?imaps?://.*?!sm, 'error shown'); unlike($lei_err, qr!Hunter2!s, 'password not shown'); is($lei_out, '', 'nothing output'); } }); done_testing; public-inbox-1.9.0/xt/lei-onion-convert.t000066400000000000000000000036541430031475700203170ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10; use PublicInbox::TestCommon; use PublicInbox::MboxReader; my $test_tor = $ENV{TEST_TOR}; plan skip_all => "TEST_TOR unset" unless $test_tor; unless ($test_tor =~ m!\Asocks5h://!i) { my $default = 'socks5h://127.0.0.1:9050'; diag "using $default (set TEST_TOR=socks5h://ADDR:PORT to override)"; $test_tor = $default; } my $onion = $ENV{TEST_ONION_HOST} // '7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion'; my $ng = 'inbox.comp.mail.public-inbox.meta'; my $nntp_url = $ENV{TEST_NNTP_ONION_URL} // "nntp://$onion/$ng"; my $imap_url = $ENV{TEST_IMAP_ONION_URL} // "imap://$onion/$ng.0"; my @cnv = qw(lei convert -o mboxrd:/dev/stdout); my @proxy_cli = ("--proxy=$test_tor"); my $proxy_cfg = "proxy=$test_tor"; test_lei(sub { my $run = {}; for my $args ([$nntp_url, @proxy_cli], [$imap_url, @proxy_cli], [ $nntp_url, '-c', "nntp.$proxy_cfg" ], [ $imap_url, '-c', "imap.$proxy_cfg" ]) { pipe(my ($r, $w)) or xbail "pipe: $!"; my $cmd = [@cnv, @$args]; my $td = start_script($cmd, undef, { 1 => $w, run_mode => 0 }); $args->[0] =~ s!\A(.+?://).*!$1...!; my $key = "@$args"; ok($td, "$key running"); $run->{$key} = { td => $td, r => $r }; } while (my ($key, $x) = each %$run) { my ($td, $r) = delete(@$x{qw(td r)}); eval { PublicInbox::MboxReader->mboxrd($r, sub { my ($eml) = @_; if ($key =~ m!\Anntps?://!i) { for (qw(Xref Newsgroups Path)) { $eml->header_set($_); } } push @{$x->{eml}}, $eml; close $r; $td->kill('-INT'); die "$key done\n"; }); }; chomp(my $done = $@); like($done, qr/\Q$key\E done/, $done); $td->join; } my @keys = keys %$run; my $first_key = shift @keys; for my $key (@keys) { is_deeply($run->{$key}, $run->{$first_key}, "`$key' matches `$first_key'"); } }); done_testing; public-inbox-1.9.0/xt/mem-imapd-tls.t000066400000000000000000000154071430031475700174150ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ # Idle client memory usage test, particularly after EXAMINE when # Message Sequence Numbers are loaded use strict; use v5.10.1; use Socket qw(SOCK_STREAM IPPROTO_TCP SOL_SOCKET); use PublicInbox::TestCommon; use PublicInbox::Syscall qw(:epoll); use PublicInbox::DS; require_mods(qw(-imapd)); my $inboxdir = $ENV{GIANT_INBOX_DIR}; my $TEST_TLS; SKIP: { require_mods('IO::Socket::SSL', 1); $TEST_TLS = $ENV{TEST_TLS} // 1; }; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; diag 'TEST_COMPRESS='.($ENV{TEST_COMPRESS} // 1) . " TEST_TLS=$TEST_TLS"; my ($cert, $key) = qw(certs/server-cert.pem certs/server-key.pem); if ($TEST_TLS) { if (!-r $key || !-r $cert) { plan skip_all => "certs/ missing for $0, run ./certs/create-certs.perl"; } use_ok 'PublicInbox::TLS'; } my ($tmpdir, $for_destroy) = tmpdir(); my ($out, $err) = ("$tmpdir/stdout.log", "$tmpdir/stderr.log"); my $pi_config = "$tmpdir/pi_config"; my $group = 'inbox.test'; local $SIG{PIPE} = 'IGNORE'; # for IMAPC (below) my $imaps = tcp_server(); { open my $fh, '>', $pi_config or die "open: $!\n"; print $fh < $pi_config }; my $arg = $TEST_TLS ? [ "-limaps://$imaps_addr/?cert=$cert,key=$key" ] : []; my $cmd = [ '-imapd', '-W0', @$arg, "--stdout=$out", "--stderr=$err" ]; # run_mode=0 ensures Test::More FDs don't get shared my $td = start_script($cmd, $env, { 3 => $imaps, run_mode => 0 }); my %ssl_opt; if ($TEST_TLS) { %ssl_opt = ( SSL_hostname => 'server.local', SSL_verifycn_name => 'server.local', SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_PEER(), SSL_ca_file => 'certs/test-ca.pem', ); my $ctx = IO::Socket::SSL::SSL_Context->new(%ssl_opt); # cf. https://rt.cpan.org/Ticket/Display.html?id=129463 my $mode = eval { Net::SSLeay::MODE_RELEASE_BUFFERS() }; if ($mode && $ctx->{context}) { eval { Net::SSLeay::CTX_set_mode($ctx->{context}, $mode) }; warn "W: $@ (setting SSL_MODE_RELEASE_BUFFERS)\n" if $@; } $ssl_opt{SSL_reuse_ctx} = $ctx; $ssl_opt{SSL_startHandshake} = 0; } chomp(my $nfd = `/bin/sh -c 'ulimit -n'`); $nfd -= 10; ok($nfd > 0, 'positive FD count'); my $MAX_FD = 10000; $nfd = $MAX_FD if $nfd >= $MAX_FD; our $DONE = 0; sub once { 0 }; # stops event loop # setup the event loop so that it exits at every step # while we're still doing connect(2) PublicInbox::DS->SetLoopTimeout(0); PublicInbox::DS->SetPostLoopCallback(\&once); my $pid = $td->{pid}; if ($^O eq 'linux' && open(my $f, '<', "/proc/$pid/status")) { diag(grep(/RssAnon/, <$f>)); } foreach my $n (1..$nfd) { my $io = tcp_connect($imaps, Blocking => 0); $io = IO::Socket::SSL->start_SSL($io, %ssl_opt) if $TEST_TLS; IMAPC->new($io); # one step through the event loop # do a little work as we connect: PublicInbox::DS::event_loop(); # try not to overflow the listen() backlog: if (!($n % 128) && $DONE != $n) { diag("nr: ($n) $DONE/$nfd"); PublicInbox::DS->SetLoopTimeout(-1); PublicInbox::DS->SetPostLoopCallback(sub { $DONE != $n }); # clear the backlog: PublicInbox::DS::event_loop(); # resume looping PublicInbox::DS->SetLoopTimeout(0); PublicInbox::DS->SetPostLoopCallback(\&once); } } # run the event loop normally, now: diag "done?: @".time." $DONE/$nfd"; if ($DONE != $nfd) { PublicInbox::DS->SetLoopTimeout(-1); PublicInbox::DS->SetPostLoopCallback(sub { $DONE != $nfd }); PublicInbox::DS::event_loop(); } is($nfd, $DONE, "$nfd/$DONE done"); if ($^O eq 'linux' && open(my $f, '<', "/proc/$pid/status")) { diag(grep(/RssAnon/, <$f>)); diag " SELF lsof | wc -l ".`lsof -p $$ |wc -l`; diag "SERVER lsof | wc -l ".`lsof -p $pid |wc -l`; } PublicInbox::DS->Reset; $td->kill; $td->join; is($?, 0, 'no error in exited process'); done_testing; package IMAPC; use strict; use parent qw(PublicInbox::DS); # fields: step: state machine, zin: Zlib inflate context use PublicInbox::Syscall qw(EPOLLIN EPOLLOUT EPOLLONESHOT); use Errno qw(EAGAIN); # determines where we start event_step use constant FIRST_STEP => ($ENV{TEST_COMPRESS} // 1) ? -2 : 0; # return true if complete, false if incomplete (or failure) sub connect_tls_step { my ($self) = @_; my $sock = $self->{sock} or return; return 1 if $sock->connect_SSL; return $self->drop("$!") if $! != EAGAIN; if (my $ev = PublicInbox::TLS::epollbit()) { unshift @{$self->{wbuf}}, \&connect_tls_step; PublicInbox::DS::epwait($sock, $ev | EPOLLONESHOT); 0; } else { $self->drop('BUG? EAGAIN but '.PublicInbox::TLS::err()); } } sub event_step { my ($self) = @_; # TLS negotiation happens in flush_write via {wbuf} return unless $self->flush_write && $self->{sock}; if ($self->{step} == -2) { $self->do_read(\(my $buf = ''), 128) or return; $buf =~ /\A\* OK / or die 'no greeting'; $self->{step} = -1; $self->write(\"1 COMPRESS DEFLATE\r\n"); } if ($self->{step} == -1) { $self->do_read(\(my $buf = ''), 128) or return; $buf =~ /\A1 OK / or die "no compression $buf"; IMAPCdeflate->enable($self); $self->{step} = 1; $self->write(\"2 EXAMINE inbox.test.0\r\n"); } if ($self->{step} == 0) { $self->do_read(\(my $buf = ''), 128) or return; $buf =~ /\A\* OK / or die 'no greeting'; $self->{step} = 1; $self->write(\"2 EXAMINE inbox.test.0\r\n"); } if ($self->{step} == 1) { my $buf = ''; until ($buf =~ /^2 OK \[READ-ONLY/ms) { $self->do_read(\$buf, 4096, length($buf)) or return; } $self->{step} = 2; $self->write(\"3 UID FETCH 1 (UID FLAGS)\r\n"); } if ($self->{step} == 2) { my $buf = ''; until ($buf =~ /^3 OK /ms) { $self->do_read(\$buf, 4096, length($buf)) or return; } $self->{step} = 3; $self->write(\"4 IDLE\r\n"); } if ($self->{step} == 3) { $self->do_read(\(my $buf = ''), 128) or return; no warnings 'once'; $::DONE++; $self->{step} = 5; # all done } else { warn "$self->{step} Should never get here $self"; } } sub new { my ($class, $io) = @_; my $self = bless { step => FIRST_STEP }, $class; if ($io->can('connect_SSL')) { $self->{wbuf} = [ \&connect_tls_step ]; } # wait for connect(), and maybe SSL_connect() $self->SUPER::new($io, EPOLLOUT|EPOLLONESHOT); } 1; package IMAPCdeflate; use strict; our @ISA; use Compress::Raw::Zlib; use PublicInbox::IMAP; my %ZIN_OPT; BEGIN { @ISA = qw(IMAPC); %ZIN_OPT = ( -WindowBits => -15, -AppendOutput => 1 ); *write = \&PublicInbox::DSdeflate::write; *do_read = \&PublicInbox::DSdeflate::do_read; }; sub enable { my ($class, $self) = @_; my ($in, $err) = Compress::Raw::Zlib::Inflate->new(%ZIN_OPT); die "Inflate->new failed: $err" if $err != Z_OK; bless $self, $class; $self->{zin} = $in; } 1; public-inbox-1.9.0/xt/mem-msgview.t000066400000000000000000000047311430031475700172020ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ # Note: this may be altered as-needed to demonstrate improvements. # See history in git for this file. use strict; use IO::Handle; # ->flush use Fcntl qw(SEEK_SET); use PublicInbox::TestCommon; use PublicInbox::Tmpfile; use Test::More; my @mods = qw(DBD::SQLite BSD::Resource PublicInbox::WWW); require_mods(@mods); use_ok($_) for @mods; my $lines = $ENV{NR_LINES} // 50000; diag "NR_LINES=$lines"; my ($tmpdir, $for_destroy) = tmpdir(); my $inboxname = 'big'; my $inboxdir = "$tmpdir/big"; local $ENV{PI_CONFIG} = "$tmpdir/cfg"; my $mid = 'test@example.com'; { # setup open my $fh, '>', "$tmpdir/cfg" or die; print $fh < To: Nikki <%s> Date: Tue, 3 May 1988 00:00:00 +0000 Subject: todo Message-ID: <%s> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="FOO" Content-Disposition: inline --FOO Content-Type: text/plain; charset=utf-8 Content-Disposition: inline EOF print $fh $hdr or die; for (0..$lines) { print $fh 'x' x 72, "\n" or die } print $fh <flush or die; sysseek($fh, 0, SEEK_SET) or die; my $env = { ORIGINAL_RECIPIENT => $addr }; my $err = ''; my $opt = { 0 => $fh, 2 => \$err, run_mode => 0 }; ok(run_script([qw(-mda --no-precheck)], $env, $opt), 'message delivered'); } my $www = PublicInbox::WWW->new; my $env = { PATH_INFO => "/$inboxname/$mid/", REQUEST_URI => "/$inboxname/$mid/", SCRIPT_NAME => '', QUERY_STRING => '', REQUEST_METHOD => 'GET', HTTP_HOST => 'example.com', 'psgi.errors' => \*STDERR, 'psgi.url_scheme' => 'http', }; my $ru_before = BSD::Resource::getrusage(); my $res = $www->call($env); my $body = $res->[2]; while (defined(my $x = $body->getline)) { } $body->close; my $ru_after = BSD::Resource::getrusage(); my $diff = $ru_after->maxrss - $ru_before->maxrss; diag "before: ${\$ru_before->maxrss} => ${\$ru_after->maxrss} diff=$diff kB"; done_testing(); public-inbox-1.9.0/xt/mem-nntpd-tls.t000066400000000000000000000153551430031475700174500ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ # Idle client memory usage test use v5.12.1; use PublicInbox::TestCommon; use File::Temp qw(tempdir); use Socket qw(SOCK_STREAM IPPROTO_TCP SOL_SOCKET); require_mods(qw(-nntpd)); require PublicInbox::InboxWritable; require PublicInbox::SearchIdx; use PublicInbox::Syscall qw(:epoll); use PublicInbox::DS; my $version = 2; # v2 needs newer git require_git('2.6') if $version >= 2; use_ok 'IO::Socket::SSL'; my ($cert, $key) = qw(certs/server-cert.pem certs/server-key.pem); unless (-r $key && -r $cert) { plan skip_all => "certs/ missing for $0, run ./certs/create-certs.perl"; } use_ok 'PublicInbox::TLS'; my ($tmpdir, $for_destroy) = tmpdir(); my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $mainrepo = $tmpdir; my $pi_config = "$tmpdir/pi_config"; my $group = 'test-nntpd-tls'; my $addr = $group . '@example.com'; local $SIG{PIPE} = 'IGNORE'; # for NNTPC (below) my $nntps = tcp_server(); my $ibx = PublicInbox::Inbox->new({ inboxdir => $mainrepo, name => 'nntpd-tls', version => $version, -primary_address => $addr, indexlevel => 'basic', }); $ibx = PublicInbox::InboxWritable->new($ibx, {nproc=>1}); $ibx->init_inbox(0); { open my $fh, '>', $pi_config or die "open: $!\n"; print $fh <importer(0); my $eml = eml_load('t/data/0001.patch'); ok($im->add($eml), 'message added'); $im->done; if ($version == 1) { my $s = PublicInbox::SearchIdx->new($ibx, 1); $s->index_sync; } } my $nntps_addr = tcp_host_port($nntps); my $env = { PI_CONFIG => $pi_config }; my $tls = $ENV{TLS} // 1; my $args = $tls ? ["--cert=$cert", "--key=$key", "-lnntps://$nntps_addr"] : []; my $cmd = [ '-nntpd', '-W0', @$args, "--stdout=$out", "--stderr=$err" ]; # run_mode=0 ensures Test::More FDs don't get shared my $td = start_script($cmd, $env, { 3 => $nntps, run_mode => 0 }); my %ssl_opt = ( SSL_hostname => 'server.local', SSL_verifycn_name => 'server.local', SSL_verify_mode => SSL_VERIFY_PEER(), SSL_ca_file => 'certs/test-ca.pem', ); my $ctx = IO::Socket::SSL::SSL_Context->new(%ssl_opt); # cf. https://rt.cpan.org/Ticket/Display.html?id=129463 my $mode = eval { Net::SSLeay::MODE_RELEASE_BUFFERS() }; if ($mode && $ctx->{context}) { eval { Net::SSLeay::CTX_set_mode($ctx->{context}, $mode) }; warn "W: $@ (setting SSL_MODE_RELEASE_BUFFERS)\n" if $@; } $ssl_opt{SSL_reuse_ctx} = $ctx; $ssl_opt{SSL_startHandshake} = 0; my %opt = ( Proto => 'tcp', PeerAddr => $nntps_addr, Type => SOCK_STREAM, Blocking => 0 ); chomp(my $nfd = `/bin/sh -c 'ulimit -n'`); $nfd -= 10; ok($nfd > 0, 'positive FD count'); my $MAX_FD = 10000; $nfd = $MAX_FD if $nfd >= $MAX_FD; our $DONE = 0; sub once { 0 }; # stops event loop # setup the event loop so that it exits at every step # while we're still doing connect(2) PublicInbox::DS->SetLoopTimeout(0); PublicInbox::DS->SetPostLoopCallback(\&once); foreach my $n (1..$nfd) { my $io = tcp_connect($nntps, Blocking => 0); $io = IO::Socket::SSL->start_SSL($io, %ssl_opt) if $tls; NNTPC->new($io); # one step through the event loop # do a little work as we connect: PublicInbox::DS::event_loop(); # try not to overflow the listen() backlog: if (!($n % 128) && $n != $DONE) { diag("nr: ($n) $DONE/$nfd"); PublicInbox::DS->SetLoopTimeout(-1); PublicInbox::DS->SetPostLoopCallback(sub { $DONE != $n }); # clear the backlog: PublicInbox::DS::event_loop(); # resume looping PublicInbox::DS->SetLoopTimeout(0); PublicInbox::DS->SetPostLoopCallback(\&once); } } my $pid = $td->{pid}; my $dump_rss = sub { return if $^O ne 'linux'; open(my $f, '<', "/proc/$pid/status") or return; diag(grep(/RssAnon/, <$f>)); }; $dump_rss->(); # run the event loop normally, now: if ($DONE != $nfd) { PublicInbox::DS->SetLoopTimeout(-1); PublicInbox::DS->SetPostLoopCallback(sub { diag "done: ".time." $DONE"; $DONE != $nfd; }); PublicInbox::DS::event_loop(); } is($nfd, $DONE, 'done'); $dump_rss->(); if ($^O eq 'linux') { diag " SELF lsof | wc -l ".`lsof -p $$ |wc -l`; diag "SERVER lsof | wc -l ".`lsof -p $pid |wc -l`; } PublicInbox::DS->Reset; $td->kill; $td->join; is($?, 0, 'no error in exited process'); done_testing(); package NNTPC; use v5.12; use parent qw(PublicInbox::DS); use PublicInbox::Syscall qw(EPOLLIN EPOLLOUT EPOLLONESHOT); use Data::Dumper; # return true if complete, false if incomplete (or failure) sub connect_tls_step ($) { my ($self) = @_; my $sock = $self->{sock} or return; return 1 if $sock->connect_SSL; return $self->drop("$!") unless $!{EAGAIN}; if (my $ev = PublicInbox::TLS::epollbit()) { unshift @{$self->{wbuf}}, \&connect_tls_step; PublicInbox::DS::epwait($self->{sock}, $ev | EPOLLONESHOT); 0; } else { $self->drop('BUG? EAGAIN but '.PublicInbox::TLS::err()); } } sub event_step ($) { my ($self) = @_; # TLS negotiation happens in flush_write via {wbuf} return unless $self->flush_write && $self->{sock}; if ($self->{step} == -2) { $self->do_read(\(my $buf = ''), 128) or return; $buf =~ /\A201 / or die "no greeting"; $self->{step} = -1; $self->write(\"COMPRESS DEFLATE\r\n"); } if ($self->{step} == -1) { $self->do_read(\(my $buf = ''), 128) or return; $buf =~ /\A20[0-9] / or die "no compression $buf"; NNTPCdeflate->enable($self); $self->{step} = 1; $self->write(\"DATE\r\n"); } if ($self->{step} == 0) { $self->do_read(\(my $buf = ''), 128) or return; $buf =~ /\A201 / or die "no greeting"; $self->{step} = 1; $self->write(\"DATE\r\n"); } if ($self->{step} == 1) { $self->do_read(\(my $buf = ''), 128) or return; $buf =~ /\A111 / or die 'no date'; no warnings 'once'; $::DONE++; $self->{step} = 2; # all done } else { die "$self->{step} Should never get here ". Dumper($self); } } sub new { my ($class, $io) = @_; my $self = bless {}, $class; # wait for connect(), and maybe SSL_connect() $self->SUPER::new($io, EPOLLOUT|EPOLLONESHOT); $self->{wbuf} = [ \&connect_tls_step ] if $io->can('connect_SSL'); $self->{step} = -2; # determines where we start event_step $self; }; 1; package NNTPCdeflate; use v5.12; our @ISA = qw(NNTPC PublicInbox::DS); use Compress::Raw::Zlib; use PublicInbox::DSdeflate; BEGIN { *write = \&PublicInbox::DSdeflate::write; *do_read = \&PublicInbox::DSdeflate::do_read; *event_step = \&NNTPC::event_step; *flush_write = \&PublicInbox::DS::flush_write; *close = \&PublicInbox::DS::close; } sub enable { my ($class, $self) = @_; my %ZIN_OPT = ( -WindowBits => -15, -AppendOutput => 1 ); my ($in, $err) = Compress::Raw::Zlib::Inflate->new(%ZIN_OPT); die "Inflate->new failed: $err" if $err != Z_OK; bless $self, $class; $self->{zin} = $in; } 1; public-inbox-1.9.0/xt/msgtime_cmp.t000066400000000000000000000110551430031475700172460ustar00rootroot00000000000000#!perl -w # Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Eml; use PublicInbox::Inbox; use PublicInbox::Git; use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp); use POSIX qw(strftime); require_mods('Date::Parse'); my $git; my ($inboxdir, $git_dir) = @ENV{qw(GIANT_INBOX_DIR GIANT_GIT_DIR)}; if (defined $inboxdir) { my $ibx = { inboxdir => $inboxdir, name => 'name' }; $git = PublicInbox::Inbox->new($ibx)->git; } elsif (defined $git_dir) { # sometimes just an old epoch is enough, since newer filters are nicer $git = PublicInbox::Git->new($git_dir); } else { plan skip_all => "GIANT_INBOX_DIR not set for $0"; } my @cat = qw(cat-file --buffer --batch-check --batch-all-objects); if (require_git(2.19, 1)) { push @cat, '--unordered'; } else { warn "git <2.19, cat-file lacks --unordered, locality suffers\n"; } # millions of "ok" lines are noise, just show mismatches: sub quiet_is_deeply ($$$$$) { my ($cur, $old, $func, $oid, $hdr) = @_; if ((scalar(@$cur) != 2) || (scalar(@$old) == 2 && ($old->[0] != $cur->[0]) || ($old->[1] != $cur->[1]))) { for ($cur, $old) { $_->[2] = strftime('%Y-%m-%d %k:%M:%S', gmtime($_->[0])) } is_deeply($cur, $old, "$func $oid"); diag('got: ', explain($cur)); diag('exp: ', explain($old)); diag $hdr->as_string; } } sub compare { my ($bref, $oid, $type, $size) = @_; local $SIG{__WARN__} = sub { diag "$oid: ", @_ }; my $mime = PublicInbox::Eml->new($$bref); my $hdr = $mime->header_obj; my @cur = msg_datestamp($hdr); my @old = Old::msg_datestamp($hdr); quiet_is_deeply(\@cur, \@old, 'datestamp', $oid, $hdr); @cur = msg_timestamp($hdr); @old = Old::msg_timestamp($hdr); quiet_is_deeply(\@cur, \@old, 'timestamp', $oid, $hdr); } my $fh = $git->popen(@cat); while (<$fh>) { my ($oid, $type) = split / /; next if $type ne 'blob'; $git->cat_async($oid, \&compare); } $git->async_wait_all; ok(1); done_testing; # old date/time-related functions package Old; use strict; use warnings; sub str2date_zone ($) { my ($date) = @_; my $ts = Date::Parse::str2time($date); return undef unless(defined $ts); # off is the time zone offset in seconds from GMT my ($ss,$mm,$hh,$day,$month,$year,$off) = Date::Parse::strptime($date); # new behavior which wasn't in the original old version: if ('commit d857e7dc0d816b635a7ead09c3273f8c2d2434be') { # "msgtime: assume +0000 if TZ missing when using Date::Parse" $off //= '+0000'; } return undef unless(defined $off); # Compute the time zone from offset my $sign = ($off < 0) ? '-' : '+'; my $hour = abs(int($off / 3600)); my $min = ($off / 60) % 60; # deal with weird offsets like '-0420' properly $min = 60 - $min if ($min && $off < 0); my $zone = sprintf('%s%02d%02d', $sign, $hour, $min); # "-1200" is the furthest westermost zone offset, # but git fast-import is liberal so we use "-1400" if ($zone >= 1400 || $zone <= -1400) { warn "bogus TZ offset: $zone, ignoring and assuming +0000\n"; $zone = '+0000'; } [$ts, $zone]; } sub time_response ($) { my ($ret) = @_; wantarray ? @$ret : $ret->[0]; } sub msg_received_at ($) { my ($hdr) = @_; # PublicInbox::Eml my @recvd = $hdr->header_raw('Received'); my ($ts); foreach my $r (@recvd) { $r =~ /\s*([0-9]+\s+[a-zA-Z]+\s+[0-9]{2,4}\s+ [0-9]+[^0-9][0-9]+(?:[^0-9][0-9]+) \s+([\+\-][0-9]+))/sx or next; $ts = eval { str2date_zone($1) } and return $ts; my $mid = $hdr->header_raw('Message-ID'); warn "no date in $mid Received: $r\n"; } undef; } sub msg_date_only ($) { my ($hdr) = @_; # PublicInbox::Eml my @date = $hdr->header_raw('Date'); my ($ts); foreach my $d (@date) { # Y2K problems: 3-digit years $d =~ s!([A-Za-z]{3}) ([0-9]{3}) ([0-9]{2}:[0-9]{2}:[0-9]{2})! my $yyyy = $2 + 1900; "$1 $yyyy $3"!e; $ts = eval { str2date_zone($d) } and return $ts; if ($@) { my $mid = $hdr->header_raw('Message-ID'); warn "bad Date: $d in $mid: $@\n"; } } undef; } # Favors Received header for sorting globally sub msg_timestamp ($) { my ($hdr) = @_; # PublicInbox::Eml my $ret; $ret = msg_received_at($hdr) and return time_response($ret); $ret = msg_date_only($hdr) and return time_response($ret); wantarray ? (time, '+0000') : time; } # Favors the Date: header for display and sorting within a thread sub msg_datestamp ($) { my ($hdr) = @_; # PublicInbox::Eml my $ret; $ret = msg_date_only($hdr) and return time_response($ret); $ret = msg_received_at($hdr) and return time_response($ret); wantarray ? (time, '+0000') : time; } 1; public-inbox-1.9.0/xt/net_nntp_socks.t000066400000000000000000000015511430031475700177710ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use v5.12; use PublicInbox::TestCommon; use URI; require_mods 'IO::Socket::Socks'; use_ok 'PublicInbox::NetNNTPSocks'; my $url = $ENV{TEST_NNTP_ONION_URL} // 'nntp://ie5yzdi7fg72h7s4sdcztq5evakq23rdt33mfyfcddc5u3ndnw24ogqd.onion/inbox.comp.mail.public-inbox.meta'; my $uri = URI->new($url); my $on = PublicInbox::NetNNTPSocks->new_socks( Port => $uri->port, Host => $uri->host, ProxyAddr => '127.0.0.1', # default Tor address + port ProxyPort => 9050, ) or xbail('err = '.eval('$IO::Socket::Socks::SOCKS_ERROR')); my ($nr, $min, $max, $grp) = $on->group($uri->group); ok($nr > 0 && $min > 0 && $min < $max, 'nr, min, max make sense') or diag explain([$nr, $min, $max, $grp]); is($grp, $uri->group, 'group matches'); done_testing; public-inbox-1.9.0/xt/net_writer-imap.t000066400000000000000000000235071430031475700200550ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use Sys::Hostname qw(hostname); use POSIX qw(strftime); use PublicInbox::OnDestroy; use PublicInbox::URIimap; use PublicInbox::Config; use PublicInbox::DS; use PublicInbox::InboxIdle; use Fcntl qw(O_EXCL O_WRONLY O_CREAT); my $imap_url = $ENV{TEST_IMAP_WRITE_URL} or plan skip_all => 'TEST_IMAP_WRITE_URL unset'; my $uri = PublicInbox::URIimap->new($imap_url); defined($uri->path) and plan skip_all => "$imap_url should not be a mailbox (just host:port)"; require_mods('Mail::IMAPClient'); require_ok 'PublicInbox::NetWriter'; my $host = (split(/\./, hostname))[0]; my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!); my $SEP = $ENV{IMAP_SEPARATOR} || '.'; my $folder = "INBOX$SEP$base-$host-".strftime('%Y%m%d%H%M%S', gmtime(time)). "-$$-".sprintf('%x', int(rand(0xffffffff))); my $nwr = PublicInbox::NetWriter->new; chop($imap_url) if substr($imap_url, -1) eq '/'; my $folder_url = "$imap_url/$folder"; my $folder_uri = PublicInbox::URIimap->new($folder_url); is($folder_uri->mailbox, $folder, 'folder correct') or BAIL_OUT "BUG: bad $$uri"; $nwr->add_url($$folder_uri); is($nwr->errors, undef, 'no errors'); $nwr->{pi_cfg} = bless {}, 'PublicInbox::Config'; my $set_cred_helper = sub { my ($f, $cred_set) = @_; sysopen(my $fh, $f, O_CREAT|O_EXCL|O_WRONLY) or BAIL_OUT "open $f: $!"; print $fh </dev/null`); if ($cred_helper eq 'store') { my $config = $ENV{XDG_CONFIG_HOME} // "$ENV{HOME}/.config"; for my $f ("$ENV{HOME}/.git-credentials", "$config/git/credentials") { next unless -f $f; @cred_link = ($f, '/.git-credentials'); last; } $cred_set = qq("$cred_helper"); } elsif ($cred_helper =~ /\Acache(?:[ \t]|\z)/) { my $cache = $ENV{XDG_CACHE_HOME} // "$ENV{HOME}/.cache"; for my $d ("$ENV{HOME}/.git-credential-cache", "$cache/git/credential") { next unless -d $d; @cred_link = ($d, '/.git-credential-cache'); $cred_set = qq("$cred_helper"); last; } } elsif (!$cred_helper) { # make the test less painful if no creds configured ($tmpdir, $for_destroy) = tmpdir; my $d = "$tmpdir/.git-credential-cache"; mkdir($d, 0700) or BAIL_OUT $!; $cred_set = "cache --timeout=60"; @cred_link = ($d, '/.git-credential-cache'); } else { diag "credential.helper=$cred_helper will not be used for this test"; } my $mics = do { local $ENV{HOME} = $tmpdir // $ENV{HOME}; if ($tmpdir && $cred_set) { $set_cred_helper->("$ENV{HOME}/.gitconfig", $cred_set) } $nwr->imap_common_init; }; my $mic = (values %$mics)[0]; my $cleanup = PublicInbox::OnDestroy->new($$, sub { if (defined($folder)) { my $mic = $nwr->mic_get($uri); $mic->delete($folder) or fail "delete $folder <$folder_uri>: $@"; } if ($tmpdir && -f "$tmpdir/.gitconfig") { local $ENV{HOME} = $tmpdir; system(qw(git credential-cache exit)); } }); my $imap_append = $nwr->can('imap_append'); my $smsg = bless { kw => [ 'seen' ] }, 'PublicInbox::Smsg'; $imap_append->($mic, $folder, undef, $smsg, eml_load('t/plack-qp.eml')); $nwr->{quiet} = 1; my $imap_slurp_all = sub { my ($url, $uid, $kw, $eml, $res) = @_; push @$res, [ $kw, $eml ]; }; $nwr->imap_each($folder_uri, $imap_slurp_all, my $res = []); is(scalar(@$res), 1, 'got appended message'); my $plack_qp_eml = eml_load('t/plack-qp.eml'); is_deeply($res, [ [ [ 'seen' ], $plack_qp_eml ] ], 'uploaded message read back'); $res = $mic = $mics = undef; test_lei(sub { my ($ro_home, $cfg_path) = setup_public_inboxes; my $cfg = PublicInbox::Config->new($cfg_path); $cfg->each_inbox(sub { my ($ibx) = @_; lei_ok qw(add-external -q), $ibx->{inboxdir} or BAIL_OUT; }); # cred_link[0] may be on a different (hopefully encrypted) FS, # we only symlink to it here, so we don't copy any sensitive data # into the temporary directory if (@cred_link && !symlink($cred_link[0], $ENV{HOME}.$cred_link[1])) { diag "symlink @cred_link: $! (non-fatal)"; $cred_set = undef; } $set_cred_helper->("$ENV{HOME}/.gitconfig", $cred_set) if $cred_set; # don't combine these two: $ENV{TEST_IMAP_COMPRESS} and lei_ok qw(config imap.compress true); $ENV{TEST_IMAP_DEBUG} and lei_ok qw(config imap.debug true); my $proxy = $ENV{TEST_IMAP_PROXY}; lei_ok(qw(config imap.proxy), $proxy) if $proxy; lei_ok qw(q f:qp@example.com -o), $folder_url; $nwr->imap_each($folder_uri, $imap_slurp_all, my $res = []); is(scalar(@$res), 1, 'got one deduped result') or diag explain($res); is_deeply($res->[0]->[1], $plack_qp_eml, 'lei q wrote expected result'); my $mdir = "$ENV{HOME}/t.mdir"; lei_ok 'convert', $folder_url, '-o', $mdir; my @mdfiles = glob("$mdir/*/*"); is(scalar(@mdfiles), 1, '1 message from IMAP => Maildir conversion'); is_deeply(eml_load($mdfiles[0]), $plack_qp_eml, 'conversion from IMAP to Maildir'); lei_ok qw(q f:matz -a -o), $folder_url; $nwr->imap_each($folder_uri, $imap_slurp_all, my $aug = []); is(scalar(@$aug), 2, '2 results after augment') or diag explain($aug); my $exp = $res->[0]->[1]->as_string; is(scalar(grep { $_->[1]->as_string eq $exp } @$aug), 1, 'original remains after augment'); $exp = eml_load('t/iso-2202-jp.eml')->as_string; is(scalar(grep { $_->[1]->as_string eq $exp } @$aug), 1, 'new result shown after augment'); lei_ok qw(q s:thisbetternotgiveanyresult -o), $folder_url; $nwr->imap_each($folder_uri, $imap_slurp_all, my $empty = []); is(scalar(@$empty), 0, 'no results w/o augment'); my $f = 't/utf8.eml'; # $exp = eml_load($f); lei_ok qw(convert -F eml -o), $folder_url, $f; my (@uid, @res); $nwr->imap_each($folder_uri, sub { my ($u, $uid, $kw, $eml) = @_; push @uid, $uid; push @res, [ $kw, $eml ]; }); is_deeply(\@res, [ [ [], $exp ] ], 'converted to IMAP destination'); is(scalar(@uid), 1, 'got one UID back'); lei_ok qw(q -o /dev/stdout m:testmessage@example.com --no-external); is_deeply(json_utf8->decode($lei_out), [undef], 'no results before import'); lei_ok qw(import -F eml), $f, \'import local copy w/o keywords'; lei_ok 'import', $folder_url; # populate mail_sync.sqlite3 lei_ok qw(tag +kw:seen +kw:answered +kw:flagged), $f; lei_ok 'ls-mail-sync'; my @ls = split(/\n/, $lei_out); is(scalar(@ls), 1, 'only one folder in ls-mail-sync') or xbail(\@ls); for my $l (@ls) { like($l, qr/;UIDVALIDITY=\d+\z/, 'UIDVALIDITY'); } lei_ok 'export-kw', $folder_url; $mic = $nwr->mic_for_folder($folder_uri); my $flags = $mic->flags($uid[0]); is_deeply([sort @$flags], [ qw(\\Answered \\Flagged \\Seen) ], 'IMAP flags set by export-kw') or diag explain($flags); # ensure this imap_set_kw clobbers $nwr->imap_set_kw($mic, $uid[0], [ 'seen' ])->expunge or BAIL_OUT "expunge $@"; $mic = undef; @res = (); $nwr->imap_each($folder_uri, $imap_slurp_all, \@res); is_deeply(\@res, [ [ ['seen'], $exp ] ], 'seen flag set') or diag explain(\@res); lei_ok qw(q s:thisbetternotgiveanyresult -o), $folder_url, \'clobber folder but import flag'; $nwr->imap_each($folder_uri, $imap_slurp_all, $empty = []); is_deeply($empty, [], 'clobbered folder'); lei_ok qw(q -o /dev/stdout m:testmessage@example.com --no-external); $res = json_utf8->decode($lei_out)->[0]; is_deeply([@$res{qw(m kw)}], ['testmessage@example.com', ['seen']], 'kw set'); # prepare messages for watch $mic = $nwr->mic_for_folder($folder_uri); for my $kw (qw(Deleted Seen Answered Draft forwarded)) { my $buf = < EOM my $f = $kw eq 'forwarded' ? '$Forwarded' : "\\$kw"; $mic->append_string($folder_uri->mailbox, $buf, $f) or BAIL_OUT "append $kw $@"; } $mic->disconnect; my $inboxdir = "$ENV{HOME}/wtest"; my @cmd = (qw(-init -Lbasic wtest), $inboxdir, qw(https://example.com/wtest wtest@example.com)); run_script(\@cmd) or BAIL_OUT "init wtest"; xsys(qw(git config), "--file=$ENV{HOME}/.public-inbox/config", 'publicinbox.wtest.watch', $folder_url) == 0 or BAIL_OUT "git config $?"; my $watcherr = "$ENV{HOME}/watch.err"; open my $err_wr, '>>', $watcherr or BAIL_OUT $!; my $pub_cfg = PublicInbox::Config->new; PublicInbox::DS->Reset; my $ii = PublicInbox::InboxIdle->new($pub_cfg); my $cb = sub { PublicInbox::DS->SetPostLoopCallback(sub {}) }; my $obj = bless \$cb, 'PublicInbox::TestCommon::InboxWakeup'; $pub_cfg->each_inbox(sub { $_[0]->subscribe_unlock('ident', $obj) }); my $w = start_script(['-watch'], undef, { 2 => $err_wr }); diag 'waiting for initial fetch...'; PublicInbox::DS::event_loop(); my $ibx = $pub_cfg->lookup_name('wtest'); my $mm = $ibx->mm; ok(defined($mm->num_for('Seen@test.example.com')), '-watch takes seen message'); ok(defined($mm->num_for('Answered@test.example.com')), '-watch takes answered message'); ok(!defined($mm->num_for('Deleted@test.example.com')), '-watch ignored \\Deleted'); ok(!defined($mm->num_for('Draft@test.example.com')), '-watch ignored \\Draft'); ok(defined($mm->num_for('forwarded@test.example.com')), '-watch takes forwarded message'); undef $w; # done with watch lei_ok qw(import), $folder_url; lei_ok qw(q m:forwarded@test.example.com); is_deeply(json_utf8->decode($lei_out)->[0]->{kw}, ['forwarded'], 'forwarded kw imported from IMAP'); lei_ok qw(q m:testmessage --no-external -o), $folder_url; lei_ok qw(up), $folder_url; lei_ok qw(up --all=remote); $mic = $nwr->mic_get($uri); $mic->delete($folder) or fail "delete $folder <$folder_uri>: $@"; $mic->expunge; undef $mic; undef $folder; ok(!lei(qw(export-kw), $folder_url), 'export-kw fails w/ non-existent folder'); }); undef $cleanup; # remove temporary folder done_testing; public-inbox-1.9.0/xt/nntpd-validate.t000066400000000000000000000137201430031475700176550ustar00rootroot00000000000000# Copyright (C) 2019-2021 all contributors # License: AGPL-3.0+ # Integration test to validate compression. use strict; use warnings; use Test::More; use Symbol qw(gensym); use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC); use POSIX qw(_exit); use PublicInbox::TestCommon; my $inbox_dir = $ENV{GIANT_INBOX_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inbox_dir; my $mid = $ENV{TEST_MID}; # Net::NNTP is part of the standard library, but distros may split it off... require_mods(qw(DBD::SQLite Net::NNTP Compress::Raw::Zlib)); my $test_compress = Net::NNTP->can('compress'); if (!$test_compress) { diag 'Your Net::NNTP does not yet support compression'; diag 'See: https://rt.cpan.org/Ticket/Display.html?id=129967'; } my $test_tls = $ENV{TEST_SKIP_TLS} ? 0 : eval { require IO::Socket::SSL }; my $cert = 'certs/server-cert.pem'; my $key = 'certs/server-key.pem'; if ($test_tls && !-r $key || !-r $cert) { plan skip_all => "certs/ missing for $0, run $^X ./certs/create-certs.perl"; } my ($tmpdir, $ftd) = tmpdir(); $File::Temp::KEEP_ALL = !!$ENV{TEST_KEEP_TMP}; my (%OPT, $td, $host_port, $group); my $batch = 1000; if (($ENV{NNTP_TEST_URL} // '') =~ m!\Anntp://([^/]+)/([^/]+)\z!) { ($host_port, $group) = ($1, $2); $host_port .= ":119" unless index($host_port, ':') > 0; } else { make_local_server(); } my $test_article = $ENV{TEST_ARTICLE} // 0; my $test_xover = $ENV{TEST_XOVER} // 1; if ($test_tls) { my $nntp = Net::NNTP->new($host_port, %OPT); ok($nntp->starttls, 'STARTTLS works'); ok($nntp->compress, 'COMPRESS works') if $test_compress; ok($nntp->quit, 'QUIT after starttls OK'); } if ($test_compress) { my $nntp = Net::NNTP->new($host_port, %OPT); ok($nntp->compress, 'COMPRESS works'); ok($nntp->quit, 'QUIT after compress OK'); } sub do_get_all { my ($methods) = @_; my $desc = join(',', @$methods); my $t0 = clock_gettime(CLOCK_MONOTONIC); my $dig = Digest::SHA->new(1); my $digfh = gensym; my $tmpfh; if ($File::Temp::KEEP_ALL) { open $tmpfh, '>', "$tmpdir/$desc.raw" or die $!; } my $tmp = { dig => $dig, tmpfh => $tmpfh }; tie *$digfh, 'DigestPipe', $tmp; my $nntp = Net::NNTP->new($host_port, %OPT); $nntp->article("<$mid>", $digfh) if $mid; foreach my $m (@$methods) { my $res = $nntp->$m; print STDERR "# $m got $res ($desc)\n" if !$res; } $nntp->article("<$mid>", $digfh) if $mid; my ($num, $first, $last) = $nntp->group($group); unless (defined $num && defined $first && defined $last) { warn "Invalid group\n"; return undef; } my $i; for ($i = $first; $i < $last; $i += $batch) { my $j = $i + $batch - 1; $j = $last if $j > $last; if ($test_xover) { my $xover = $nntp->xover("$i-$j"); for my $n (sort { $a <=> $b } keys %$xover) { my $line = join("\t", @{$xover->{$n}}); $line =~ tr/\r//d; $dig->add("$n\t".$line); } } if ($test_article) { for my $n ($i..$j) { $nntp->article($n, $digfh) and next; next if $nntp->code == 423; my $res = $nntp->code.' '. $nntp->message; $res =~ tr/\r\n//d; print STDERR "# Article $n ($desc): $res\n"; } } } # hacky bytes_read thing added to Net::NNTP for testing: my $bytes_read = ''; if ($nntp->can('bytes_read')) { $bytes_read .= ' '.$nntp->bytes_read.'b'; } my $q = $nntp->quit; print STDERR "# quit failed: ".$nntp->code."\n" if !$q; my $elapsed = sprintf('%0.3f', clock_gettime(CLOCK_MONOTONIC) - $t0); my $res = $dig->hexdigest; print STDERR "# $desc - $res (${elapsed}s)$bytes_read\n"; $res; } my @tests = ([]); push @tests, [ 'compress' ] if $test_compress; push @tests, [ 'starttls' ] if $test_tls; push @tests, [ 'starttls', 'compress' ] if $test_tls && $test_compress; my (@keys, %thr, %res); for my $m (@tests) { my $key = join(',', @$m); push @keys, $key; pipe(my ($r, $w)) or die; my $pid = fork; if ($pid == 0) { close $r or die; my $res = do_get_all($m); print $w $res or die; $w->flush; _exit(0); } close $w or die; $thr{$key} = [ $pid, $r ]; } for my $key (@keys) { my ($pid, $r) = @{delete $thr{$key}}; local $/; $res{$key} = <$r>; defined $res{$key} or die "nothing for $key"; my $w = waitpid($pid, 0); defined($w) or die; $w == $pid or die "waitpid($pid) != $w)"; is($?, 0, "`$key' exited successfully") } my $plain = $res{''}; ok($plain, "plain got $plain"); is($res{$_}, $plain, "$_ matches '' result") for @keys; done_testing(); sub make_local_server { require PublicInbox::Inbox; $group = 'inbox.test.perf.nntpd'; my $ibx = { inboxdir => $inbox_dir, newsgroup => $group }; $ibx = PublicInbox::Inbox->new($ibx); my $pi_config = "$tmpdir/config"; { open my $fh, '>', $pi_config or die "open($pi_config): $!"; print $fh <<"" or die "print $pi_config: $!"; [publicinbox "test"] newsgroup = $group inboxdir = $inbox_dir address = test\@example.com close $fh or die "close($pi_config): $!"; } my ($out, $err) = ("$tmpdir/out", "$tmpdir/err"); for ($out, $err) { open my $fh, '>', $_ or die "truncate: $!"; } my $sock = tcp_server(); $host_port = tcp_host_port($sock); # not using multiple workers, here, since we want to increase # the chance of tripping concurrency bugs within PublicInbox/NNTP*.pm my $cmd = [ '-nntpd', "--stdout=$out", "--stderr=$err", '-W0' ]; push @$cmd, "-lnntp://$host_port"; if ($test_tls) { push @$cmd, "--cert=$cert", "--key=$key"; %OPT = ( SSL_hostname => 'server.local', SSL_verifycn_name => 'server.local', SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_PEER(), SSL_ca_file => 'certs/test-ca.pem', ); } print STDERR "# CMD ". join(' ', @$cmd). "\n"; my $env = { PI_CONFIG => $pi_config }; $td = start_script($cmd, $env, { 3 => $sock }); } package DigestPipe; use strict; use warnings; sub TIEHANDLE { my ($class, $self) = @_; bless $self, $class; } sub PRINT { my $self = shift; my $data = join('', @_); # Net::NNTP emit different line-endings depending on TLS or not...: $data =~ tr/\r//d; $self->{dig}->add($data); if (my $tmpfh = $self->{tmpfh}) { print $tmpfh $data; } 1; } 1; public-inbox-1.9.0/xt/over-fsck.perl000066400000000000000000000021501430031475700173340ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ # unstable dev script, chasing a bug which may be in LeiSavedSearch->is_dup use v5.12; use Data::Dumper; use PublicInbox::OverIdx; @ARGV == 1 or die "Usage: $0 /path/to/over.sqlite3\n"; my $over = PublicInbox::OverIdx->new($ARGV[0]); my $dbh = $over->dbh; $dbh->do('PRAGMA mmap_size = '.(2 ** 48)); my $num = 0; my ($err, $none, $nr, $ids); $Data::Dumper::Useqq = $Data::Dumper::Sortkeys = 1; do { $ids = $over->ids_after(\$num); $nr += @$ids; for my $n (@$ids) { my $smsg = $over->get_art($n); if (!$smsg) { warn "#$n article missing\n"; ++$err; next; } my $exp = $smsg->{blob}; if ($exp eq '') { ++$none if $smsg->{bytes}; next; } my $xr3 = $over->get_xref3($n, 1); my $found; for my $r (@$xr3) { $r->[2] = unpack('H*', $r->[2]); $found = 1 if $r->[2] eq $exp; } if (!$found) { warn Dumper([$smsg, $xr3 ]); ++$err; } } } while (@$ids); warn "$none/$nr had no blob (external?)\n" if $none; warn "$err errors\n" if $err; exit($err ? 1 : 0); public-inbox-1.9.0/xt/perf-msgview.t000066400000000000000000000034121430031475700173530ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use Benchmark qw(:all); use PublicInbox::Inbox; use PublicInbox::View; use PublicInbox::Spawn qw(popen_rd); my $inboxdir = $ENV{GIANT_INBOX_DIR} // $ENV{GIANT_PI_DIR}; my $blob = $ENV{TEST_BLOB}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; my @cat = qw(cat-file --buffer --batch-check --batch-all-objects); if (require_git(2.19, 1)) { push @cat, '--unordered'; } else { warn "git <2.19, cat-file lacks --unordered, locality suffers\n"; } require_mods qw(Plack::Util); my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'name' }); my $git = $ibx->git; my $fh = $blob ? undef : $git->popen(@cat); if ($fh) { my $vec = ''; vec($vec, fileno($fh), 1) = 1; select($vec, undef, undef, 60) or die "timed out waiting for --batch-check"; } my $ctx = { env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' }, ibx => $ibx, www => Plack::Util::inline_object(style => sub {''}), }; my ($mime, $res, $oid, $type); my $n = 0; my $obuf = ''; my $m = 0; my $cb = sub { $mime = PublicInbox::Eml->new(shift); PublicInbox::View::multipart_text_as_html($mime, $ctx); ++$m; $obuf = ''; }; my $t = timeit(1, sub { $ctx->{obuf} = \$obuf; $ctx->{mhref} = '../'; if (defined $blob) { my $nr = $ENV{NR} // 10000; for (1..$nr) { ++$n; $git->cat_async($blob, $cb); } } else { while (<$fh>) { ($oid, $type) = split / /; next if $type ne 'blob'; ++$n; $git->cat_async($oid, $cb); } } $git->async_wait_all; }); diag 'multipart_text_as_html took '.timestr($t)." for $n <=> $m messages"; is($m, $n, 'rendered all messages'); done_testing(); public-inbox-1.9.0/xt/perf-nntpd.t000066400000000000000000000052641430031475700170240ustar00rootroot00000000000000# Copyright (C) 2018-2021 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use Benchmark qw(:all :hireswallclock); use PublicInbox::Inbox; use Net::NNTP; my $inboxdir = $ENV{GIANT_INBOX_DIR} // $ENV{GIANT_PI_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless defined($inboxdir); my ($host_port, $group, $s, $td, $tmp_obj); use PublicInbox::TestCommon; if (($ENV{NNTP_TEST_URL} || '') =~ m!\Anntp://([^/]+)/([^/]+)\z!) { ($host_port, $group) = ($1, $2); $host_port .= ":119" unless index($host_port, ':') > 0; my $six = substr($host_port, 0, 1) eq '[' ? '6' : ''; my $cls = "IO::Socket::INET$six"; $cls->new(Proto => 'tcp', Timeout => 1, PeerAddr => $host_port); } else { $group = 'inbox.test.perf.nntpd'; my $ibx = { inboxdir => $inboxdir, newsgroup => $group }; $ibx = PublicInbox::Inbox->new($ibx); my $tmpdir; ($tmpdir, $tmp_obj) = tmpdir(); my $pi_config = "$tmpdir/config"; { open my $fh, '>', $pi_config or die "open($pi_config): $!"; print $fh <<"" or die "print $pi_config: $!"; [publicinbox "test"] newsgroup = $group inboxdir = $inboxdir address = test\@example.com close $fh or die "close($pi_config): $!"; } my $sock = tcp_server(); my $cmd = [ '-nntpd', '-W0' ]; $td = start_script($cmd, { PI_CONFIG => $pi_config }, { 3 => $sock }); $host_port = tcp_host_port($sock); $s = tcp_connect($sock); } my $buf = $s->getline; like($buf, qr/\A201 .* ready - post via email\r\n/s, 'got greeting'); my $t = timeit(10, sub { ok($s->print("GROUP $group\r\n"), 'changed group'); $buf = $s->getline; }); diag 'GROUP took: ' . timestr($t); my ($tot, $min, $max) = ($buf =~ /\A211 (\d+) (\d+) (\d+) /); ok($tot && $min && $max, 'got GROUP response'); my $nr = $max - $min; my $nmax = 50000; my $nmin = $max - $nmax; $nmin = $min if $nmin < $min; my $res; my $spec = "$nmin-$max"; my $n; sub read_until_dot ($) { my $n = 0; do { $buf = $s->getline; ++$n } until $buf eq ".\r\n"; $n; } $t = timeit(1, sub { $s->print("XOVER $spec\r\n"); $n = read_until_dot($s); }); diag 'xover took: ' . timestr($t) . " for $n"; $t = timeit(1, sub { $s->print("HDR From $spec\r\n"); $n = read_until_dot($s); }); diag "XHDR From ". timestr($t) . " for $n"; my $date = $ENV{NEWNEWS_DATE}; unless ($date) { my (undef, undef, undef, $d, $m, $y) = gmtime(time - 30 * 86400); $date = sprintf('%04u%02u%02u', $y + 1900, $m + 1, $d); diag "NEWNEWS_DATE undefined, using $date"; } $t = timeit(1, sub { $s->print("NEWNEWS * $date 000000 GMT\r\n"); $n = read_until_dot($s); }); diag 'newnews took: ' . timestr($t) . " for $n"; if ($s) { $s->print("QUIT\r\n"); $s->getline; } done_testing(); 1; public-inbox-1.9.0/xt/perf-obfuscate.t000066400000000000000000000032241430031475700176460ustar00rootroot00000000000000#!perl -w # Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; use Benchmark qw(:all); use PublicInbox::Inbox; use PublicInbox::View; my $inboxdir = $ENV{GIANT_INBOX_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; my $obfuscate = $ENV{PI_OBFUSCATE} ? 1 : 0; diag "obfuscate=$obfuscate\n"; my @cat = qw(cat-file --buffer --batch-check --batch-all-objects); if (require_git(2.19, 1)) { push @cat, '--unordered'; } else { warn "git <2.19, cat-file lacks --unordered, locality suffers\n"; } require_mods qw(Plack::Util); use_ok 'Plack::Util'; my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'name' , obfuscate => $obfuscate}); my $git = $ibx->git; my $fh = $git->popen(@cat); my $vec = ''; vec($vec, fileno($fh), 1) = 1; select($vec, undef, undef, 60) or die "timed out waiting for --batch-check"; my $ctx = { env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' }, ibx => $ibx, www => Plack::Util::inline_object(style => sub {''}), }; my ($mime, $res, $oid, $type); my $n = 0; my $obuf = ''; my $m = 0; my $cb = sub { $mime = PublicInbox::Eml->new(shift); PublicInbox::View::multipart_text_as_html($mime, $ctx); ++$m; $obuf = ''; }; my $t = timeit(1, sub { $ctx->{obuf} = \$obuf; $ctx->{mhref} = '../'; while (<$fh>) { ($oid, $type) = split / /; next if $type ne 'blob'; ++$n; $git->cat_async($oid, $cb); } $git->async_wait_all; }); diag 'multipart_text_as_html took '.timestr($t)." for $n <=> $m messages"; is($m, $n, 'rendered all messages'); done_testing(); public-inbox-1.9.0/xt/perf-threading.t000066400000000000000000000017061430031475700176430ustar00rootroot00000000000000# Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ # # real-world testing of search threading use strict; use warnings; use Test::More; use Benchmark qw(:all); use PublicInbox::Inbox; my $inboxdir = $ENV{GIANT_INBOX_DIR} // $ENV{GIANT_PI_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir }); eval { require PublicInbox::Search }; my $srch = $ibx->search; plan skip_all => "$inboxdir not configured for search $0 $@" unless $srch; require PublicInbox::View; my $msgs; my $elapsed = timeit(1, sub { $msgs = $ibx->over->recent({limit => 200000}); }); my $n = scalar(@$msgs); ok($n, 'got some messages'); diag "enquire: ".timestr($elapsed)." for $n"; $elapsed = timeit(1, sub { PublicInbox::View::thread_results({ibx => $ibx}, $msgs); }); diag "thread_results ".timestr($elapsed); done_testing(); public-inbox-1.9.0/xt/pop3d-mpop.t000066400000000000000000000043111430031475700167350ustar00rootroot00000000000000#!perl -w # Copyright (C) all contributors # License: AGPL-3.0+ # ensure mpop compatibility use v5.12; use File::Path qw(make_path); use PublicInbox::TestCommon; use PublicInbox::Spawn qw(which spawn); my $inboxdir = $ENV{GIANT_INBOX_DIR}; (defined($inboxdir) && -d $inboxdir) or plan skip_all => "GIANT_INBOX_DIR not defined for $0"; plan skip_all => "bad characters in $inboxdir" if $inboxdir =~ m![^\w\.\-/]!; my $uuidgen = which('uuidgen') or plan skip_all => 'uuidgen(1) missing'; require_mods(qw(DBD::SQLite)); require_git('2.6'); # for v2 require_mods(qw(File::FcntlLock)) if $^O !~ /\A(?:linux|freebsd)\z/; my ($tmpdir, $for_destroy) = tmpdir(); my $cfg = "$tmpdir/cfg"; my $newsgroup = 'inbox.test'; my %pids; { open my $fh, '>', $cfg or xbail "open: $!"; print $fh < $cfg }; my $td = start_script($cmd, $env, { 3 => $sock }) or xbail "-xbail $?"; chomp(my $uuid = xqx([$uuidgen])); make_path("$tmpdir/home/.config/mpop", map { "$tmpdir/md/$_" } qw(new cur tmp)); SKIP: { my $mpop = which('mpop') or skip('mpop(1) missing', 1); open my $fh, '>', "$tmpdir/home/.config/mpop/config" or xbail "open $!"; chmod 0600, $fh; print $fh <sockhost} port ${\$sock->sockport} user $uuid\@$newsgroup auth user password anonymous received_header off EOM close $fh or xbail "close $!"; delete local $ENV{XDG_CONFIG_HOME}; # mpop uses this local $ENV{HOME} = "$tmpdir/home"; my $cmd = [ $mpop, '-q' ]; my $pid = spawn($cmd, undef, { 1 => 2 }); $pids{$pid} = $cmd; } while (scalar keys %pids) { my $pid = waitpid(-1, 0) or next; my $cmd = delete $pids{$pid} or next; is($?, 0, join(' ', @$cmd, 'done')); } $td->kill; $td->join; is($?, 0, 'no error on -pop3d exit'); done_testing; public-inbox-1.9.0/xt/solver.t000066400000000000000000000041441430031475700162550ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020-2021 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Config; # this relies on PI_CONFIG // ~/.public-inbox/config my @psgi = qw(HTTP::Request::Common Plack::Test URI::Escape Plack::Builder); require_mods(qw(DBD::SQLite Search::Xapian), @psgi); use_ok($_) for @psgi; use_ok 'PublicInbox::WWW'; my $cfg = PublicInbox::Config->new; my $www = PublicInbox::WWW->new($cfg); my $app = sub { my $env = shift; $env->{'psgi.errors'} = \*STDERR; $www->call($env); }; # TODO: convert these to self-contained test cases my $todo = { 'git' => [ '9e9048b02bd04d287461543d85db0bb715b89f8c' .'/s/?b=t%2Ft3420%2Fremove-ids.sed', 'eebf7a8/s/?b=t%2Ftest-lib.sh', 'eb580ca513/s/?b=remote-odb.c', '776fa90f7f/s/?b=contrib/git-jump/git-jump', '5cd8845/s/?b=submodule.c', '81c1164ae5/s/?b=builtin/log.c', '6aa8857a11/s/?b=protocol.c', '96f1c7f/s/', # TODO: b=contrib/completion/git-completion.bash 'b76f2c0/s/?b=po/zh_CN.po', ], }; my ($ibx_name, $urls, @gone); my $client = sub { my ($cb) = @_; for (@$urls) { my $url = "/$ibx_name/$_"; my $res = $cb->(GET($url)); is($res->code, 200, $url); next if $res->code == 200; # diag $res->content; diag "$url failed"; } }; my $nr = 0; while (($ibx_name, $urls) = each %$todo) { SKIP: { if (!$cfg->lookup_name($ibx_name)) { push @gone, $ibx_name; skip("$ibx_name not configured", scalar(@$urls)); } test_psgi($app, $client); $nr++; } } SKIP: { require_mods(qw(Plack::Test::ExternalServer), $nr); delete @$todo{@gone}; my $sock = tcp_server() or BAIL_OUT $!; my ($tmpdir, $for_destroy) = tmpdir(); my ($out, $err) = map { "$tmpdir/std$_.log" } qw(out err); my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, undef, { 3 => $sock }); my ($h, $p) = tcp_host_port($sock); local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; while (($ibx_name, $urls) = each %$todo) { Plack::Test::ExternalServer::test_psgi(client => $client); } } done_testing(); public-inbox-1.9.0/xt/stress-sharedkv.t000066400000000000000000000022341430031475700200710ustar00rootroot00000000000000# Copyright (C) 2021 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use Benchmark qw(:all); use PublicInbox::TestCommon; require_ok 'PublicInbox::SharedKV'; my ($tmpdir, $for_destroy) = tmpdir(); local $ENV{TMPDIR} = $tmpdir; my $skv = PublicInbox::SharedKV->new; my $ipc = bless {}, 'StressSharedKV'; $ipc->wq_workers_start('stress-sharedkv', $ENV{TEST_NPROC}//4); my $nr = $ENV{TEST_STRESS_NR} // 100_000; my $ios = []; my $t = timeit(1, sub { for my $i (1..$nr) { $ipc->wq_io_do('test_set_maybe', $ios, $skv, $i); $ipc->wq_io_do('test_set_maybe', $ios, $skv, $i); } }); diag "$nr sets done ".timestr($t); for my $w ($ipc->wq_workers) { $ipc->wq_io_do('test_skv_done', $ios); } diag "done requested"; $ipc->wq_close; done_testing; package StressSharedKV; use strict; use v5.10.1; use parent qw(PublicInbox::IPC); use Digest::SHA qw(sha1); sub test_set_maybe { my ($self, $skv, $i) = @_; my $wcb = $self->{wcb} //= do { $skv->dbh; sub { $skv->set_maybe(sha1($_[0]), '') }; }; $wcb->($i + time); } sub test_skv_done { my ($self) = @_; delete $self->{wcb}; }