pax_global_header00006660000000000000000000000064137734612030014520gustar00rootroot0000000000000052 comment=3bacac503f6ff0bcf19aa581151c9c89fa35fe55 public-inbox-1.6.1/000077500000000000000000000000001377346120300141205ustar00rootroot00000000000000public-inbox-1.6.1/.gitattributes000066400000000000000000000001051377346120300170070ustar00rootroot00000000000000# Email signatures start with "-- \n" *.eml whitespace=-blank-at-eol public-inbox-1.6.1/.gitignore000066400000000000000000000002711377346120300161100ustar00rootroot00000000000000/.prove /.proverc /config.mak /MANIFEST.gen /Makefile.old /pm_to_blib /MYMETA.* /Makefile /blib /cover_db *.1 *.5 *.7 *.8 *.html *.gz .*.cols .*.check /NEWS.html /NEWS.atom /NEWS *.log public-inbox-1.6.1/AUTHORS000066400000000000000000000003421377346120300151670ustar00rootroot00000000000000This list only includes major contributors. See history in git (via `git clone https://public-inbox.org/public-inbox.git') for a full history of the project. * Eric Wong (BDFL) * The Linux Foundation (v2 work) public-inbox-1.6.1/COPYING000066400000000000000000001033301377346120300151530ustar00rootroot00000000000000 GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU Affero General Public License is a free, copyleft license for software and other kinds of works, specifically designed to ensure cooperation with the community in the case of network server software. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, our General Public Licenses are intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. Developers that use our General Public Licenses protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License which gives you legal permission to copy, distribute and/or modify the software. A secondary benefit of defending all users' freedom is that improvements made in alternate versions of the program, if they receive widespread use, become available for other developers to incorporate. Many developers of free software are heartened and encouraged by the resulting cooperation. However, in the case of software used on network servers, this result may fail to come about. The GNU General Public License permits making a modified version and letting the public access it on a server without ever releasing its source code to the public. The GNU Affero General Public License is designed specifically to ensure that, in such cases, the modified source code becomes available to the community. It requires the operator of a network server to provide the source code of the modified version running there to the users of that server. Therefore, public use of a modified version, on a publicly accessible server, gives the public access to the source code of the modified version. An older license, called the Affero General Public License and published by Affero, was designed to accomplish similar goals. This is a different license, not a version of the Affero GPL, but Affero has released a new version of the Affero GPL which permits relicensing under this license. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU Affero General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Remote Network Interaction; Use with the GNU General Public License. Notwithstanding any other provision of this License, if you modify the Program, your modified version must prominently offer all users interacting with it remotely through a computer network (if your version supports such interaction) an opportunity to receive the Corresponding Source of your version by providing access to the Corresponding Source from a network server at no charge, through some standard or customary means of facilitating copying of software. This Corresponding Source shall include the Corresponding Source for any work covered by version 3 of the GNU General Public License that is incorporated pursuant to the following paragraph. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the work with which it is combined will remain governed by version 3 of the GNU General Public License. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU Affero General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU Affero General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU Affero General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU Affero General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If your software can interact with users remotely through a computer network, you should also make sure that it provides a way for users to get its source. For example, if your program is a web application, its interface could display a "Source" link that leads users to an archive of the code. There are many ways you could offer source, and different solutions will be better for different programs; see section 13 for the specific requirements. You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU AGPL, see . public-inbox-1.6.1/Documentation/000077500000000000000000000000001377346120300167315ustar00rootroot00000000000000public-inbox-1.6.1/Documentation/.gitignore000066400000000000000000000001011377346120300207110ustar00rootroot00000000000000/public-inbox-*.txt /public-inbox.cgi.txt /standards.txt /.*.txt public-inbox-1.6.1/Documentation/RelNotes/000077500000000000000000000000001377346120300204645ustar00rootroot00000000000000public-inbox-1.6.1/Documentation/RelNotes/v1.0.0.eml000066400000000000000000000013351377346120300220070ustar00rootroot00000000000000From e@80x24.org Thu Feb 8 02:33:57 2018 Date: Thu, 8 Feb 2018 02:33:57 +0000 From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.0.0 Message-ID: <20180208023357.GA32591@80x24.org> After some 3.5 odd years of working on this, I suppose now is as good a time as any to tar this up and call it 1.0.0. The TODO list is still very long and there'll be some new development in coming weeks :> So, here you have a release: https://public-inbox.org/releases/public-inbox-1.0.0.tar.gz Checksums, mainly as a safeguard against accidental file corruption: SHA-256 4a08569f3d99310f713bb32bec0aa4819d6b41871e0421ec4eec0657a5582216 (in other words, don't trust me; instead read the code :>) public-inbox-1.6.1/Documentation/RelNotes/v1.1.0-pre1.eml000066400000000000000000000347621377346120300226670ustar00rootroot00000000000000From e@80x24.org Wed May 9 20:23:03 2018 Date: Wed, 9 May 2018 20:23:03 +0000 From: Eric Wong To: meta@public-inbox.org Cc: Konstantin Ryabitsev Subject: [ANNOUNCE] public-inbox 1.1.0-pre1 Message-ID: <20180509202303.GA15156@dcvr> Pre-release for v2 repository support. Thanks to The Linux Foundation for supporting this work! https://public-inbox.org/releases/public-inbox-1.1.0-pre1.tar.gz SHA-256: d0023770a63ca109e6fe2c58b04c58987d4f81572ac69d18f95d6af0915fa009 (only intended to guard against accidental file corruption) shortlog below: Eric Wong (27): nntp: improve fairness during XOVER and similar commands nntp: do not drain rbuf if there is a command pending extmsg: use news.gmane.org for Message-ID lookups searchview: fix non-numeric comparison mbox: do not barf on queries which return no results nntp: allow and ignore empty commands ensure SQLite and Xapian files respect core.sharedRepository TODO: a few more updates filter/rubylang: do not set altid on spam training import: cleanup git cat-file processes when ->done disallow "\t" and "\n" in OVER headers searchidx: release lock again during v1 batch callback searchidx: remove leftover debugging code convert: copy description and git config from v1 repo view: untangle loop when showing message headers view: wrap To: and Cc: headers in HTML display view: drop redundant References: display code TODO: add EPOLLEXCLUSIVE item searchview: do not blindly append "l" parameter to URL search: avoid repeated mbox results from search msgmap: add limit to response for NNTP thread: prevent hidden threads in /$INBOX/ landing page thread: sort incoming messages by Date searchidx: preserve umask when starting/committing transactions scripts/import_slrnspool: support v2 repos scripts/import_slrnspool: cleanup progress messages public-inbox 1.1.0-pre1 Eric Wong (Contractor, The Linux Foundation) (239): AUTHORS: add The Linux Foundation watch_maildir: allow '-' in mail filename scripts/import_vger_from_mbox: relax From_ line match slightly import: stop writing legacy ssoma.index by default import: begin supporting this without ssoma.lock import: initial handling for v2 t/import: test for last_object_id insertion content_id: add test case searchmsg: add mid_mime import for _extract_mid scripts/import_vger_from_mbox: support --dry-run option import: APIs to support v2 use search: free up 'Q' prefix for a real unique identifier searchidx: fix comment around next_thread_id address: extract more characters from email addresses import: pass "raw" dates to git-fast-import(1) scripts/import_vger_from_mbox: use v2 layout for import import: quiet down warnings from bogus From: lines import: allow the epoch (0s) as a valid time extmsg: fix broken Xapian MID lookup search: stop assuming Message-ID is unique www: stop assuming mainrepo == git_dir v2writable: initial cut for repo-rotation git: reload alternates file on missing blob v2: support Xapian + SQLite indexing import_vger_from_inbox: allow "-V" option import_vger_from_mbox: use PublicInbox::MIME and avoid clobbering v2: parallelize Xapian indexing v2writable: round-robin to partitions based on article number searchidxpart: increase pipe size for partitions v2writable: warn on duplicate Message-IDs searchidx: do not modify Xapian DB while iterating v2/ui: some hacky things to get the PSGI UI to show up v2/ui: retry DB reopens in a few more places v2writable: cleanup unused pipes in partitions searchidxpart: binmode use PublicInbox::MIME consistently searchidxpart: chomp line before splitting searchidx*: name child subprocesses searchidx: get rid of pointless index_blob wrapper view: remove X-PI-TS reference searchidxthread: load doc data for references searchidxpart: force integers into add_message search: reopen skeleton DB as well searchidx: index values in the threader search: use different Enquire object for skeleton queries rename SearchIdxThread to SearchIdxSkeleton v2writable: commit to skeleton via remote partitions searchidxskeleton: extra error checking searchidx: do not modify Xapian DB while iterating search: query_xover uses skeleton DB iff available v2/ui: get nntpd and init tests running on v2 v2writable: delete ::Import obj when ->done search: remove informational "warning" message searchidx: add PID to error message when die-ing content_id: special treatment for Message-Id headers evcleanup: disable outside of daemon v2writable: deduplicate detection on add evcleanup: do not create event loop if nothing was registered mid: add `mids' and `references' methods for extraction content_id: use `mids' and `references' for MID extraction searchidx: use new `references' method for parsing References content_id: no need to be human-friendly v2writable: inject new Message-IDs on true duplicates search: revert to using 'Q' as a uniQue id per-Xapian conventions searchidx: support indexing multiple MIDs mid: be strict with References, but loose on Message-Id searchidx: avoid excessive XNQ indexing with diffs searchidxskeleton: add a note about locking v2writable: generated Message-ID goes first searchidx: use add_boolean_term for internal terms searchidx: add NNTP article number as a searchable term mid: truncate excessively long MIDs early nntp: use NNTP article numbers for lookups nntp: fix NEWNEWS command searchidx: store the primary MID in doc data for NNTP import: consolidate object info for v2 imports v2: avoid redundant/repeated configs for git partition repos INSTALL: document more optional dependencies search: favor skeleton DB for lookup_mail search: each_smsg_by_mid uses skeleton if available v2writable: remove unnecessary skeleton commit favor Received: date over Date: header globally import: fall back to Sender for extracting name and email scripts/import_vger_from_mbox: perform mboxrd or mboxo escaping v2writable: detect and use previous partition count extmsg: rework partial MID matching to favor current inbox extmsg: rework partial MID matching to favor current inbox content_id: use Sender header if From is not available v2writable: support "barrier" operation to avoid reforking use string ref for Email::Simple->new v2writable: remove unnecessary idx_init call searchidx: do not delete documents while iterating search: allow ->reopen to be chainable v2writable: implement remove correctly skeleton: barrier init requires a lock import: (v2) delete writes the blob into history in subdir import: (v2): write deletes to a separate '_' subdirectory import: implement barrier operation for v1 repos mid: mid_mime uses v2-compatible mids function watchmaildir: use content_digest to generate Message-Id import: force Message-ID generation for v1 here import: switch to URL-safe Base64 for Message-IDs v2writable: test for idempotent removals import: enable locking under v2 index: s/GIT_DIR/REPO_DIR/ Lock: new base class for writable lockers t/watch_maildir: note the reason for FIFO creation v2writable: ensure ->done is idempotent watchmaildir: support v2 repositories searchidxpart: s/barrier/remote_barrier/ v2writable: allow disabling parallelization scripts/import_vger_from_mbox: filter out same headers as MDA v2writable: add DEBUG_DIFF env support v2writable: remove "resent" message for duplicate Message-IDs content_id: do not take Message-Id into account introduce InboxWritable class import: discard all the same headers as MDA InboxWritable: add mbox/maildir parsing + import logic use both Date: and Received: times msgmap: add tmp_clone to create an anonymous copy fix syntax warnings v2writable: support reindexing Xapian t/altid.t: extra tests for mid_set v2writable: add NNTP article number regeneration support v2writable: clarify header cleanups v2writable: DEBUG_DIFF respects $TMPDIR feed: $INBOX/new.atom endpoint supports v2 inboxes import: consolidate mid prepend logic, here www: $MESSAGE_ID/raw endpoint supports "duplicates" search: reopen DB if each_smsg_by_mid fails t/psgi_v2: minimal test for Atom feed and t.mbox.gz feed: fix new.html for v2 view: permalink (per-message) view shows multiple messages searchidx: warn about vivifying multiple ghosts v2writable: warn on unseen deleted files www: get rid of unnecessary 'inbox' name reference searchview: remove unnecessary imports from MID module view: depend on SearchMsg for Message-ID http: fix modification of read-only value githttpbackend: avoid infinite loop on generic PSGI servers www: support cloning individual v2 git partitions http: fix modification of read-only value githttpbackend: avoid infinite loop on generic PSGI servers www: remove unnecessary ghost checks v2writable: append, instead of prepending generated Message-ID lookup by Message-ID favors the "primary" one www: fix attachment downloads for conflicted Message-IDs searchmsg: document why we store To: and Cc: for NNTP public-inbox-convert: tool for converting old to new inboxes v2writable: support purging messages from git entirely search: cleanup uniqueness checking search: get rid of most lookup_* subroutines search: move find_doc_ids to searchidx v2writable: cleanup: get rid of unused fields mbox: avoid extracting Message-ID for linkification www: cleanup expensive fallback for legacy URLs view: get rid of some unnecessary imports search: retry_reopen on first_smsg_by_mid import: run_die supports redirects as spawn does v2writable: initializing an existing inbox is idempotent public-inbox-compact: new tool for driving xapian-compact mda: support v2 inboxes search: warn on reopens and die on total failure v2writable: allow gaps in git partitions v2writable: convert some fatal reindex errors to warnings wwwstream: flesh out clone instructions for v2 v2writable: go backwards through alternate Message-IDs view: speed up homepage loading time with date clamp view: drop load_results feed: optimize query for feeds, too msgtime: parse 3-digit years properly convert: avoid redundant "done\n" statement for fast-import search: move permissions handling to InboxWritable t/v2writable: use simplify permissions reading v2: respect core.sharedRepository in git configs searchidx: correct warning for over-vivification v2: one file, really v2writable: fix parallel termination truncate Message-IDs and References consistently scripts/import_vger_from_mbox: set address properly search: reduce columns stored in Xapian replace Xapian skeleton with SQLite overview DB v2writable: simplify barrier vs checkpoints t/over: test empty Subject: line matching www: rework query responses to avoid COUNT in SQLite over: speedup get_thread by avoiding JOIN nntp: fix NEWNEWS command t/thread-all.t: modernize test to support modern inboxes rename+rewrite test using Benchmark module nntp: make XOVER, XHDR, OVER, HDR and NEWNEWS faster view: avoid offset during pagination mbox: remove remaining OFFSET usage in SQLite msgmap: replace id_batch with ids_after nntp: simplify the long_response API searchidx: ensure duplicated Message-IDs can be linked together init: s/GIT_DIR/REPO_DIR/ in usage import: rewrite less history during purge v2: support incremental indexing + purge v2writable: do not modify DBs while iterating for ->remove v2writable: recount partitions after acquiring lock searchmsg: remove unused `tid' and `path' methods search: remove unnecessary OP_AND of query mbox: do not sort search results searchview: minor cleanup support altid mechanism for v2 compact: better handling of over.sqlite3* files v2writable: remove redundant remove from Over DB v2writable: allow tracking parallel versions v2writable: refer to git each repository as "epoch" over: use only supported and safe SQLite APIs search: index and allow searching by date-time altid: fix miscopied field name nntp: set Xref across multiple inboxes www: favor reading more from SQLite, and less from Xapian ensure Xapian and SQLite are still optional for v1 tests psgi: ensure /$INBOX/$MESSAGE_ID/T/ endpoint is chronological over: avoid excessive SELECT over: remove forked subprocess v2writable: reduce barriers index: allow specifying --jobs=0 to disable multiprocess convert: support converting with altid defined store less data in the Xapian document msgmap: speed up minmax with separate queries feed: respect feedmax, again v1: remove articles from overview DB compact: do not merge v2 repos by default v2writable: reduce partititions by one search: preserve References in Xapian smsg for x=t view v2: generate better Message-IDs for duplicates v2: improve deduplication checks import: cat_blob drops leading 'From ' lines like Inbox searchidx: regenerate and avoid article number gaps on full index extmsg: remove expensive git path checks use %H consistently to disable abbreviations searchidx: increase term positions for all text terms searchidx: revert default BATCH_BYTES to 1_000_000 Merge remote-tracking branch 'origin/master' into v2 fix tests to run without Xapian installed extmsg: use Xapian only for partial matches Jonathan Corbet (3): Don't use LIMIT in UPDATE statements Update the installation instructions with Fedora package names Allow specification of the number of search results to return -- git clone https://public-inbox.org/ public-inbox (working on a homepage... sorta :) public-inbox-1.6.1/Documentation/RelNotes/v1.2.0.eml000066400000000000000000000065771377346120300220260ustar00rootroot00000000000000From e@80x24.org Sun Nov 3 03:12:41 2019 Date: Sun, 3 Nov 2019 03:12:41 +0000 From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.2.0 Content-Type: text/plain; charset=utf-8 Message-ID: <20191103030500.public-inbox-1.2.0@released> * first non-pre/rc release with v2 format support for scalability. See public-inbox-v2-format(5) manpage for more details. * new admin tools for v2 inboxes: - public-inbox-convert - converts v1 to v2 repo formats - public-inbox-compact - v2 convenience wrapper for xapian-compact(1) - public-inbox-purge - purges entire messages out of v2 history - public-inbox-edit - edits sensitive data out messages from v2 history - public-inbox-xcpdb - copydatabase(1) wrapper to upgrade Xapian formats (e.g. from "chert" to "glass") and resharding of v2 inboxes * SQLite3 support decoupled from Xapian support, and Xapian DBs may be configured without phrase support to save space. See "indexlevel" in public-inbox-config(5) manpage for more info. * codebase now uses Perl 5.10.1+ features (e.g. "//") * public-inbox-nntpd - support STARTTLS and NNTPS - support COMPRESS extension - fix several RFC3977 compliance bugs - improved interopability with picky clients such as leafnode and Alpine * public-inbox-watch - support multiple spam training directories - support mapping multiple inboxes per Maildir - List-ID header support (see "listid" in public-inbox-config(5)) * public-inbox-mda - List-ID header support (see above) * PublicInbox::WWW - grokmirror-compatible manifest.js.gz endpoint generation - user-configurable color support in $INBOX_URL/_/text/color/ - BOFHs may set default colors via "publicinbox.css" (see public-inbox-config(5)) - ability to map git code repositories and run cgit (see "coderepo" and "cgitrc" in public-inbox-config(5)) - able to recreate blobs with coderepo associations and Xapian - search results may be reversed - reduce memory usage when rendering large threads - syntax highlighting for patches and blobs * public-inbox-httpd / public-inbox-nntpd: - lower memory usage in C10K scenarios - buffers slow clients to filesystem (TMPDIR) instead of RAM - improved FreeBSD support - Danga::Socket is no longer a runtime dependency * many documentation updates, new manpages for: - PublicInbox::SaPlugin::ListMirror - public-inbox-init - public-inbox-learn * workaround memory leaks on Perl 5.16.3 (on CentOS/RHEL 7.x) Thanks to Ali Alnubani, Alyssa Ross, Amitai Schleier, Dave Taht, Dmitry Alexandrov, Eric W. Biederman, Jan Kiszka, Jonathan Corbet, Kyle Meyer, Leah Neukirchen, Mateusz Łoskot, Nicolás Ojeda Bär, SZEDER Gábor, Urs Janßen, Wang Kang, and edef for all their help, bug reports, patches and suggestions. Special thanks to Konstantin Ryabitsev and The Linux Foundation for their sponsorship and support over the past two years. https://public-inbox.org/releases/public-inbox-1.2.0.tar.gz SHA256: dabc735a5cfe396f457ac721559de26ae38abbaaa74612eb786e9e2e1ca94269 Chances are: You don't know me and never will. Everybody else can verify the tarball and sign a reply saying they've verified it, instead. The more who do this, the better, but don't trust the BOFH :P See archives at https://public-inbox.org/meta/ for all history. See https://public-inbox.org/TODO for what the future holds. public-inbox-1.6.1/Documentation/RelNotes/v1.3.0.eml000066400000000000000000000063571377346120300220230ustar00rootroot00000000000000From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.3.0 Date: Mon, 10 Feb 2020 05:52:41 +0000 Message-Id: <20200210055200.public-inbox-1.3.0-rele@sed> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Many internal improvements to improve the developer experience, long-term maintainability, ease-of-installation and compatibility. There are also several bugfixes. Some of the internal improvements involve avoiding Perl startup time in tests. "make check" now runs about 50% faster than before, and the new "make check-run" can be around 30% faster than "make check" after being primed by "make check". Most closures (anonymous subroutines) are purged from the -nntpd, -httpd and WWW code paths to make checking for memory leaks easier. * documentation now builds on BSD make * Date::Parse (TimeDate CPAN distribution) is now optional, allowing installation from OpenBSD systems via "pkg". * the work-in-progress Xapian.pm SWIG bindings are now supported in addition to the traditional Search::Xapian XS bindings. Only the SWIG bindings are packaged for OpenBSD. * Plack is optional for users who wish to avoid web-related components * Filesys::Notify::Simple is optional for non-watch users (but Plack will still pull it in) * improved internal error checking and reporting in numerous places * fixed Perl 5.10.1 compatibility (tested with Devel::PatchPerl) * IPC::Run and XML::Feed are no longer used in tests, though XML::TreePP becomes an optional test dependency. * Email::Address::XS used if available (newer Email::MIME requires it), it should handle more corner cases. * PublicInbox::WWW: - "nested" search results page now shows relevancy percentages - many solver bugs fixed - solver works on "-U0" patches using "git apply --unidiff-zero" - solver now compatible with git < v1.8.5 (but >= v1.8.0) - raw HTML no longer shown inline in multipart/alternative messages (v1.2.0 regression) - reduced memory usage for displaying multipart messages - static file responses support Last-Modified/If-Modified-Since - avoid trailing underlines in diffstat linkification - more consistent handling of messages without Subjects * public-inbox-httpd / public-inbox-nntpd: - MSG_MORE used consistently in long responses - fixed IO::KQueue usage on *BSDs - listen sockets are closed immediately on graceful shutdown - missed signals avoided with signalfd or EVFILT_SIGNAL - Linux x32 ABI support * public-inbox-nntpd: - Y2020 workaround for Time::Local * public-inbox-watch: - avoid memory leak from cyclic reference on SIGHUP - fix documentation of publicinboxwatch.watchspam * public-inbox-convert: - avoid article number jumps when converting indexed v1 inboxes * public-inbox-compact / public-inbox-xcpdb: - concurrent invocations of -compact and -xcpdb commands, not just -mda, -watch, -learn, -purge * examples/unsubscribe.milter: - support unique mailto: unsubscribe Release tarball available for download at: https://public-inbox.org/public-inbox.git/snapshot/public-inbox-1.3.0.tar.gz Please report bugs via plain-text mail to: meta@public-inbox.org See archives at https://public-inbox.org/meta/ for all history. See https://public-inbox.org/TODO for what the future holds. public-inbox-1.6.1/Documentation/RelNotes/v1.4.0.eml000066400000000000000000000060371377346120300220170ustar00rootroot00000000000000Date: Fri, 17 Apr 2020 08:48:59 +0000 From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.4.0 Message-ID: <20200417084800.public-inbox-1.4.0-rele@sed> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline This release focuses on reproducibility improvements and bugfixes for corner-cases. Busy instances of PublicInbox::WWW may also notice memory usage reductions. For rare messages lacking Date and/or Received headers, mirrors now fall back to using the git author/commit times to reindex them. This ensures search and filtering queries behave identically on mirrors as they do on the original machine. "altid" SQLite dumps are now accessible to all over the WWW interface via `POST /$INBOX/$ALTID.sql.gz'. Busy instances of PublicInbox::WWW (whether via public-inbox-httpd or another PSGI server) may notice significant memory usage reductions from the single message "permalink" lifetime optimization. There also ongoing work to improve memory lifetime management to reduce the potential for memory fragmentation in daemons. * general changes: - `include.*' directives in the public-inbox-config(5) file are now honored as documented in git-config(1), thanks to Andreas Rottmann. - `+0000' is assumed for dates missing TZ offsets; thanks to Leah Neukirchen for spotting this regression from v1.2.0. - `<' and `>' characters are dropped to avoid errors in git in addresses for git, thanks again to Leah for noticing this long-standing bug. * PublicInbox::WWW: - memory reductions for message display and rendering - code preload improved to reduce memory fragmentation - remove redundant "a=" parameter in links to solver - escape '&' in hrefs properly - fix optional address obfuscation in search results - `POST /$INBOX/$ALTID.sql.gz' endpoint to retrieve SQLite dumps * public-inbox-httpd + public-inbox-nntpd: - fix SIGUSR2 upgrade in worker-less instances (-W0) * public-inbox-httpd: - fix RFC 7230 conformance when Content-Length and "chunked" are both specified * public-inbox-index: - reproduce original date and time stamps in mirrors for messages lacking Date: and/or Received: headers - new `--compact' (or `-c') switch to perform the equivalent of public-inbox-compact(1) after indexing each inbox * documentation: - add Documentation/technical/data_structures.txt for new hackers * scripts/import_vger_from_mbox: (not really a production-level script) - fix ">From" unescaping thanks to a bug report from Kyle Meyer Thanks to Andreas Rottmann, Leah Neukirchen and Kyle Meyer for their contributions to this release. Release tarball available for download over HTTPS or Tor .onion: https://yhbt.net/public-inbox.git/snapshot/public-inbox-1.4.0.tar.gz http://ou63pmih66umazou.onion/public-inbox.git/snapshot/public-inbox-1.4.0.tar.gz Please report bugs via plain-text mail to: meta@public-inbox.org See archives at https://public-inbox.org/meta/ for all history. See https://public-inbox.org/TODO for what the future holds. public-inbox-1.6.1/Documentation/RelNotes/v1.5.0.eml000066400000000000000000000040251377346120300220130ustar00rootroot00000000000000From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.5.0 Date: Sun, 10 May 2020 07:04:00 +0000 Message-ID: <20200510.public-inbox-1.5.0-rele@sed> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline This release introduces a new pure-Perl lazy email parser, PublicInbox::Eml, which uses roughly 10% less memory and is up to 2x faster than Email::MIME. This is a major internal change Limits commonly enforced by MTAs are also enforced in the new parser, as messages may bypass MTA transports. Email::MIME and other Email::* modules are no longer dependencies nor used at all outside of maintainer validation tests. * public-inbox-index - `--max-size=SIZE' CLI switch and `publicinbox.indexMaxSize' config file option added to prevent indexing of overly large messages. - List-Id headers are indexed in new messages, old messages can be found after `--reindex'. * public-inbox-watch - multiple values of `publicinbox..watchheader' are now supported, thanks to Kyle Meyer - List-Id headers are matched case-insensitively as specified by RFC 2919 * PublicInbox::WWW - $INBOX_DIR/description and $INBOX_DIR/cloneurl are not memoized if missing - improved display of threads, thanks to Kyle Meyer - search for List-Id is available via `l:' prefix if indexed - all encodings are preloaded at startup to reduce fragmentation - diffstat linkification and highlighting are stricter and less likely to linkify tables in cover letters - fix hunk header links to solver which were off-by-one line, thanks again to Kyle Meyer Release tarball available for download over HTTPS or Tor .onion: https://yhbt.net/public-inbox.git/snapshot/public-inbox-1.5.0.tar.gz http://ou63pmih66umazou.onion/public-inbox.git/snapshot/public-inbox-1.5.0.tar.gz Please report bugs via plain-text mail to: meta@public-inbox.org See archives at https://public-inbox.org/meta/ for all history. See https://public-inbox.org/TODO for what the future holds. public-inbox-1.6.1/Documentation/RelNotes/v1.6.0.eml000066400000000000000000000133221377346120300220140ustar00rootroot00000000000000From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.6.0 Date: Wed, 16 Sep 2020 20:03:09 +0000 Message-ID: <20200916200309.public-inbox-1.6.0-rele@sed> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline A big release containing several performance optimizations, a new anonymous IMAP server, and more. It represents an incremental improvement over 1.5 in several areas with more to come in 1.7. The read-only httpd and nntpd daemons no longer block the event loop when retrieving blobs from git, making better use of SMP systems while accomodating slow storage. Indexing can be now be tuned to give somewhat usable performance on HDD storage, though we can't defy the laws of physics, either. * General changes: - ~/.cache/public-inbox/inline-c is automatically used for Inline::C if it exists. PERL_INLINE_DIRECTORY in env remains supported and prioritized to support `nobody'-type users without HOME. - msgmap.sqlite3 uses journal_mode=TRUNCATE, matching over.sqlite3 behavior for a minor reduction in VFS traffic - public-inbox-tuning(7) - new manpage containing pointers to various tuning options and tips for certain HW and OS setups. - Copy-on-write is disabled on BTRFS for new indices to avoid fragmentation. See the new public-inbox-tuning(7) manpage. - message/{rfc822,news,global} attachments are decoded recursively and indexed for search. Reindexing (see below) is required to ensure these attachments are indexed in old messages. - inbox.lock (v2) and ssoma.lock (v1) files are written to on message delivery (or spam removal) to wake up read-only daemons via inotify or kqueue. - `--help' switch supported by command-line tools * Upgrading for new features in 1.6 The ordering of these steps is only necessary if you intend to use some new features in 1.6. Future releases may have different instructions (or be entirely transparent). 0. install (use your OS package manager, or "make install") 1. restart public-inbox-watch instances if you have any 2. Optional: remove Plack::Middleware::Deflater if you're using a custom .psgi file for PublicInbox::WWW. This only saves some memory and CPU cycles, and you may also skip this step if you expect to roll back to 1.5.0 for any reason. Steps 3a and 3b may happen in any order, 3b is optional and is only required to use new WWW and IMAP features. 3a. restart existing read-only daemons if you have them (public-inbox-nntpd, public-inbox-httpd) 3b. run "public-inbox-index -c --reindex --rethread --all" to reindex all configured inboxes 4. configure and start the new public-inbox-imapd. This requires reindexing in 3b, but there's no obligation to run an IMAP server, either. * public-inbox-index There are several new options to improve usability on slow, rotational storage. - `--batch-size=BYTES' or publicinbox.indexBatchSize parameter to reduce frequency of random writes on HDDs - `--sequential-shard' or publicInbox.sequentialShard parameter to improve OS page cache utilization on HDDs. - `--no-fsync' when combined with Xapian 1.4+ can be used to speed up indexing on SSDs and small (default) `--batch-size' - `--rethread' option to go with `--reindex' (use sparringly, see manpage) - parallelize v2 updates by default, `--sequential-shard' and `-j0' is (once again) allowed to disable parallelization - (re-)indexing parallelizes blob reads from git - `--all' may be specified to index all configured inboxes * public-inbox-learn - `rm' supports `--all' to remove from all configured inboxes * public-inbox-imapd - new read-only IMAP daemon similar to public-inbox-nntpd `AUTH=ANONYMOUS' is supported, but any username and password for clients without `AUTH=ANONYMOUS' support. * public-inbox-nntpd - blob reads from git are handled asynchronously * public-inbox-httpd - Plack::Middleware::Deflater is no longer loaded by default when no .psgi file is specified; PublicInbox::WWW can rely on gzip for buffering (see below) * PublicInbox::WWW - use consistent blank line around attachment links - Attachments in message/{rfc822,news,global} messages can be individually downloaded. Downloading the entire message/rfc822 file in full remains supported - $INBOX_DIR/description is treated as UTF-8 - HTML, Atom, and text/plain responses are gzipped without relying on Plack::Middleware::Deflater - Multi-message endpoints (/t.mbox.gz, /T/, /t/, etc) are ~10% faster when running under public-inbox-httpd with asynchronous blob retrieval - mbox search results may now include all messages pertaining to that thread. Needs `--reindex' mentioned above in `Upgrading for new features in 1.6'. - fix mbox.gz search results downloads for lynx users - small navigation tweaks, more prominent mirroring instructions * public-inbox-watch - Linux::Inotify2 or IO::KQueue is used directly, Filesys::Notify::Simple is no longer required - NNTP groups and IMAP mailboxes may be watched in addition to Maildirs (lightly tested). * Ongoing internal changes - reduce event loop hogging for many-inbox support - use more Perl v5.10-isms, future-proof against Perl 8 - more consistent variable and field naming, improve internal documentation and comments - start supporting >=40 char git identifiers for SHA-256 - test -httpd-specific code paths via Plack::Test::ExternalServer in addition to generic PSGI paths. Please report bugs via plain-text mail to: meta@public-inbox.org See archives at https://public-inbox.org/meta/ for all history. See https://public-inbox.org/TODO for what the future holds. public-inbox-1.6.1/Documentation/RelNotes/v1.6.1.eml000066400000000000000000000042621377346120300220200ustar00rootroot00000000000000From: Eric Wong To: meta@public-inbox.org Subject: [ANNOUNCE] public-inbox 1.6.1 Date: Thu, 31 Dec 2020 23:45:56 +0000 Message-ID: <20201231234556.public-inbox-1.6.1-rele@sed> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline A small, bugfix release on top of 1.6.0 from September 2020. Bug fixes: * MIME header decoding no longer warns on undefined variables, with Perl <5.28. Thanks to a bug report by Ali Alnubani. https://public-inbox.org/meta/DM6PR12MB49106F8E3BD697B63B943A22DADB0@DM6PR12MB4910.namprd12.prod.outlook.com/ * Fixed a message threading bug thanks to a report from Kyle Meyer. "public-inbox-index --rethread --reindex" will be necessary in case of certain messages arrive out-of-order. Link: https://public-inbox.org/meta/87360nlc44.fsf@kyleam.com/ * WWW: per-inbox grokmirror manifests no longer return info for all inboxes, only the root /manifest.js.gz includes all inboxes. This regression appeared in 1.6. * public-inbox-mda matches List-Id headers insensitively, matching public-inbox-watch behavior. Similarly, List-Id is always indexed lower-cased for boolean matches to avoid matching an incorrect term. * Newsgroup and Path NNTP headers are now emitted in conformance with RFC 5536 3.1.[45]. Thanks to Andrey Melnikov for the report: https://public-inbox.org/meta/CA+PODjpUN5Q4gBFQhAzUNuMasVEdmp9f=8Uo0Ej0mFumdSwi4w@mail.gmail.com/ * Inotify fixes for public-inbox-imapd users relying on SIGHUP reloads and thousands of watches. * Read-only daemon fixes around TLS and Linux <4.5 systems Bugfixes with minor behavior changes: * The X-Status mbox header is now excluded from imports, just like the Status: header has been for many years. They have no place in public archives and can be privacy concern for people sharing archives. * WWW prevents deep-linking to attachments to limit abuse vectors. Noticed by Leah Neukirchen: https://public-inbox.org/meta/87imagyap9.fsf@vuxu.org/ There are also several ocumentation fixes from Uwe Kleine-König and Kyle Meyer. Please report bugs via plain-text mail to: meta@public-inbox.org See archives at https://public-inbox.org/meta/ for all history. public-inbox-1.6.1/Documentation/RelNotes/v1.7.0.wip000066400000000000000000000007131377346120300220370ustar00rootroot00000000000000To: meta@public-inbox.org Subject: [WIP] public-inbox 1.7.0 MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline TODO: gcf2, detached indices, JMAP, ... Compatibility: * Rollbacks all the way to public-inbox 1.2.0 remain supported Please report bugs via plain-text mail to: meta@public-inbox.org See archives at https://public-inbox.org/meta/ for all history. See https://public-inbox.org/TODO for what the future holds. public-inbox-1.6.1/Documentation/clients.txt000066400000000000000000000027701377346120300211410ustar00rootroot00000000000000clients and tools related to public-inbox ----------------------------------------- While public-inbox exposes NNTP and gzipped mboxrd over HTTP, some public-inbox-specific/aware tools have sprung up. Below is a non-exhaustive list of them. Feel free to send additions, corrections and discussions to meta@public-inbox.org Discussions will be visible from our own public-inbox instance: https://public-inbox.org/meta/ Disclaimer: public-inbox itself comes with no warranty or guarantees; so don't treat any of these links as endorsements, either. * l2md - Maildir and procmail importer using C + libgit2 https://git.kernel.org/pub/scm/linux/kernel/git/dborkman/l2md.git * b4 - helper utility for patch-based workflows https://git.kernel.org/pub/scm/utils/b4/b4.git * impibe - Perl script to import v1 or v2 to Maildir https://leahneukirchen.org/dotfiles/bin/impibe discussion: https://public-inbox.org/meta/87v9m0l8t1.fsf@vuxu.org/ * kernel.org helpers - various scripts used by *.kernel.org https://git.kernel.org/pub/scm/linux/kernel/git/mricon/korg-helpers.git * grokmirror - git mirroring tool (not public-inbox-specific) https://git.kernel.org/pub/scm/utils/grokmirror/grokmirror.git * ssoma - v1 only, abandoned in favor of NNTP https://80x24.org/ssoma.git * piem - Emacs tools for working with public-index (and b4) https://git.kyleam.com/piem/about/ There's also a bunch of random scripts in the scripts/ directory of our source tree at: git clone https://public-inbox.org/public-inbox.git public-inbox-1.6.1/Documentation/dc-dlvr-spam-flow.txt000066400000000000000000000040431377346120300227310ustar00rootroot00000000000000dc-dlvr spam/ham training system flow ------------------------------------- An overview of the Maildir + inotify-based spam training system Eric uses on his mail server. This idea may be implemented for kqueue-based systems, too. The idea is to use inotify (via incron) to watch for new files appearing in Maildirs. We only want to train seen messages as ham, and old (but not necessarily seen) messages as spam. The overall goal of this is to allow a user to train their filters without leaving his favorite mail user agent. Every message written to Maildir involves a rename, so we only have incron watch for IN_MOVED_TO events. The generic flow is as follows, all for a single Unix user account: incron -> report-spam +-> sendmail -> MTA -> dc-dlvr -> spamc -> spamd | V ... For public-inbox, Eric uses a separate Unix account ("pi") to add a layer of protection from fat-fingering something. So his report-spam script delivers to a second recipient for training, the "pi" user: ... | +-> sendmail -> MTA -> dc-dlvr | V ~pi/.dc-dlvr.pre | V public-inbox-learn public-inbox-learn will then internally handle the "spamc -> spamd" delivery path as well as removing the message from the git tree. * incron - run commands based on filesystem events: http://incron.aiken.cz/ * sendmail / MTA - we use and recommend use postfix, which includes a sendmail-compatible wrapper: http://www.postfix.org/ * spamc / spamd - SpamAssassin: http://spamassassin.apache.org/ * report-spam / dc-dlvr - distributed with public-inbox in the scripts/ directory: git clone https://public-inbox.org/public-inbox.git public-inbox-1.6.1/Documentation/design_notes.txt000066400000000000000000000126551377346120300221640ustar00rootroot00000000000000public-inbox design notes ------------------------- Challenges to running normal mailing lists ------------------------------------------ 1) spam 2) bounce processing of invalid/bad email addresses 3) processing subscribe/unsubscribe requests Issues 2) and 3) are side-stepped entirely by moving reader subscriptions to git repository synchronization and Atom feeds. There's no chance of faked subscription requests and no need to deal with confused users who cannot unsubscribe. Use existing infrastructure --------------------------- * public-inbox can coexist with existing mailing lists, any subscriber to the existing mailing list can begin delivering messages to public-inbox-mda(1) * public-inbox uses SMTP for posting. Posting a message to a public-inbox instance is no different than sending a message to any _open_ mailing list. * Existing spam filtering on an SMTP server is also effective on public-inbox. * Readers may continue using use their choice of NNTP and mail clients. * Atom is a reasonable feed format for casual readers and is supported by a variety of feed readers. Why email? ---------- * Freedom from proprietary services, tools and APIs. Communicating with developers and users of Free Software should not rely on proprietary tools or services. * Existing infrastructure, tools, and user familiarity. There is already a large variety of tools, clients, and email providers available. There are also many resources for users to run their own SMTP server on a domain they control. * All public discussion mediums are affected by spam and advertising. There exist several good Free Software anti-spam tools for email. * Privacy is not an issue for public discussion. Public mailing list archives are common and accepted by Free Software communities. There is no need to ask the NSA for backups of your mail archives :) * git, one of the most widely-used version control systems, includes many tools for for email, including: git-format-patch(1), git-send-email(1), git-am(1), git-imap-send(1). Furthermore, the development of git itself is based on the git mailing list: https://public-inbox.org/git/ (or http://hjrcffqmbrq6wope.onion/git/ for Tor users) * Email is already the de-facto form of communication in many Free Software communities.. * Fallback/transition to private email and other lists, in case the public-inbox host becomes unavailable, users may still directly email each other (or Cc: lists for related/dependent projects). Why git? -------- * git is distributed and robust while being both fast and space-efficient with text data. NNTP was considered, but does not support delta-compression and places no guarantees on data/transport integrity. However, a read-only NNTP gateway is implemented. * As of 2016, git is widely used and known to nearly all Free Software developers. For non-developers it is packaged for all major GNU/Linux and *BSD distributions. NNTP is not as widely-used nowadays. Why perl 5? ----------- * Perl 5 is widely available on modern *nix systems with good a history of backwards and forward compatibility. * git and SpamAssassin both use it, so it should be one less thing for admins to install and waste disk space with. * Distributing compiled binaries has higher costs for storage/cache space is required for each architecture. Having a runnable, source-only distribution means any user already has access to all of our source. Laziness -------- * Stick to dependencies available in Debian main, this should make it easier for potential users to install, and easier for distro maintainers to pick up. * A list server being turned into an SMTP spam relay and being blacklisted while an admin is asleep is scary. Sidestep that entirely by having clients pull. * Eric has a great Maildir+inotify-based Bayes training setup going back many years. Document, integrate and publicize it for public-inbox usage, encouraging other admins to use it (it works as long as admins read their public-inbox). * Custom, difficult-for-Bayes requires custom anti-spam rules. We may steal rules from the Debian listmasters: svn://anonscm.debian.org/pkg-listmaster * Full archives are easily distributable with git, so somebody else can take over the list if we give up. Anybody may also run an SMTP notifier/delivery service based on the archives. * Avoids bikeshedding about web UI decisions, GUI-lovers can write their own GUI-friendly interfaces (HTML or native) based on public archives. Web notes --------- * Getting users to install/run any new tool is difficult. The web views must be easily read/cache/mirror-able. * There may also be a significant number of webmail users without an MUA or feed reader; so a web view is necessary. * Expose Message-ID in web views to encourage replies from drive-by contributors. * Raw text endpoint allows users to write client-side endpoints without hosting the data themselves (or on a different server). What sucks about public-inbox ----------------------------- * Lack of push notification. On the other hand, feeds seem popular. * some (mostly GUI) mail clients cannot set In-Reply-To headers properly without the original message. Scalability notes ----------------- See the public-inbox-v2-format(5) manpage for all the scalability problems solved. Copyright --------- Copyright 2013-2020 all contributors License: AGPL-3.0+ public-inbox-1.6.1/Documentation/design_www.txt000066400000000000000000000130661377346120300216550ustar00rootroot00000000000000PublicInbox::WWW (PSGI interface) design notes URL and anchor naming --------------------- ### Unstable endpoints /$INBOX/?r=$GIT_COMMIT -> HTML only /$INBOX/new.atom -> Atom feed #### Optional, relies on Search::Xapian (or Xapian SWIG binding) /$INBOX/$MESSAGE_ID/t/ -> HTML content of thread (nested) /$INBOX/$MESSAGE_ID/T/ -> HTML content of thread (flat) anchors: #u location of $MESSAGE_ID in URL #m per-message links, where is of the Message-ID of each message (stable) #s relative numeric position of message in thread (unstable) #i<...> diffstat location for patch emails #Z?<...> per-file diff header location for patch emails /$INBOX/$MESSAGE_ID/t.atom -> Atom feed for thread /$INBOX/$MESSAGE_ID/t.mbox.gz -> gzipped mbox of thread /$INBOX/$GIT_OID/s/ -> "git show" (via "git apply") This endpoint requires "coderepo" entries configured for a given inbox. It can recreate ("solve") blobs from patch emails using Xapian and git-apply(1). It can also display non-blob content, but that remains a work-in-progress. /$INBOX/$GIT_OID/s/$FILENAME -> "git show", raw output As above, but shows the raw (usually text/plain) output. ### Stable endpoints /$INBOX/$MESSAGE_ID/ -> HTML content anchors: #r location of the current message in thread skeleton (requires Xapian search) #b start of the message body (linked from thread skeleton) /$INBOX/$MESSAGE_ID -> 301 to /$INBOX/$MESSAGE_ID/ /$INBOX/$MESSAGE_ID/raw -> raw mbox /$INBOX/$MESSAGE_ID/#R -> HTML reply instructions # Covering up a pre-1.0 design mistake: /$INBOX/$MESSAGE_ID/f/ -> 301 to /$INBOX/$MESSAGE_ID/ ### Legacy endpoints (may be ambiguous given Message-IDs with similar suffixes) /$INBOX/m/$MESSAGE_ID/ -> 301 to /$INBOX/$MESSAGE_ID/ /$INBOX/m/$MESSAGE_ID.html -> 301 to /$INBOX/$MESSAGE_ID/ /$INBOX/m/$MESSAGE_ID.txt -> 301 to /$INBOX/$MESSAGE_ID/raw /$INBOX/f/$MESSAGE_ID.html -> 301 to /$INBOX/$MESSAGE_ID/ /$INBOX/f/$MESSAGE_ID.txt [1] -> 301 to /$INBOX/$MESSAGE_ID/raw /$INBOX/atom.xml [2] -> identical to /$INBOX/new.atom Additionally, we support git clone/fetch over HTTP (dumb and smart): git clone --mirror http://$HOSTNAME/$INBOX FIXME: we must refactor/cleanup/add tests for most of our CGI before adding more endpoints and features. [1] These URLs were never linked, but only exist as a convenience to folks who edit existing URLs [2] Do not make this into a 301 since feed readers may not follow them as well as normal browsers do. Encoding notes -------------- Raw HTML and XML should only contain us-ascii characters which render to UTF-8. We must not rely on users having the necessary fonts installed to render uncommon characters. Plain text (raw message) endpoints display in the original encoding(s) of the original email. Offline friendly ---------------- The "/t/", "/T/", "t.mbox.gz" endpoints are designed to be useful for reading long threads for users with intermittent connections or saved for offline viewing. Date displays are always absolute, not the "X hours ago" pattern commonly seen because readers may be reading a previously-saved or cached copy. HTML URLs end with '/' or "$FILENAME.html". The reason many URLs end with the '/' character is so it can trivially be saved to a directory via wget or similar tools as "index.html", making it easy to mirror all files ending in ".html" using any static web server. Guidelines for using limited HTML --------------------------------- We mainly use HTML for linking pages together with . We also set to make window management easier. We favor <pre>-formatted text since public-inbox is intended as a place to share and discuss patches and code. Unfortunately, long paragraphs tends to be less readable with fixed-width serif fonts which GUI browsers default to. * No graphics, images, or icons at all. We tolerate, but do not encourage the use of GUIs. * No setting font sizes, power to users to decide those. We will include and document <span class=?> to support colors for user-supplied CSS. * Only one font type: fixed. This is for accessibility, we must not blow certain elements out-of-proportion with different fonts on the page when a reader increases font size. * Bold and underline elements are OK since they should render fine regardless of chosen font and gracefully degrade if a display does not support them. Italics and strike-through elements must be avoided as they do not render well with some displays or user-chosen fonts. * No JavaScript. JS is historically too buggy and insecure, and we will never expect our readers to do either of the following: a) read and audit all our code for on every single page load b) trust us and and run code without reading it * We only use CSS for one reason: wrapping pre-formatted text This is necessary because unfortunate GUI browsers tend to be prone to layout widening from unwrapped mailers. Do not expect CSS to be enabled, especially with scary things like: https://thejh.net/misc/website-terminal-copy-paste However, we will try to make it easy for users to supply their own colors via user-side CSS. CSS classes (for user-supplied CSS) ----------------------------------- See examples in contrib/css/ and lib/PublicInbox/WwwText.pm (or https://public-inbox.org/meta/_/text/color/ soon) ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/extman.perl��������������������������������������������������������0000775�0000000�0000000�00000002335�13773461203�0021117�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/perl -w # Copyright (C) 2019-2020 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # prints a manpage to stdout use strict; my $xapmsg = 'See https://xapian.org/ for more information on Xapian'; my $usage = "$0 /path/to/manpage.SECTION.txt"; my $manpage = shift or die $usage; my $MAN = $ENV{MAN} || 'man'; my @args; $manpage = (split('/', $manpage))[-1]; $manpage =~ s/\.txt\z//; $manpage =~ s/\A\.//; # no leading dot (see Documentation/include.mk) $manpage =~ s/\.(\d+.*)\z// and push @args, $1; # section push @args, $manpage; # don't use UTF-8 characters which readers may not have fonts for $ENV{LC_ALL} = $ENV{LANG} = 'C'; $ENV{COLUMNS} = '76'; # same as pod2text default $ENV{PAGER} = 'cat'; my $cmd = join(' ', $MAN, @args); system($MAN, @args) and die "$cmd failed: $!\n"; $manpage =~ /\A(?:copydatabase|xapian-compact)\z/ and print "\n\n", $xapmsg, "\n"; # touch -r $(man -w $section $manpage) output.txt if (-f \*STDOUT) { open(my $fh, '-|', $MAN, '-w', @args) or die "$MAN -w broken?: $!\n"; chomp(my $path = <$fh>); my @st = stat($path) or die "stat($path) failed: $!\n"; # 9 - mtime utime($st[9], $st[9], \*STDOUT) or die "utime(STDOUT) failed: $!\n"; } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/flow.ge������������������������������������������������������������0000664�0000000�0000000�00000001331�13773461203�0020213�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������# public-inbox data flow # # Note: choose either "delivery tools" OR "git mirroring tools" # for a given inboxdir. Combining them for the SAME inboxdir # will cause conflicts. Of course, different inboxdirs may # choose different means of getting mail into them. graph { flow: down } [delivery tools:\n public-inbox-mda\n public-inbox-watch\n public-inbox-learn] -> [inboxdir] [git mirroring tools:\n grok-pull,\n various scripts ] -- git (clone|fetch) &&\n public-inbox-index --> [inboxdir] [inboxdir] -> [read-only daemons:\n public-inbox-httpd\n public-inbox-imapd\n public-inbox-nntpd] # Copyright 2020 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/flow.txt�����������������������������������������������������������0000664�0000000�0000000�00000003614�13773461203�0020445�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������# public-inbox data flow # # Note: choose either "delivery tools" OR "git mirroring tools" # for a given inboxdir. Combining them for the SAME inboxdir # will cause conflicts. Of course, different inboxdirs may # choose different means of getting mail into them. +--------------------+ | delivery tools: | | public-inbox-mda | | public-inbox-watch | | public-inbox-learn | +--------------------+ | | v +----------------------+ +--------------------+ | git mirroring tools: | git (clone|fetch) && | | | grok-pull, | public-inbox-index | inboxdir | | various scripts | ----------------------> | | +----------------------+ +--------------------+ | | v +--------------------+ | read-only daemons: | | public-inbox-httpd | | public-inbox-imapd | | public-inbox-nntpd | +--------------------+ # Copyright 2020 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> ��������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/hosted.txt���������������������������������������������������������0000664�0000000�0000000�00000002720�13773461203�0020761�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������unofficially hosted mirrors at public-inbox.org In addition to eating our own dogfood at <https://public-inbox.org/meta/>, public-inbox.org hosts unofficial archives for several other projects to further test our own software. These mirrors are NOT to be considered reliable or permanent. Interested parties are strongly encouraged to host their own mirrors. The presence of these archives does not imply these projects endorse public-inbox or public-inbox.org in any way. * https://public-inbox.org/bug-gnulib/ bug-gnulib@gnu.org Discussion for Gnulib portability/common source project https://lists.gnu.org/mailman/listinfo/bug-gnulib * https://public-inbox.org/git/ git@vger.kernel.org Mailing list for the git version control system http://vger.kernel.org/majordomo-info.html * https://public-inbox.org/libc-alpha/ libc-alpha@sourceware.org Mailing list for GNU C library development https://www.gnu.org/software/libc/involved.html * https://public-inbox.org/rack-devel/ rack-devel@googlegroups.com Development list for the Ruby webserver interface https://groups.google.com/group/rack-devel * https://public-inbox.org/sox-users/ sox-users@lists.sourceforge.net Users' list for the SoX sound processing tool https://lists.sourceforge.net/lists/listinfo/sox-users * https://public-inbox.org/sox-devel/ sox-devel@lists.sourceforge.net Developers' list for the SoX sound processing tool https://lists.sourceforge.net/lists/listinfo/sox-devel ������������������������������������������������public-inbox-1.6.1/Documentation/include.mk���������������������������������������������������������0000664�0000000�0000000�00000006650�13773461203�0020714�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������# Copyright (C) 2013-2020 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> all:: RSYNC = rsync RSYNC_DEST = public-inbox.org:/srv/public-inbox/ AWK = awk MAN = man # this is "xml" on FreeBSD and maybe some other distros: XMLSTARLET = xmlstarlet # libgraph-easy-perl from Debian, Graph::Easy from CPAN GRAPH_EASY = graph-easy # same as pod2text COLUMNS = 76 INSTALL = install PODMAN = pod2man PODMAN_OPTS = -v --stderr -d 1993-10-02 -c 'public-inbox user manual' PODMAN_OPTS += -r public-inbox.git podman = $(PODMAN) $(PODMAN_OPTS) PODTEXT = pod2text PODTEXT_OPTS = --stderr podtext = $(PODTEXT) $(PODTEXT_OPTS) all:: man manpages = $(man1) $(man5) $(man7) $(man8) man: $(manpages) prefix ?= $(PREFIX) prefix ?= $(HOME) mandir ?= $(INSTALLMAN1DIR)/.. man5dir = $(mandir)/man5 man7dir = $(mandir)/man7 man8dir = $(mandir)/man8 install-man: man $(INSTALL) -d -m 755 $(DESTDIR)$(INSTALLMAN1DIR) $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir) $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir) $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir) $(INSTALL) -m 644 $(man1) $(DESTDIR)$(INSTALLMAN1DIR) $(INSTALL) -m 644 $(man5) $(DESTDIR)$(man5dir) $(INSTALL) -m 644 $(man7) $(DESTDIR)$(man7dir) $(INSTALL) -m 644 $(man8) $(DESTDIR)$(man8dir) doc_install :: install-man check :: check-man check_man = $(AWK) '{gsub(/\b./,"")}length>80{print;err=1}END{exit(err)}'\ >&2 && >$@ check-man :: $(check_80) all :: $(docs) txt2pre = $(PERL) -I lib ./Documentation/txt2pre >$@ Documentation/standards.txt : Documentation/standards.perl $(PERL) -w Documentation/standards.perl >$@+ touch -r Documentation/standards.perl $@+ mv $@+ $@ # flow.txt is checked into git since Graph::Easy isn't in many distros Documentation/flow.txt : Documentation/flow.ge (sed -ne '1,/^$$/p' <Documentation/flow.ge; \ $(GRAPH_EASY) Documentation/flow.ge || \ cat Documentation/flow.txt; \ echo; \ sed -ne '/^# Copyright/,$$p' <Documentation/flow.ge \ ) >$@+ touch -r Documentation/flow.ge $@+ mv $@+ $@ NEWS NEWS.atom NEWS.html : $(news_deps) $(PERL) -I lib -w Documentation/mknews.perl $@ $(RELEASES) # check for internal API changes: check :: NEWS .NEWS.atom.check NEWS.html .NEWS.atom.check: NEWS.atom $(XMLSTARLET) val NEWS.atom || \ { e=$$?; test $$e -eq 0 || test $$e -eq 127; } >$@ html: $(docs_html) doc: $(docs) %.gz: % gzip -9 --rsyncable <$< >$@+ touch -r $< $@+ mv $@+ $@ gz-doc: $(gz_docs) gz-xdoc: $(gz_xdocs) rsync-doc: NEWS.atom.gz # /usr/share/doc/rsync/scripts/git-set-file-times{.gz} on Debian systems # It is also at: https://yhbt.net/git-set-file-times -git set-file-times $(docs) $(txt) $(MAKE) gz-doc gz-xdoc $(RSYNC) --chmod=Fugo=r -av $(rsync_docs) $(rsync_xdocs) $(RSYNC_DEST) clean-doc: $(RM_F) $(man1) $(man5) $(man7) $(man8) $(gz_docs) $(docs_html) \ $(mantxt) $(rsync_xdocs) \ NEWS NEWS.atom NEWS.html Documentation/standards.txt \ Documentation/flow.html Documentation/flow.html.gz \ Documentation/flow.txt.gz clean :: clean-doc # No camel-cased tarballs or pathnames which MakeMaker creates, # this may not always be a Perl project. This should match what # cgit generate, since git maintainers ensure git-archive has # stable tar output DIST_TREE = HEAD^{tree} DIST_VER = git-dist : ver=$$(git describe $(DIST_VER) | sed -ne s/v//p); \ pkgpfx=public-inbox-$$ver; \ git archive --prefix=$$pkgpfx/ --format=tar $(DIST_TREE) \ | gzip -n >$$pkgpfx.tar.gz; \ echo $$pkgpfx.tar.gz created ����������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/marketing.txt������������������������������������������������������0000664�0000000�0000000�00000001665�13773461203�0021463�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������marketing guide for public-inbox TL; DR: Don't market this. If you must: don't be pushy and annoying about it. Slow down. Please no superlatives, hype or BS. It's online and public, so it already markets itself. Being informative is not a bad thing, being insistent is. Chances are, you're preaching to the choir; or the folks you're trying to convince are not ready for everything our project represents to the resistance against centralization. Baby steps... There's never a need for anybody to migrate to using our software, or to use any particular instance of it. It's designed to coexist with other mail archives, especially other installations of public-inbox. Most importantly, we take victories even when our software doesn't get adopted. Freedom from lock-in is more important than the adoption of any software. Every time somebody recognizes and rejects various forms of lock-in and centralization is already a victory for us. ���������������������������������������������������������������������������public-inbox-1.6.1/Documentation/mknews.perl��������������������������������������������������������0000775�0000000�0000000�00000011451�13773461203�0021126�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/perl -w # Copyright (C) 2019-2020 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # Generates NEWS, NEWS.atom, and NEWS.html files using release emails # this uses unstable internal APIs of public-inbox, and this script # needs to be updated if they change. use strict; use PublicInbox::Eml; use PublicInbox::View; use PublicInbox::Hval qw(fmt_ts); use PublicInbox::MsgTime qw(msg_datestamp); use PublicInbox::MID qw(mids mid_escape); END { $INC{'Plack/Util.pm'} and warn "$0 should not have loaded Plack::Util\n" } my $dst = shift @ARGV or die "Usage: $0 <NEWS|NEWS.atom|NEWS.html>"; # newest to oldest my @releases = @ARGV; my $dir = 'Documentation/RelNotes'; my $base_url = 'https://public-inbox.org/meta'; my $html_url = 'https://public-inbox.org/NEWS.html'; my $atom_url = 'https://public-inbox.org/NEWS.atom'; my $addr = 'meta@public-inbox.org'; my $latest = shift(@releases) or die 'no releases?'; my $mtime; my $mime_latest = release2mime($latest, \$mtime); my $tmp = "$dst+"; my $out; if ($dst eq 'NEWS') { open $out, '>:encoding(utf8)', $tmp or die; mime2txt($out, $mime_latest); for my $v (@releases) { print $out "\n" or die; mime2txt($out, release2mime($v)); } } elsif ($dst eq 'NEWS.atom' || $dst eq 'NEWS.html') { open $out, '>', $tmp or die; my $ibx = My::MockObject->new( description => 'public-inbox releases', over => undef, search => 1, # for WwwStream::html_top base_url => "$base_url/", ); $ibx->{-primary_address} = $addr; my $ctx = { -inbox => $ibx, -upfx => "$base_url/", -hr => 1, }; if ($dst eq 'NEWS.html') { html_start($out, $ctx); mime2html($out, $mime_latest, $ctx); while (defined(my $v = shift(@releases))) { mime2html($out, release2mime($v), $ctx); } html_end($out, $ctx); } elsif ($dst eq 'NEWS.atom') { my $astream = atom_start($out, $ctx, $mtime); for my $v (reverse(@releases)) { mime2atom($out, $astream, release2mime($v), $ctx); } mime2atom($out, $astream, $mime_latest, $ctx); print $out '</feed>' or die; } else { die "BUG: Unrecognized $dst\n"; } } else { die "Unrecognized $dst\n"; } close($out) or die; utime($mtime, $mtime, $tmp) or die; rename($tmp, $dst) or die; exit 0; sub release2mime { my ($release, $mtime_ref) = @_; my $f = "$dir/$release.eml"; open(my $fh, '<', $f) or die "open($f): $!"; my $mime = PublicInbox::Eml->new(\(do { local $/; <$fh> })); # Documentation/include.mk relies on mtimes of each .eml file # to trigger rebuild, so make sure we sync the mtime to the Date: # header in the .eml my $mtime = msg_datestamp($mime->header_obj); utime($mtime, $mtime, $fh) or warn "futimes $f: $!"; $$mtime_ref = $mtime if $mtime_ref; $mime; } sub mime2txt { my ($out, $mime) = @_; my $title = $mime->header('Subject'); $title =~ s/^\s*\[\w+\]\s*//g; # [ANNOUNCE] or [ANN] my $dtime = msg_datestamp($mime->header_obj); $title .= ' - ' . fmt_ts($dtime) . ' UTC'; print $out $title, "\n" or die; my $uline = '=' x length($title); print $out $uline, "\n\n" or die; my $mid = mids($mime)->[0]; print $out 'Link: ', $base_url, '/', mid_escape($mid), "/\n\n" or die; print $out $mime->body_str or die; } sub mime2html { my ($out, $eml, $ctx) = @_; my $smsg = $ctx->{smsg} = bless {}, 'PublicInbox::Smsg'; $smsg->populate($eml); $ctx->{msgs} = [ 1 ]; # for <hr> in eml_entry print $out PublicInbox::View::eml_entry($ctx, $eml) or die; } sub html_start { my ($out, $ctx) = @_; require PublicInbox::WwwStream; $ctx->{www} = My::MockObject->new(style => ''); my $www_stream = PublicInbox::WwwStream::init($ctx); print $out $www_stream->html_top, '<pre>' or die; } sub html_end { print $out <<EOF or die; git clone $PublicInbox::WwwStream::CODE_URL </pre></body></html> EOF } sub atom_start { my ($out, $ctx, $mtime) = @_; require PublicInbox::WwwAtomStream; # WwwAtomStream stats this dir for mtime my $astream = PublicInbox::WwwAtomStream->new($ctx); delete $astream->{emit_header}; my $ibx = $ctx->{-inbox}; my $title = PublicInbox::WwwAtomStream::title_tag($ibx->description); my $updated = PublicInbox::WwwAtomStream::feed_updated($mtime); print $out <<EOF or die; <?xml version="1.0" encoding="us-ascii"?> <feed xmlns="http://www.w3.org/2005/Atom" xmlns:thr="http://purl.org/syndication/thread/1.0">$title<link rel="alternate" type="text/html" href="$html_url"/><link rel="self" href="$atom_url"/><id>$atom_url</id>$updated EOF $astream; } sub mime2atom { my ($out, $astream, $eml, $ctx) = @_; my $smsg = bless {}, 'PublicInbox::Smsg'; $smsg->populate($eml); if (defined(my $str = $astream->feed_entry($smsg, $eml))) { print $out $str or die; } } package My::MockObject; use strict; our $AUTOLOAD; sub new { my ($class, %values) = @_; bless \%values, $class; } sub AUTOLOAD { my ($self) = @_; my $attr = (split(/::/, $AUTOLOAD))[-1]; $self->{$attr}; } 1; �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-compact.pod�������������������������������������������0000664�0000000�0000000�00000003335�13773461203�0023460�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-compact - compact Xapian DBs in an inbox =head1 SYNOPSIS public-inbox-compact INBOX_DIR public-inbox-compact --all =head1 DESCRIPTION public-inbox-compact is a wrapper for L<xapian-compact(1)> which locks the inbox and prevents other processes such as L<public-inbox-watch(1)> or L<public-inbox-mda(1)> from writing while it operates. It enforces the use of the C<--no-renumber> option of L<xapian-compact(1)> which is required to work with the rest of the public-inbox search code. This command is rarely needed for active inboxes. Using the C<--compact> option of L<public-inbox-index(1)> is recommended, instead, and only when doing a C<--reindex>. =head1 OPTIONS =over =item --all Compact all inboxes configured in ~/.public-inbox/config. This is an alternative to specifying individual inboxes directories on the command-line. =item --blocksize / --no-full / --fuller These options are passed directly to L<xapian-compact(1)>. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG The default config file, normally "~/.public-inbox/config". See L<public-inbox-config(5)> =item XAPIAN_FLUSH_THRESHOLD The number of documents to update before committing changes to disk. This environment is handled directly by Xapian, refer to Xapian API documentation for more details. Default: 10000 =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://hjrcffqmbrq6wope.onion/meta/> =head1 COPYRIGHT Copyright 2018-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<xapian-compact(1)>, L<public-inbox-index(1)> ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-config.pod��������������������������������������������0000664�0000000�0000000�00000026444�13773461203�0023305�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-config - public-inbox config file description =head1 SYNOPSIS ~/.public-inbox/config =head1 DESCRIPTION The public-inbox config file is parseable by L<git-config(1)>. This is a global configuration file for mapping/discovering all public-inboxes used by a particular user. =head1 CONFIGURATION FILE =head2 EXAMPLE [publicinbox "test"] inboxdir = /home/user/path/to/test.git ; multiple addresses are supported address = test@example.com ; address = alternate@example.com url = http://example.com/test newsgroup = inbox.test ; backwards compatibility with public-inbox pre-1.2.0, ; "inboxdir" takes precedence over "mainrepo" mainrepo = /home/user/path/to/test.git =head2 VARIABLES =over 8 =item publicinbox.<name>.address The email address of the public-inbox. May be specified more than once for merging multiple mailing lists (or migrating to new addresses). This must be specified at least once, the first value will be considered the primary address for informational purposes. Default: none, required =item publicinbox.<name>.inboxdir The absolute path to the directory which hosts the public-inbox. This must be specified once. This was previously known as "mainrepo", which remains supported, but "inboxdir" takes precedence. Default: none, required =item publicinbox.<name>.url The primary URL for hosting the HTTP/HTTPS archives. Additional HTTP/HTTPS URLs may be specified via C<$GIT_DIR/cloneurl> as documented in L<gitweb(1)> Default: none, optional =item publicinbox.<name>.newsgroup The NNTP group name for use with L<public-inbox-nntpd(8)>. This may be any newsgroup name with hierarchies delimited by C<.>. For example, the newsgroup for L<mailto:meta@public-inbox.org> is: C<inbox.comp.mail.public-inbox.meta> Omitting this for the given inbox will prevent the group from being read by L<public-inbox-nntpd(1)> Default: none, optional =item publicinbox.<name>.watch See L<public-inbox-watch(1)> =item publicinbox.<name>.watchheader See L<public-inbox-watch(1)> =item publicinbox.<name>.listid The L<rfc2919|https://tools.ietf.org/html/rfc2919> header without angle brackets for L<public-inbox-mda(1)> deliveries and L<public-inbox-watch(1)>. For public-inbox-watch users, this is a shortcut for specifying C<publicinbox.$NAME.watchheader=List-Id:E<lt>foo.example.comE<gt>> For public-inbox-mda users, this may be used to avoid recipient matching via C<ORIGINAL_RECIPIENT> environment variable. This may be specified multiple times for merging multiple mailing lists into a single public-inbox, only one C<List-Id> header needs to match. Default: none =item publicinbox.<name>.nntpmirror This may be the full NNTP URL of an independently-run mirror. For example, the https://public-inbox.org/meta/ inbox is mirrored by Gmane at C<nntp://news.gmane.io/gmane.mail.public-inbox.general> Default: none =item publicinbox.<name>.indexlevel The indexing level for L<public-inbox-index(1)> C<basic> only requires L<DBD::SQLite(3pm)> and provides all NNTP functionality along with thread-awareness in the WWW interface. C<medium> requires L<Search::Xapian(3pm)> to provide full-text term search functionality in the WWW UI. C<full> also includes positional information used by Xapian to allow for searching for phrases using quoted text. (e.g. C<"looking for a complete sentence">) Default: C<full> =item publicinbox.<name>.indexSequentialShard See L<public-inbox-index(1)/publicInbox.indexSequentialShard> =item publicinbox.<name>.httpbackendmax If a digit, the maximum number of parallel L<git-http-backend(1)> processes to allow for cloning this particular inbox. If an alphanumeric value starting with a lowercase alphabetic character is specified, the inbox will use a L</NAMED LIMITER> which can be shared by multiple inboxes. Default: 32 (using a default limiter shared by all inboxes) =item publicinbox.<name>.coderepo The nickname of a "coderepo" section associated with the inbox. May be specified more than once for M:N mapping of code repos to inboxes. If enabled, diff hunk headers in patch emails will link to the line numbers of blobs. Default: none =item publicinbox.<name>.replyto May be used to control how reply instructions in the PSGI interface are displayed. ":none=dead inbox" may be specified to denote an inactive list ("dead inbox" may be replaced with another phrase). A list of comma-delimited email addresses may be specified. This can be useful for dedicated inboxes for bot emails, but discussion happens on a separate mailing list/inbox. Mirrors of existing centralized mailing lists may use ":list" here to redirect mail only to the configured inbox address. The use of ":list" is discouraged for new mailing lists, as it leads to centralization. Default: :all =item publicinbox.css The local path name of a CSS file for the PSGI web interface. May contain the attributes "media", "title" and "href" which match the associated attributes of the HTML <style> tag. "href" may be specified to point to the URL of an remote CSS file and the path may be "/dev/null" or any empty file. Multiple files may be specified and will be included in the order specified. =item publicinboxmda.spamcheck This may be set to C<none> to disable the use of SpamAssassin L<spamc(1)> for filtering spam before it is imported into git history. Other spam filtering backends may be supported in the future. Default: spamc =item publicinboxwatch.spamcheck See L<public-inbox-watch(1)> =item publicinboxwatch.watchspam See L<public-inbox-watch(1)> =item publicinbox.nntpserver Set this to point to the hostname of the L<public-inbox-nntpd(1)> instance. This is used to advertise the existence of the NNTP endpoint in the L<PublicInbox::WWW> HTML interface. Multiple values are allowed for instances with multiple hostnames or mirrors. Default: none =item publicinbox.<name>.feedmax The size of an Atom feed for the inbox. If specified more than once, only the last value is used. Invalid values (<= 0) will be treated as the default value. Default: 25 =item publicinbox.<name>.hide A comma-delimited list of listings to hide the inbox from. Valid values are currently C<www> and C<manifest>. Default: none =item coderepo.<nick>.dir The path to a git repository for "publicinbox.<name>.coderepo" =item coderepo.<nick>.cgitUrl The URL of the cgit instance associated with the coderepo. Default: none =item publicinbox.cgitrc A path to a L<cgitrc(5)> file. "repo.url" directives in the cgitrc will be mapped to the nickname of a coderepo (without trailing slash), and "repo.path" directives map to "coderepo.<nick>.dir". Use of this directive allows admins of existing cgit installations to skip declaring coderepo sections and map inboxes directly to code repositories known to cgit. Macro expansion (e.g. C<$HTTP_HOST>) is not yet supported. =item publicinbox.cgitbin A path to the C<cgit.cgi> executable. The L<PublicInbox::WWW> interface can spawn cgit as a fallback if the publicinbox.cgitrc directive is configured. Default: /var/www/htdocs/cgit/cgit.cgi or /usr/lib/cgit/cgit.cgi =item publicinbox.cgitdata A path to the data directory used by cgit for storing static files. Typically guessed based the location of C<cgit.cgi> (from C<publicinbox.cgitbin>, but may be overridden. Default: basename of C<publicinbox.cgitbin>, /var/www/htdocs/cgit/ or /usr/share/cgit/ =item publicinbox.mailEditor See L<public-inbox-edit(1)> =item publicinbox.indexMaxSize =item publicinbox.indexBatchSize =item publicinbox.indexSequentialShard See L<public-inbox-index(1)> =item publicinbox.wwwlisting Enable a HTML listing style when the root path of the URL '/' is accessed. Valid values are: =over 8 =item * all - Show all inboxes =item * 404 - Return a 404 page. This is useful to allow customization with L<Plack::App::Cascade(3pm)> =item * match=domain - Only show inboxes with URLs which belong to the domain of the HTTP request =for TODO comment support showing cgit listing =back Default: C<404> =item publicinbox.grokmanifest Controls the generation of a grokmirror-compatible gzipped JSON file at the top-level of the PSGI interface. You generally do not need to change this from the default. Valid values are: =over 8 =item * match=domain - Only include inboxes with URLs which belong to the domain of the HTTP request. This is compatible with virtual hosting where several domains come from the same host. =item * all - All inboxes are present in C<manifest.js.gz>, regardless of domain. Only use this if you're serving HTTP requests in a domain-agnostic manner. =item * 404 - C<manifest.js.gz> will only contain an empty JSON array. This does NOT affect C<$INBOX_URL/manifest.js.gz>, which will always contain all git repos used by the inbox at C<$INBOX_URL> =back Default: C<match=domain> =back =head2 NAMED LIMITER (PSGI) Named limiters are useful for preventing large inboxes from monopolizing (or overloading) the server. Since serving git clones (via L<git-http-backend(1)> can be memory-intensive for large inboxes, it makes sense to put large inboxes on a named limiter with a low max value; while smaller inboxes can use the default limiter. C<RLIMIT_*> keys may be set to enforce resource limits for a particular limiter. Default named-limiters are prefixed with "-". Currently, the "-cgit" named limiter is reserved for instances spawning cgit via C<publicinbox.cgitrc> =over 8 =item publicinboxlimiter.<name>.max The maximum number of parallel processes for the given limiter. =item publicinboxlimiter.<name>.rlimitCore =item publicinboxlimiter.<name>.rlimitCPU =item publicinboxlimiter.<name>.rlimitData The maximum core size, CPU time, or data size processes run with the given limiter will use. This may be comma-separated to distinguish soft and hard limits. The word "INFINITY" is accepted as the RLIM_INFINITY constant (if supported by your OS). See L<setrlimit(2)> for more info on the behavior of RLIMIT_CORE, RLIMIT_CPU, and RLIMIT_DATA for you operating system. =back =head3 EXAMPLE WITH NAMED LIMITERS ; big inboxes which require lots of memory to clone: [publicinbox "big1"] inboxdir = /path/to/big1 address = big1@example.com httpbackendmax = big [publicinbox "big2"] inboxdir = /path/to/big2 address = big2@example.com httpbackendmax = big ; tiny inboxes which are easily cloned: [publicinbox "tiny1"] inboxdir = /path/to/tiny1 address = tiny1@example.com [publicinbox "tiny2"] inboxdir = /path/to/tiny2 address = tiny2@example.com [publicinboxlimiter "big"] max = 4 In the above example, the "big1" and "big2" are limited to four parallel L<git-http-backend(1)> processes between them. However, "tiny1" and "tiny2" will share the default limiter which means there can be 32 L<git-http-backend(1)> processes between them. =head1 ENVIRONMENT =over 8 =item PI_CONFIG Used to override the default "~/.public-inbox/config" value. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://hjrcffqmbrq6wope.onion/meta/> =head1 COPYRIGHT Copyright 2016-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>, L<public-inbox-mda(1)>, L<public-inbox-watch(1)>, L<grokmirror|https://git.kernel.org/pub/scm/utils/grokmirror/grokmirror.git> ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-convert.pod�������������������������������������������0000664�0000000�0000000�00000004764�13773461203�0023521�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-convert - convert v1 inboxes to v2 =head1 SYNOPSIS public-inbox-convert [OPTIONS] OLD_DIR NEW_DIR =head1 DESCRIPTION public-inbox-convert copies the contents of an old "v1" inbox into a new "v2" inbox. It makes no changes to the old inbox and users are expected to update the "inboxdir" path in L<public-inbox-config(5)> to point to the path of NEW_DIR once they are satisfied with the conversion. =head1 OPTIONS =over =item --no-index Disables Xapian and overview DB indexing on the new inbox. By default, public-inbox-convert creates a new index in the v2 inbox and indexes all existing messages, a lengthy operation for large inboxes. =item -j JOBS, --jobs=JOBS Control the number of indexing jobs and Xapian shards of the v2 inbox. By default, this is the detected CPU count but capped at 4 due to various bottlenecks. The number of Xapian shards will be 1 less than the JOBS value, since there is a single process which distributes work to the Xapian shards. =item -L LEVEL, --index-level=LEVEL =item -c, --compact =item -v, --verbose =item --no-fsync =item --sequential-shard =item --batch-size=BYTES =item --max-size=BYTES These options affect indexing. They have no effect if L</--no-index> is specified See L<public-inbox-index(1)> for a description of these options. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG The default config file, normally "~/.public-inbox/config". See L<public-inbox-config(5)> =back =head1 UPGRADING Editing "~/.public-inbox/config" (or whatever C<PI_CONFIG> is set to) will be required to start using the new directory. =head1 BUGS Writes from L<public-inbox-mda(1)> or L<git-fetch(1)> to the v1 inbox which occur after the start of the conversion will not be picked up in the v2 inbox. Users of L<public-inbox-watch(1)> do not have to worry about this. They only need to update the config file to point to the v2 inbox, send C<SIGHUP> public-inbox-watch process to reload the config file, and then C<SIGUSR1> to rescan existing Maildirs. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://hjrcffqmbrq6wope.onion/meta/> =head1 COPYRIGHT Copyright 2013-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-init(1)>, L<public-inbox-index(1)>, L<public-inbox-config(5)>, L<public-inbox-v1-format(5)>, L<public-inbox-v2-format(5)> ������������public-inbox-1.6.1/Documentation/public-inbox-daemon.pod��������������������������������������������0000664�0000000�0000000�00000011476�13773461203�0023302�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-daemon - common usage for public-inbox network daemons =head1 SYNOPSIS public-inbox-httpd public-inbox-nntpd =head1 DESCRIPTION This manual describes common options and behavior for public-inbox network daemons. Network daemons for public-inbox provide read-only NNTP and HTTP access to public-inboxes. Write access to a public-inbox will never be required to run these. These daemons are implemented with a common core using non-blocking sockets and optimized for fairness; even with thousands of connected clients over slow links. They also provide graceful shutdown/upgrade support to avoid breaking existing connections during software upgrades. These daemons may also utilize multiple pre-forked worker processes to take advantage of multiple CPUs. =head1 OPTIONS =over =item -l, --listen ADDRESS This takes an absolute path to a Unix socket or HOST:PORT to listen on. For example, to listen to TCP connections on port 119, use: C<-l 0.0.0.0:119>. This may also point to a Unix socket (C<-l /path/to/http.sock>) for a reverse proxy like L<nginx(8)> to use. May be specified multiple times to allow listening on multiple sockets. This does not need to be specified at all if relying on L<systemd.socket(5)> or similar Default: server-dependent unless socket activation is used with L<systemd(1)> or similar (see L<systemd.socket(5)>). =item -1, --stdout PATH Specify an appendable path to redirect stdout descriptor (1) to. Using this is preferable to setting up the redirect externally (e.g. E<gt>E<gt>/path/to/log in shell) since it allows SIGUSR1 to be handled (see L<SIGNALS/SIGNALS> below). Default: /dev/null =item -2, --stderr PATH Like C<--stdout>, but for the stderr descriptor (2). =item -W, --worker-processes Set the number of worker processes. Normally, this should match the number of CPUs on the system to take full advantage of the hardware. However, users of memory-constrained systems may want to lower this. Setting this to zero (C<-W0>) disables the master/worker split; saving some memory but removing the ability to use SIGTTIN to increase worker processes or have the worker restarted by the master on crashes. Default: 1 =back =head1 SIGNALS Most of our signal handling behavior is copied from L<nginx(8)> and/or L<starman(1)>; so it is possible to reuse common scripts for managing them. =over 8 =item SIGUSR1 Reopens log files pointed to by --stdout and --stderr options. =item SIGUSR2 Spawn a new process with the intention to replace the running one. See L</UPGRADING> below. =item SIGHUP Reload config files associated with the process. (FIXME: not tested for -httpd, yet) =item SIGTTIN Increase the number of running workers processes by one. =item SIGTTOU Decrease the number of running worker processes by one. =item SIGWINCH Stop all running worker processes. SIGHUP or SIGTTIN may be used to restart workers. =item SIGQUIT Gracefully terminate the running process. =back SIGTTOU, SIGTTIN, SIGWINCH all have no effect when worker processes are disabled with C<-W0> on the command-line. =head1 ENVIRONMENT =over 8 =item PI_CONFIG The default config file, normally "~/.public-inbox/config". See L<public-inbox-config(5)> =item LISTEN_FDS, LISTEN_PID Used by systemd (and compatible) installations for socket activation. See L<systemd.socket(5)> and L<sd_listen_fds(3)>. =item PERL_INLINE_DIRECTORY Pointing this to point to a writable directory enables the use of L<Inline> and L<Inline::C> extensions which may provide platform-specific performance improvements. Currently, this enables the use of L<vfork(2)> which speeds up subprocess spawning with the Linux kernel. public-inbox will never enable L<Inline::C> automatically without this environment variable set or C<~/.cache/public-inbox/inline-c> created by a user. See L<Inline> and L<Inline::C> for more details. =back =head1 UPGRADING There are two ways to upgrade a running process. Users of process management systems with socket activation (L<systemd(1)> or similar) may rely on multiple instances For systemd, this means using two (or more) '@' instances for each service (e.g. C<SERVICENAME@INSTANCE>) as documented in L<systemd.unit(5)>. Users of traditional SysV init may use SIGUSR2 to spawn a replacement process and gracefully terminate the old process using SIGQUIT. In either case, the old process will not truncate running responses; so responses to expensive requests do not get interrupted and lost. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://hjrcffqmbrq6wope.onion/meta/> =head1 COPYRIGHT Copyright 2013-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-httpd(1)>, L<public-inbox-nntpd(1)> ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-edit.pod����������������������������������������������0000664�0000000�0000000�00000006253�13773461203�0022761�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-edit - destructively edit messages in a public inbox =head1 SYNOPSIS public-inbox-edit -m MESSAGE-ID --all|INBOX_DIR public-inbox-edit -F RAW_FILE --all|INBOX_DIR [.. INBOX_DIR] =head1 DESCRIPTION public-inbox-edit allows editing messages in a given inbox to remove sensitive information. It is only intended as a last resort, as it will cause discontiguous git history and draw more attention to the sensitive data in mirrors. =head1 OPTIONS =over =item --all Edit the message in all inboxes configured in ~/.public-inbox/config. This is an alternative to specifying individual inboxes directories on the command-line. =item -m MESSAGE-ID Edits the message corresponding to the given C<MESSAGE-ID>. If the C<MESSAGE-ID> is ambiguous, C<--force> or using the C<--file> of the original will be required. =item -F FILE Edits the message corresponding to the Message-ID: header and content given in C<FILE>. This requires the unmodified raw message, and the contents of C<FILE> will not itself be modified. This is useful if a Message-ID is ambiguous due to filtering/munging rules or other edits. =item --force Forcibly perform the edit even if Message-ID is ambiguous. =item --raw Do not perform "From " line escaping. By default, this generates a mboxrd variant file to detect unpurged messages in the new mbox. This makes sense if your configured C<publicinbox.mailEditor> is a regular editor and not something like C<mutt -f> =back =head1 CONFIGURATION =over 8 =item publicinbox.mailEditor The command to perform the edit with. An example of this would be C<mutt -f>, and the user would then use the facilities in L<mutt(1)> to edit the mail. This is useful for editing attachments or Base64-encoded emails which are more difficult to edit with a normal editor (configured via C<GIT_EDITOR>, C<VISUAL> or C<EDITOR>). Default: none =back =head1 ENVIRONMENT =over 8 =item GIT_EDITOR / VISUAL / EDITOR =for comment MAIL_EDITOR is undocumented (unstable, don't want naming conflicts) public-inbox-edit will fall back to using one of these variables (in that order) if C<publicinbox.mailEditor> is unset. =item PI_CONFIG The default config file, normally "~/.public-inbox/config". See L<public-inbox-config(5)> =back =head1 LIMITATIONS Only L<v2|public-inbox-v2-format(5)> repositories are supported. This is safe to run while normal inbox writing tools (L<public-inbox-mda(1)>, L<public-inbox-watch(1)>, L<public-inbox-learn(1)>) are active. Running this in parallel with L<public-inbox-xcpdb(1)> or C<"public-inbox-index --reindex"> can lead to errors or edited data remaining indexed. Incremental L<public-inbox-index(1)> (without C<--reindex>) is fine. Keep in mind this is a last resort, as it will be disruptive to anyone using L<git(1)> to mirror the inbox being edited. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://hjrcffqmbrq6wope.onion/meta/> =head1 COPYRIGHT Copyright 2019-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-purge(1)> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-httpd.pod���������������������������������������������0000664�0000000�0000000�00000002027�13773461203�0023152�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-httpd - PSGI server optimized for public-inbox =head1 SYNOPSIS public-inbox-httpd [OPTIONS] [/path/to/myapp.psgi] =head1 DESCRIPTION public-inbox-httpd is a PSGI/Plack server supporting HTTP/1.1 and HTTP/1.0. It uses options and environment variables common to all L<public-inbox-daemon(8)> implementations in addition to the PSGI file. If a PSGI file is not specified, L<PublicInbox::WWW> is loaded with a default middleware stack consisting of L<Plack::Middleware::ReverseProxy>, and L<Plack::Middleware::Head> This may point to a PSGI file for supporting generic PSGI apps. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://hjrcffqmbrq6wope.onion/meta/> =head1 COPYRIGHT Copyright 2013-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>, L<Plack> ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-imapd.pod���������������������������������������������0000664�0000000�0000000�00000005356�13773461203�0023131�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-imapd - IMAP server for sharing public-inboxes =head1 SYNOPSIS public-inbox-imapd [OPTIONS] =head1 DESCRIPTION public-inbox-imapd provides a read-only IMAP daemon for public-inbox. It uses options and environment variables common to all L<public-inbox-daemon(8)> implementations. Like L<public-inbox-nntpd(1)> and L<public-inbox-httpd(1)>, C<public-inbox-imapd> will never require write access to the directory where the public-inboxes are stored, so it may be run as a different user than the user running L<public-inbox-watch(1)>, L<public-inbox-mda(1)>, or L<git-fetch(1)>. =head1 OPTIONS See common options in L<public-inbox-daemon(8)/OPTIONS>. Additionally, IMAP-specific behavior for certain options are supported and documented below. =over =item -l, --listen PROTO://ADDRESS/?cert=/path/to/cert,key=/path/to/key In addition to the normal C<-l>/C<--listen> switch described in L<public-inbox-daemon(8)>, the C<PROTO> prefix (e.g. C<imap://> or C<imaps://>) may be specified to force a given protocol. For STARTTLS and IMAPS support, the C<cert> and C<key> may be specified on a per-listener basis after a C<?> character and separated by C<,>. These directives are per-directive, and it's possible to use a different cert for every listener. =item --cert /path/to/cert The default TLS certificate for optional STARTTLS and IMAPS support if the C<cert> option is not given with C<--listen>. If using systemd-compatible socket activation and a TCP listener on port 993 is inherited, it is automatically IMAPS when this option is given. When a listener on port 143 is inherited and this option is given, it automatically gets STARTTLS support. =item --key /path/to/key The default private TLS certificate key for optional STARTTLS and IMAPS support if the C<key> option is not given with C<--listen>. The private key may concatenated into the path used by C<--cert>, in which case this option is not needed. =back =head1 CONFIGURATION C<public-inbox-imapd> uses the same configuration knobs as L<public-inbox-nntpd(1)>, see L<public-inbox-nntpd(1)> and L<public-inbox-config(5)>. =over 8 =item publicinbox.<name>.newsgroup The newsgroup name maps to an IMAP folder name. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/>, L<nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta>, L<nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta> =head1 COPYRIGHT Copyright 2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>, L<public-inbox-config(5)>, L<public-inbox-nntpd(1)> ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-index.pod���������������������������������������������0000664�0000000�0000000�00000021617�13773461203�0023144�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-index - create and update search indices =head1 SYNOPSIS public-inbox-index [OPTIONS] INBOX_DIR... public-inbox-index [OPTIONS] --all =head1 DESCRIPTION public-inbox-index creates and updates the search, overview and NNTP article number database used by the read-only public-inbox HTTP and NNTP interfaces. Currently, this requires L<DBD::SQLite> and L<DBI> Perl modules. L<Search::Xapian> is optional, only to support the PSGI search interface. Once the initial indices are created by public-inbox-index, L<public-inbox-mda(1)> and L<public-inbox-watch(1)> will automatically maintain them. Running this manually to update indices is only required if relying on L<git-fetch(1)> to mirror an existing public-inbox; or if upgrading to a new version of public-inbox using the C<--reindex> option. Having the overview and article number database is essential to running the NNTP interface, and strongly recommended for the HTTP interface as it provides thread grouping in addition to normal search functionality. =head1 OPTIONS =over =item --jobs=JOBS, -j Influences the number of Xapian indexing shards in a (L<public-inbox-v2-format(5)>) inbox. See L<public-inbox-init(1)/--jobs> for a full description of sharding. C<--jobs=0> is accepted as of public-inbox 1.6.0 to disable parallel indexing regardless of the number of pre-existing shards. If the inbox has not been indexed or initialized, C<JOBS - 1> shards will be created (one job is always needed for indexing the overview and article number mapping). Default: the number of existing Xapian shards =item --compact / -c Compacts the Xapian DBs after indexing. This is recommended when using C<--reindex> to avoid running out of disk space while indexing multiple inboxes. While option takes a negligible amount of time compared to C<--reindex>, it requires temporarily duplicating the entire contents of the Xapian DB. This switch may be specified twice, in which case compaction happens both before and after indexing to minimize the temporal footprint of the (re)indexing operation. Available since public-inbox 1.4.0. =item --reindex Forces a re-index of all messages in the inbox. This can be used for in-place upgrades and bugfixes while NNTP/HTTP server processes are utilizing the index. Keep in mind this roughly doubles the size of the already-large Xapian database. Using this with C<--compact> or running L<public-inbox-compact(1)> afterwards is recommended to release free space. public-inbox protects writes to various indices with L<flock(2)>, so it is safe to reindex (and rethread) while L<public-inbox-watch(1)>, L<public-inbox-mda(1)> or L<public-inbox-learn(1)> run. This does not touch the NNTP article number database. It does not affect threading unless C<--rethread> is used. =item --all Index all inboxes configured in ~/.public-inbox/config. This is an alternative to specifying individual inboxes directories on the command-line. =item --rethread Regenerate internal THREADID and message thread associations when reindexing. This fixes some bugs in older versions of public-inbox. While it is possible to use this without C<--reindex>, it makes little sense to do so. Available in public-inbox 1.6.0+. =item --prune Run L<git-gc(1)> to prune and expire reflogs if discontiguous history is detected. This is intended to be used in mirrors after running L<public-inbox-edit(1)> or L<public-inbox-purge(1)> to ensure data is expunged from mirrors. Available since public-inbox 1.2.0. =item --max-size SIZE Sets or overrides L</publicinbox.indexMaxSize> on a per-invocation basis. See L</publicinbox.indexMaxSize> below. Available since public-inbox 1.5.0. =item --batch-size SIZE Sets or overrides L</publicinbox.indexBatchSize> on a per-invocation basis. See L</publicinbox.indexBatchSize> below. When using rotational storage but abundant RAM, using a large value (e.g. C<500m>) with C<--sequential-shard> can significantly speed up and reduce fragmentation during the initial index and full C<--reindex> invocations (but not incremental updates). Available in public-inbox 1.6.0+. =item --no-fsync Disables L<fsync(2)> and L<fdatasync(2)> operations on SQLite and Xapian. This is only effective with Xapian 1.4+. This is primarily intended for systems with low RAM and the small (default) C<--batch-size=1m>. Users of large C<--batch-size> may even find disabling L<fdatasync(2)> causes too much dirty data to accumulate, resulting on latency spikes from writeback. Available in public-inbox 1.6.0+. =item --sequential-shard Sets or overrides L</publicinbox.indexSequentialShard> on a per-invocation basis. See L</publicinbox.indexSequentialShard> below. Available in public-inbox 1.6.0+. =item --skip-docdata Stop storing document data in Xapian on an existing inbox. See L<public-inbox-init(1)/--skip-docdata> for description and caveats. Available in public-inbox 1.6.0+. =back =head1 FILES For v1 (ssoma) repositories described in L<public-inbox-v1-format(5)>. All public-inbox-specific files are contained within the C<$GIT_DIR/public-inbox/> directory. v2 inboxes are described in L<public-inbox-v2-format(5)>. =head1 CONFIGURATION =over 8 =item publicinbox.indexMaxSize Prevents indexing of messages larger than the specified size value. A single suffix modifier of C<k>, C<m> or C<g> is supported, thus the value of C<1m> to prevents indexing of messages larger than one megabyte. This is useful for avoiding memory exhaustion in mirrors via git. It does not prevent L<public-inbox-mda(1)> or L<public-inbox-watch(1)> from importing (and indexing) a message. This option is only available in public-inbox 1.5 or later. Default: none =item publicinbox.indexBatchSize Flushes changes to the filesystem and releases locks after indexing the given number of bytes. The default value of C<1m> (one megabyte) is low to minimize memory use and reduce contention with parallel invocations of L<public-inbox-mda(1)>, L<public-inbox-learn(1)>, and L<public-inbox-watch(1)>. Increase this value on powerful systems to improve throughput at the expense of memory use. The reduction of lock granularity may not be noticeable on fast systems. With SSDs, values above C<4m> have little benefit. For L<public-inbox-v2-format(5)> inboxes, this value is multiplied by the number of Xapian shards. Thus a typical v2 inbox with 3 shards will flush every 3 megabytes by default unless parallelism is disabled via C<--sequential-shard> or C<--jobs=0>. This influences memory usage of Xapian, but it is not exact. The actual memory used by Xapian and Perl has been observed in excess of 10x this value. This option is available in public-inbox 1.6 or later. public-inbox 1.5 and earlier used the current default, C<1m>. Default: 1m (one megabyte) =item publicinbox.indexSequentialShard For L<public-inbox-v2-format(5)> inboxes, setting this to C<true> allows indexing Xapian shards in multiple passes. This speeds up indexing on rotational storage with high seek latency by allowing individual shards to fit into the kernel page cache. Using a higher-than-normal number of C<--jobs> with L<public-inbox-init(1)> may be required to ensure individual shards are small enough to fit into cache. Warning: interrupting C<public-inbox-index(1)> while this option is in use may leave the search indices out-of-date with respect to SQLite databases. WWW and IMAP users may notice incomplete search results, but it is otherwise non-fatal. Using C<--reindex> will bring everything back up-to-date. Available in public-inbox 1.6.0+. This is ignored on L<public-inbox-v1-format(5)> inboxes. Default: false, shards are indexed in parallel =item publicinbox.<name>.indexSequentialShard Identical to L</publicinbox.indexSequentialShard>, but only affect the inbox matching E<lt>nameE<gt>. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG Used to override the default "~/.public-inbox/config" value. =item XAPIAN_FLUSH_THRESHOLD The number of documents to update before committing changes to disk. This environment is handled directly by Xapian, refer to Xapian API documentation for more details. For public-inbox 1.6 and later, use C<publicinbox.indexBatchSize> instead. Setting C<XAPIAN_FLUSH_THRESHOLD> or C<publicinbox.indexBatchSize> for a large C<--reindex> may cause L<public-inbox-mda(1)>, L<public-inbox-learn(1)> and L<public-inbox-watch(1)> tasks to wait long and unpredictable periods of time during C<--reindex>. Default: none, uses C<publicinbox.indexBatchSize> =back =head1 UPGRADING Occasionally, public-inbox will update it's schema version and require a full index by running this command. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://hjrcffqmbrq6wope.onion/meta/> =head1 COPYRIGHT Copyright 2016-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<Search::Xapian>, L<DBD::SQLite> �����������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-init.pod����������������������������������������������0000664�0000000�0000000�00000010133�13773461203�0022767�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-init - create or reinitialize a public-inbox =head1 SYNOPSIS B<public-inbox-init> [OPTIONS] NAME INBOX_DIR HTTP_URL ADDRESS [ADDRESS..] =head1 DESCRIPTION Creates an empty public-inbox or reinitializes an existing one. It updates C<~/.public-inbox/config> by creating a C<[publicinbox "NAME"]> section where C<publicinbox.NAME.inboxdir> is C<INBOX_DIR>, C<publicinbox.NAME.url> is C<HTTP_URL>, and C<publicinbox.NAME.address> is C<ADDRESS>. Multiple addresses may be specified for inboxes with multiple addresses. =head1 OPTIONS =over =item -V, --version FORMAT_VERSION Specify C<2> here to use the scalable L<public-inbox-v2-format(5)> if you have L<DBD::SQLite> installed. The default is C<1> for the old L<public-inbox-v1-format(5)>, but C<2> is strongly recommended for scalability if you have L<DBD::SQLite>. Default: C<1> =item -L, --indexlevel <basic|medium|full> Controls the indexing level for L<public-inbox-index(1)> See L<public-inbox-config(5)> for more information. Default: C<full> =item --ng, --newsgroup NEWSGROUP The NNTP group name for use with L<public-inbox-nntpd(8)>. This may be any newsgroup name with hierarchies delimited by C<.>. For example, the newsgroup for L<mailto:meta@public-inbox.org> is: C<inbox.comp.mail.public-inbox.meta> This may be set after-the-fact via C<publicinbox.$NAME.newsgroup> in the configuration file. See L<public-inbox-config(5)> for more info. Available in public-inbox 1.6.0+. Default: none. =item --skip-artnum This option allows archivists to publish incomplete archives with only new mail while allowing NNTP article numbers to be reserved for yet-to-be-archived old mail. This is mainly intended for users of C<--skip-epoch> (documented below) but may be of use to L<public-inbox-v1-format(5)> users. There is no automatic way to use reserved NNTP article numbers when old mail is found, yet. Available in public-inbox 1.6.0+. Default: unset, no NNTP article numbers are skipped =item -S, --skip-epoch For C<-V2> (L<public-inbox-v2-format(5)>) inboxes only, this option allows archivists to publish incomplete archives with newer mail while allowing "0.git" (or "1.git" and so on) epochs to be added-after-the-fact (without affecting "git clone" followers). Available since public-inbox 1.2.0. Default: unset, no epochs are skipped =item -j, --jobs=JOBS Control the number of Xapian index shards in a C<-V2> (L<public-inbox-v2-format(5)>) inbox. It can be useful to use a single shard (C<-j1>) for inboxes on high-latency storage (e.g. rotational HDD) unless the system has enough RAM to cache 5-10x the size of the git repository. Another approach for HDDs is to use the L<public-inbox-index(1)/publicInbox.indexSequentialShard> option and many shards, so each shard may fit into the kernel page cache. Unfortunately, excessive shards slows down read-only query performance. For fast storage, it is generally not useful to specify higher values than the default due to the top-level producer process being a bottleneck. Default: the number of online CPUs, up to 4 (3 shard workers, 1 producer) =item --skip-docdata Do not store document data in Xapian, reducing Xapian storage overhead by around 1.5%. Warning: this option prevents rollbacks to public-inbox 1.5.0 and earlier. Available in public-inbox 1.6.0+. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG Used to override the default C<~/.public-inbox/config> value. =back =head1 LIMITATIONS Some of the options documented in L<public-inbox-config(5)> require editing the config file. Old versions lack the C<--ng>/C<--newsgroup> parameter See L<public-inbox-config(5)> for all the options which may be applied to a given inbox. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://hjrcffqmbrq6wope.onion/meta/> =head1 COPYRIGHT Copyright 2019-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<git-init(1)>, L<git-config(1)>, L<public-inbox-v1-format(5)>, L<public-inbox-v2-format(5)> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-learn.pod���������������������������������������������0000664�0000000�0000000�00000004530�13773461203�0023131�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-learn - spam trainer and remover for public-inbox =head1 SYNOPSIS public-inbox-learn <spam|ham|rm> </path/to/RFC2822_message =head1 DESCRIPTION public-inbox-learn can remove spam or inject ham messages into an inbox while training a SpamAssassin instance. It is intended for users of L<public-inbox-mda(1)> or L<public-inbox-watch(1)>, but not users relying on L<git-fetch(1)> to mirror inboxes. It reads one message from standard input and operates on it depending on the command given: =head1 COMMANDS public-inbox-learn takes one of the following commands as its first and only argument: =over 8 =item spam Treat the message as spam. This will mark the message as removed so it becomes inaccessible via NNTP or WWW endpoints for all configured inboxes. The message remains accessible in git history. It will also be fed to L<spamc(1)> for training purposes unless C<publicinboxmda.spamcheck> is C<none> in L<public-inbox-config(5)>. =item ham Treat standard input as ham. This is useful for manually injecting messages into the archives which failed the spam check run by L<public-inbox-mda(1)> or L<public-inbox-watch(1)>. It relies on the C<To:>, C<Cc:>, and C<List-ID:> headers to match configured inbox addresses and C<listid> directives. It will also be fed to L<spamc(1)> for training purposes unless C<publicinboxmda.spamcheck> is C<none> in L<public-inbox-config(5)>. =item rm This is similar to the C<spam> command above, but does not feed the message to L<spamc(1)> and only removes messages which match on any of the C<To:>, C<Cc:>, and C<List-ID:> headers. The C<--all> option may be used match C<spam> semantics in removing the message from all configured inboxes. C<--all> is only available in public-inbox 1.6.0+. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG Per-user config file parseable by L<git-config(1)>. See L<public-inbox-config(5)>. Default: ~/.public-inbox/config =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://hjrcffqmbrq6wope.onion/meta/> =head1 COPYRIGHT Copyright 2019-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<spamc(1)>, L<public-inbox-mda(1)>, L<public-inbox-watch(1)> ������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-mda.pod�����������������������������������������������0000664�0000000�0000000�00000004014�13773461203�0022566�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-mda - mail delivery agent for public-inbox =head1 SYNOPSIS public-inbox-mda </path/to/RFC2822_message =head1 DESCRIPTION Mail Delivery Agent (MDA) for public-inbox installations. Each system user may have their own public-inbox instances. This may be invoked via L<procmail(1)> or similar tools. By default, it relies on L<spamc(1)> for filtering mail, but may be disabled via L<public-inbox-config(5)/publicinboxmda.spamcheck> =head1 OPTIONS =over 8 =item --no-precheck By default, public-inbox-mda does some simple checks before invoking L<spamc(1)> since it is intended to receive mail before it goes to a mailing list. However, some users prefer to use public-inbox-mda to mirror mailing lists. This option exists to support those users. Using this option, the following prechecks are disabled: * multiple Message-IDs * non-existent Message-IDs * Message-IDs longer than 244 characters long * From: header shorter than 3 characters * Subject: header shorter than 2 characters * unusable Date: headers * inbox address specified in To: or Cc: header =back =head1 ENVIRONMENT =over 8 =item ORIGINAL_RECIPIENT The original recipient email address, set by the MTA. Postfix sets it by default, untested on other MTAs. This does not have to be set if relying on C<publicinbox.$NAME.listid> directives configured in L<public-inbox-config(5)>. =item PI_CONFIG Per-user config file parseable by L<git-config(1)>. See L<public-inbox-config(5)>. Default: ~/.public-inbox/config =item PI_EMERGENCY emergency Maildir destination. Default: ~/.public-inbox/emergency/ =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://hjrcffqmbrq6wope.onion/meta/> =head1 COPYRIGHT Copyright 2013-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<git(1)>, L<git-config(1)>, L<public-inbox-v1-format(5)> ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-nntpd.pod���������������������������������������������0000664�0000000�0000000�00000005174�13773461203�0023160�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-nntpd - NNTP server for sharing public-inbox =head1 SYNOPSIS public-inbox-nntpd [OPTIONS] =head1 DESCRIPTION public-inbox-nntpd provides a read-only NNTP daemon for public-inbox. It uses options and environment variables common to all L<public-inbox-daemon(8)> implementations. The default configuration will never require write access to the directory where the public-inbox is stored, so it may be run as a different user than the user running L<public-inbox-watch(1)>, L<public-inbox-mda(1)>, or L<git-fetch(1)>. =head1 OPTIONS See common options in L<public-inbox-daemon(8)/OPTIONS>. Additionally, NNTP-specific behavior for certain options are supported and documented below. =over =item -l, --listen PROTO://ADDRESS/?cert=/path/to/cert,key=/path/to/key In addition to the normal C<-l>/C<--listen> switch described in L<public-inbox-daemon(8)>, the protocol prefix (e.g. C<nntp://> or C<nntps://>) may be specified to force a given protocol. For STARTTLS and NNTPS support, the C<cert> and C<key> may be specified on a per-listener basis after a C<?> character and separated by C<,>. These directives are per-directive, and it's possible to use a different cert for every listener. =item --cert /path/to/cert The default TLS certificate for optional STARTTLS and NNTPS support if the C<cert> option is not given with C<--listen>. If using systemd-compatible socket activation and a TCP listener on port 563 is inherited, it is automatically NNTPS when this option is given. When a listener on port 119 is inherited and this option is given, it automatically gets STARTTLS support. =item --key /path/to/key The default private TLS certificate key for optional STARTTLS and NNTPS support if the C<key> option is not given with C<--listen>. The private key may concatenated into the path used by C<--cert>, in which case this option is not needed. =back =head1 CONFIGURATION These configuration knobs should be used in the L<public-inbox-config(5)> =over 8 =item publicinbox.<name>.newsgroup =item publicinbox.nntpserver =back See L<public-inbox-config(5)> for documentation on them. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/>, L<nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta>, L<nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta> =head1 COPYRIGHT Copyright 2013-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<git(1)>, L<git-config(1)>, L<public-inbox-daemon(8)>, L<public-inbox-config(5)> ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-overview.pod������������������������������������������0000664�0000000�0000000�00000007442�13773461203�0023703�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-overview - an overview of public-inbox =head1 DESCRIPTION public-inbox consists of many pieces which may be used independently or in conjunction of each other for: =over 4 =item 1 Mirroring existing public-inboxes. =item 2 Mirroring mailing lists. =item 3 Hosting standalone inboxes. =back =head2 Mirroring existing public-inboxes Mirroring existing public-inboxes is the easiest way to get started. Your mirror will remain dependent on the REMOTE_URL you are mirroring and you only need to use two new commands in addition to common L<git(1)> commands. Instructions are different depending on whether the inbox is L<public-inbox-v1-format(5)> or L<public-inbox-v2-format(5)>. See the "Archives are clonable:" part of the WWW interface of a given inbox for cloning instructions specific to that inbox. The instructions are roughly: # for v1 inboxes: git clone --mirror URL INBOX_DIR # for v2 inboxes (each epoch needs to be cloned): git clone --mirror URL/EPOCH INBOX_DIR/git/EPOCH.git # The following should create the necessary entry in # ~/.public-inbox/config, use "-V2" only for v2 inboxes: public-inbox-init [-V2] NAME INBOX_DIR MY_URL LIST_ADDRESS # Optional but strongly recommended for hosting HTTP # (and required for NNTP) # enable overview (requires DBD::SQLite) and, if Search::Xapian is # available, search: public-inbox-index INBOX_DIR # Periodically fetch the repo using git-fetch(1) # for v1 inboxes: git --git-dir=INBOX_DIR fetch # for v2 (in most cases, only the newest epoch needs to be fetched): git --git-dir=INBOX_DIR/git/EPOCH.git fetch # index new messages after fetching: public-inbox-index INBOX_DIR See L</"Serving public-inboxes"> below for info on how to expose your mirror to other readers. =head2 Mirroring mailing lists Mirroring mailing lists may be done by any reader of a mailing list using L<public-inbox-watch(1)>. # This will create a new v2 inbox: public-inbox-init -V2 NAME INBOX_DIR MY_URL LIST_ADDRESS Then, see the L<public-inbox-watch(1)> manual for configuring C<watch>, C<watchheader>, C<listid> and the optional C<spamcheck> and C<watchspam> entries. You will need to leave L<public-inbox-watch(1)> running to keep the mailbox up-to-date as messages are delivered to the mailing list. Running L<public-inbox-index(1)> to create search indices is recommended. L<public-inbox-watch(1)> will automatically maintain the indices if they were created by L<public-inbox-index(1)> public-inbox-index INBOX_DIR Instead of using L<public-inbox-watch(1)>, using L<public-inbox-mda(1)> with the C<--no-precheck> option and relying on the C<listid> directive in L<public-inbox-config(5)> is also an option. =head2 Hosting standalone inboxes Using L<public-inbox-init(1)> to initialize the inbox as in the other methods is recommended. See L<public-inbox-mda(1)> for more details; but this also requires MTA-specific knowledge. =head2 Serving public-inboxes Since public-inboxes are git repositories, they may be served to remote clients via L<git-daemon(1)> as well as specialized HTTP and NNTP daemons distributed with public-inbox. See L<public-inbox-httpd(1)> and L<public-inbox-nntpd(1)> for more information on using these daemons. Hosting a public-inbox over HTTP or NNTP will never require write access to any files in the inbox directory. Users familiar with PSGI and L<Plack> may also use L<PublicInbox::WWW> with the preferred server instead of L<public-inbox-httpd(1)> =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://hjrcffqmbrq6wope.onion/meta/> =head1 COPYRIGHT Copyright 2016-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-purge.pod���������������������������������������������0000664�0000000�0000000�00000004106�13773461203�0023151�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-purge - erase messages from a public inbox and its history =head1 SYNOPSIS # requires ~/.public-inbox/config public-inbox-purge --all </path/to/message-to-purge # for testing with unconfigured inboxes: public-inbox-purge $INBOX_DIR </path/to/message-to-purge =head1 DESCRIPTION public-inbox-purge allows removing entire messages in a given inbox from history. It is only intended as a last resort, as it will cause discontiguous git history and draw more attention to the sensitive data in mirrors. For removing spam, L<public-inbox-learn(1)> is preferable as it preserves contiguous git history. For editing sensitive information out of messages, consider L<public-inbox-edit(1)>, instead, but keep in mind it still leads to discontiguous git history. =head1 OPTIONS =over =item --all Purge the message in all inboxes configured in ~/.public-inbox/config. This is an alternative to specifying individual inboxes directories on the command-line. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG The default config file, normally "~/.public-inbox/config". See L<public-inbox-config(5)> =back =head1 LIMITATIONS Only L<public-inbox-v2-format(5)> inboxes are supported. This is safe to run while normal inbox writing tools (L<public-inbox-mda(1)>, L<public-inbox-watch(1)>, L<public-inbox-learn(1)>) are active. Running this in parallel with L<public-inbox-xcpdb(1)> or C<"public-inbox-index --reindex"> can lead to errors or purged data remaining indexed. Incremental L<public-inbox-index(1)> (without C<--reindex>) is fine. Keep in mind this is a last resort, as it will be disruptive to anyone using L<git(1)> to mirror the inbox being purged. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://hjrcffqmbrq6wope.onion/meta/> =head1 COPYRIGHT Copyright 2019-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-edit(1)>, L<public-inbox-learn(1)> ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-tuning.pod��������������������������������������������0000664�0000000�0000000�00000011716�13773461203�0023340�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-tuning - tuning public-inbox =head1 DESCRIPTION public-inbox intends to support a wide variety of hardware. While we strive to provide the best out-of-the-box performance possible, tuning knobs are an unfortunate necessity in some cases. =over 4 =item 1 New inboxes: public-inbox-init -V2 =item 2 Process spawning =item 3 Performance on rotational hard disk drives =item 4 Btrfs (and possibly other copy-on-write filesystems) =item 5 Performance on solid state drives =item 6 Read-only daemons =back =head2 New inboxes: public-inbox-init -V2 If you're starting a new inbox (and not mirroring an existing one), the L<-V2|public-inbox-v2-format(5)> requires L<DBD::SQLite>, but is orders of magnitude more scalable than the original C<-V1> format. =head2 Process spawning Our optional use of L<Inline::C> speeds up subprocess spawning from large daemon processes. To enable L<Inline::C>, either set the C<PERL_INLINE_DIRECTORY> environment variable to point to a writable directory, or create C<~/.cache/public-inbox/inline-c> for any user(s) running public-inbox processes. More (optional) L<Inline::C> use will be introduced in the future to lower memory use and improve scalability. =head2 Performance on rotational hard disk drives Random I/O performance is poor on rotational HDDs. Xapian indexing performance degrades significantly as DBs grow larger than available RAM. Attempts to parallelize random I/O on HDDs leads to pathological slowdowns as inboxes grow. While C<-V2> introduced Xapian shards as a parallelization mechanism for SSDs; enabling C<publicInbox.indexSequentialShard> repurposes sharding as mechanism to reduce the kernel page cache footprint when indexing on HDDs. Initializing a mirror with a high C<--jobs> count to create more shards (in C<-V2> inboxes) will keep each shard smaller and reduce its kernel page cache footprint. Keep in mind excessive sharding imposes a performance penalty for read-only queries. Users with large amounts of RAM are advised to set a large value for C<publicinbox.indexBatchSize> as documented in L<public-inbox-index(1)>. C<dm-crypt> users on Linux 4.0+ are advised to try the C<--perf-same_cpu_crypt> C<--perf-submit_from_crypt_cpus> switches of L<cryptsetup(8)> to reduce I/O contention from kernel workqueue threads. =head2 Btrfs (and possibly other copy-on-write filesystems) L<btrfs(5)> performance degrades from fragmentation when using large databases and random writes. The Xapian + SQLite indices used by public-inbox are no exception to that. public-inbox 1.6.0+ disables copy-on-write (CoW) on Xapian and SQLite indices on btrfs to achieve acceptable performance (even on SSD). Disabling copy-on-write also disables checksumming, thus C<raid1> (or higher) configurations may be corrupt after unsafe shutdowns. Fortunately, these SQLite and Xapian indices are designed to recoverable from git if missing. Disabling CoW does not prevent all fragmentation. Large values of C<publicInbox.indexBatchSize> also limit fragmentation during the initial index. Avoid snapshotting subvolumes containing Xapian and/or SQLite indices. Snapshots use CoW despite our efforts to disable it, resulting in fragmentation. L<filefrag(8)> can be used to monitor fragmentation, and C<btrfs filesystem defragment -fr $INBOX_DIR> may be necessary. Large filesystems benefit significantly from the C<space_cache=v2> mount option documented in L<btrfs(5)>. Older, non-CoW filesystems are generally work well out-of-the-box for our Xapian and SQLite indices. =head2 Performance on solid state drives While SSD read performance is generally good, SSD write performance degrades as the drive ages and/or gets full. Issuing C<TRIM> commands via L<fstrim(8)> or similar is required to sustain write performance. Users of the Flash-Friendly File System L<F2FS|https://en.wikipedia.org/wiki/F2FS> may benefit from optimizations found in SQLite 3.21.0+. Benchmarks are greatly appreciated. =head2 Read-only daemons L<public-inbox-httpd(1)>, L<public-inbox-imapd(1)>, and L<public-inbox-nntpd(1)> are all designed for C10K (or higher) levels of concurrency from a single process. SMP systems may use C<--worker-processes=NUM> as documented in L<public-inbox-daemon(8)> for parallelism. The open file descriptor limit (C<RLIMIT_NOFILE>, C<ulimit -n> in L<sh(1)>, C<LimitNOFILE=> in L<systemd.exec(5)>) may need to be raised to accommodate many concurrent clients. Transport Layer Security (IMAPS, NNTPS, or via STARTTLS) significantly increases memory use of client sockets, sure to account for that in capacity planning. =head1 CONTACT Feedback encouraged via plain-text mail to L<mailto:meta@public-inbox.org> Information for *BSDs and non-traditional filesystems especially welcome. Our archives are hosted at L<https://public-inbox.org/meta/>, L<http://hjrcffqmbrq6wope.onion/meta/>, and other places =head1 COPYRIGHT Copyright 2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> ��������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-v1-format.pod�����������������������������������������0000664�0000000�0000000�00000013521�13773461203�0023644�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������% public-inbox developer manual =head1 NAME public-inbox-v1-format - git repository and tree description (aka "ssoma") =head1 DESCRIPTION WARNING: this does NOT describe the scalable v2 format used by public-inbox. Use of ssoma is not recommended for new installations due to scalability problems. ssoma uses a git repository to store each email as a git blob. The tree filename of the blob is based on the SHA1 hexdigest of the first Message-ID header. A commit is made for each message delivered. The commit SHA-1 identifier is used by ssoma clients to track synchronization state. =head1 PATHNAMES IN TREES A Message-ID may be extremely long and also contain slashes, so using them as a path name is challenging. Instead we use the SHA-1 hexdigest of the Message-ID (excluding the leading "E<lt>" and trailing "E<gt>") to generate a path name. Leading and trailing white space in the Message-ID header is ignored for hashing. A message with Message-ID of: E<lt>20131106023245.GA20224@dcvr.yhbt.netE<gt> Would be stored as: f2/8c6cfd2b0a65f994c3e1be266105413b3d3f63 Thus it is easy to look up the contents of a message matching a given a Message-ID. =head1 MESSAGE-ID CONFLICTS public-inbox v1 repositories currently do not resolve conflicting Message-IDs or messages with multiple Message-IDs. =head1 HEADERS The Message-ID header is required. "Bytes", "Lines" and "Content-Length" headers are stripped and not allowed, they can interfere with further processing. When using ssoma with public-inbox-mda, the "Status" mbox header is also stripped as that header makes no sense in a public archive. =head1 LOCKING L<flock(2)> locking exclusively locks the empty $GIT_DIR/ssoma.lock file for all non-atomic operations. =head1 EXAMPLE INPUT FLOW (SERVER-SIDE MDA) 1. Message is delivered to a mail transport agent (MTA) 1a. (optional) reject/discard spam, this should run before ssoma-mda 1b. (optional) reject/strip unwanted attachments ssoma-mda handles all steps once invoked. 2. Mail transport agent invokes ssoma-mda 3. reads message via stdin, extracting Message-ID 4. acquires exclusive flock lock on $GIT_DIR/ssoma.lock 5. creates or updates the blob of associated 2/38 SHA-1 path 6. updates the index and commits 7. releases $GIT_DIR/ssoma.lock ssoma-mda can also be used as an L<inotify(7)> trigger to monitor maildirs, and the ability to monitor IMAP mailboxes using IDLE will be available in the future. =head1 GIT REPOSITORIES (SERVERS) ssoma uses bare git repositories on both servers and clients. Using the L<git-init(1)> command with --bare is the recommend method of creating a git repository on a server: git init --bare /path/to/wherever/you/want.git There are no standardized paths for servers, administrators make all the choices regarding git repository locations. Special files in $GIT_DIR on the server: =over =item $GIT_DIR/ssoma.lock An empty file for L<flock(2)> locking. This is necessary to ensure the index and commits are updated consistently and multiple processes running MDA do not step on each other. =item $GIT_DIR/public-inbox/msgmap.sqlite3 SQLite3 database maintaining a stable mapping of Message-IDs to NNTP article numbers. Used by L<public-inbox-nntpd(1)> and created and updated by L<public-inbox-index(1)>. Users of the L<PublicInbox::WWW> interface will find it useful for attempting recovery from copy-paste truncations of URLs containing long Message-IDs. Automatically updated by L<public-inbox-mda(1)>, L<public-inbox-learn(1)> and L<public-inbox-watch(1)>. Losing or damaging this file will cause synchronization problems for NNTP clients. This file is expected to be stable and require no updates to its schema. Requires L<DBD::SQLite>. =item $GIT_DIR/public-inbox/xapian$N/ Xapian database for search indices in the PSGI web UI. $N is the value of PublicInbox::Search::SCHEMA_VERSION, and installations may have parallel versions on disk during upgrades or to roll-back upgrades. This is created and updated by L<public-inbox-index(1)>. Automatically updated by L<public-inbox-mda(1)>, L<public-inbox-learn(1)> and L<public-inbox-watch(1)>. This directory can always be regenerated with L<public-inbox-index(1)>. If lost or damaged, there is no need to back it up unless the CPU/memory cost of regenerating it outweighs the storage/transfer cost. Since SCHEMA_VERSION 15 and the development of the v2 format, the "overview" DB also exists in the xapian directory for v1 repositories. See L<public-inbox-v2-format(5)/OVERVIEW DB> Our use of the L</OVERVIEW DB> requires Xapian document IDs to remain stable. Using L<public-inbox-compact(1)> and L<public-inbox-xcpdb(1)> wrappers are recommended over tools provided by Xapian. This directory is large, often two to three times the size of the objects stored in a packed git repository. =item $GIT_DIR/ssoma.index This file is no longer used or created by public-inbox, but it is updated if it exists to remain compatible with ssoma installations. A git index file used for MDA updates. The normal git index (in $GIT_DIR/index) is not used at all as there is typically no working tree. =back Each client $GIT_DIR may have multiple mbox/maildir/command targets. It is possible for a client to extract the mail stored in the git repository to multiple mboxes for compatibility with a variety of different tools. =head1 CAVEATS It is NOT recommended to check out the working directory of a git. there may be many files. It is impossible to completely expunge messages, even spam, as git retains full history. Projects may (with adequate notice) cycle to new repositories/branches with history cleaned up via L<git-filter-repo(1)> or L<git-filter-branch(1)>. This is up to the administrators. =head1 COPYRIGHT Copyright 2013-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<http://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<gitrepository-layout(5)>, L<ssoma(1)> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-v2-format.pod�����������������������������������������0000664�0000000�0000000�00000021277�13773461203�0023654�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������% public-inbox developer manual =head1 NAME public-inbox-v2-format - structure of public inbox v2 archives =head1 DESCRIPTION The v2 format is designed primarily to address several scalability problems of the original format described at L<public-inbox-v1-format(5)>. It also handles messages with Message-IDs. =head1 INBOX LAYOUT The key change in v2 is the inbox is no longer a bare git repository, but a directory with two or more git repositories. v2 divides git repositories by time "epochs" and Xapian databases for parallelism by "shards". =head2 INBOX OVERVIEW AND DEFINITIONS $EPOCH - Integer starting with 0 based on time $SCHEMA_VERSION - DB schema version (for Xapian) $SHARD - Integer starting with 0 based on parallelism foo/ # "foo" is the name of the inbox - inbox.lock # lock file to protect global state - git/$EPOCH.git # normal git repositories - all.git # empty, alternates to $EPOCH.git - xap$SCHEMA_VERSION/$SHARD # per-shard Xapian DB - xap$SCHEMA_VERSION/over.sqlite3 # OVER-view DB for NNTP, threading - msgmap.sqlite3 # same the v1 msgmap For blob lookups, the reader only needs to open the "all.git" repository with $GIT_DIR/objects/info/alternates which references every $EPOCH.git repo. Individual $EPOCH.git repos DO NOT use alternates themselves as git currently limits recursion of alternates nesting depth to 5. =head2 GIT EPOCHS One of the inherent scalability problems with git itself is the full history of a project must be stored and carried around to all clients. To address this problem, the v2 format uses multiple git repositories, stored as time-based "epochs". We currently divide epochs into roughly one gigabyte segments; but this size can be configurable (if needed) in the future. A pleasant side-effect of this design is the git packs of older epochs are stable, allowing them to be cloned without requiring expensive pack generation. This also allows clients to clone only the epochs they are interested in to save bandwidth and storage. To minimize changes to existing v1-based code and simplify our code, we use the "alternates" mechanism described in L<gitrepository-layout(5)> to link all the epoch repositories with a single read-only "all.git" endpoint. Processes retrieve blobs via the "all.git" repository, while writers write blobs directly to epochs. =head2 GIT TREE LAYOUT One key problem specific to v1 was large trees were frequently a performance problem as name lookups are expensive and there were limited deltafication opportunities with unpredictable file names. As a result, all Xapian-enabled installations retrieve blob object_ids directly in v1, bypassing tree lookups. While dividing git repositories into epochs caps the growth of trees, worst-case tree size was still unnecessary overhead and worth eliminating. So in contrast to the big trees of v1, the v2 git tree contains only a single file at the top-level of the tree, either 'm' (for 'mail' or 'message') or 'd' (for deleted). A tree does not have 'm' and 'd' at the same time. Mail is still stored in blobs (instead of inline with the commit object) as we still need a stable reference in the indices in case commit history is rewritten to comply with legal requirements. After-the-fact invocations of L<public-inbox-index> will ignore messages written to 'd' after they are written to 'm'. Deltafication is not significantly improved over v1, but overall storage for trees is made as as small as possible. Initial statistics and benchmarks showing the benefits of this approach are documented at: L<https://public-inbox.org/meta/20180209205140.GA11047@dcvr/> =head2 XAPIAN SHARDS Another second scalability problem in v1 was the inability to utilize multiple CPU cores for Xapian indexing. This is addressed by using shards in Xapian to perform import indexing in parallel. As with git alternates, Xapian natively supports a read-only interface which transparently abstracts away the knowledge of multiple shards. This allows us to simplify our read-only code paths. The performance of the storage device is now the bottleneck on larger multi-core systems. In our experience, performance is improved with high-quality and high-quantity solid-state storage. Issuing TRIM commands with L<fstrim(8)> was necessary to maintain consistent performance while developing this feature. Rotational storage devices perform significantly worse than solid state storage for indexing of large mail archives; but are fine for backup and usable for small instances. As of public-inbox 1.6.0, the C<publicInbox.indexSequentialShard> option of L<public-inbox-index(1)> may be used with a high shard count to ensure individual shards fit into page cache when the entire Xapian DB cannot. Our use of the L</OVERVIEW DB> requires Xapian document IDs to remain stable. Using L<public-inbox-compact(1)> and L<public-inbox-xcpdb(1)> wrappers are recommended over tools provided by Xapian. =head2 OVERVIEW DB Towards the end of v2 development, it became apparent Xapian did not perform well for sorting large result sets used to generate the landing page in the PSGI UI (/$INBOX/) or many queries used by the NNTP server. Thus, SQLite was employed and the Xapian "skeleton" DB was renamed to the "overview" DB (after the NNTP OVER/XOVER commands). The overview DB maintains all the header information necessary to implement the NNTP OVER/XOVER commands and non-search endpoints of the PSGI UI. Xapian has become completely optional for v2 (as it is for v1), but SQLite remains required for v2. SQLite turns out to be powerful enough to maintain overview information. Most of the PSGI and all of the NNTP functionality is possible with only SQLite in addition to git. The overview DB was an instrumental piece in maintaining near constant-time read performance on a dataset 2-3 times larger than LKML history as of 2018. =head3 GHOST MESSAGES The overview DB also includes references to "ghost" messages, or messages which have replies but have not been seen by us. Thus it is expected to have more rows than the "msgmap" DB described below. =head2 msgmap.sqlite3 The SQLite msgmap DB is unchanged from v1, but it is now at the top-level of the directory. =head1 OBJECT IDENTIFIERS There are three distinct type of identifiers. content_hash is the new one for v2 and should make message removal and deduplication easier. object_id and Message-ID are already known. =over =item object_id The blob identifier git uses (currently SHA-1). No need to publicly expose this outside of normal git ops (cloning) and there's no need to make this searchable. As with v1 of public-inbox, this is stored as part of the Xapian document so expensive name lookups can be avoided for document retrieval. =item Message-ID The email header; duplicates allowed for archival purposes. This remains a searchable field in Xapian. Note: it's possible for emails to have multiple Message-ID headers (and L<git-send-email(1)> had that bug for a bit); so we take all of them into account. In case of conflicts detected by content_hash below, we generate a new Message-ID based on content_hash; if the generated Message-ID still conflicts, a random one is generated. =item content_hash A hash of relevant headers and raw body content for purging of unwanted content. This is not stored anywhere, but always calculated on-the-fly. For now, the relevant headers are: Subject, From, Date, References, In-Reply-To, To, Cc Received, List-Id, and similar headers are NOT part of content_hash as they differ across lists and we will want removal to be able to cross lists. The textual parts of the body are decoded, CRLF normalized to LF, and trailing whitespace stripped. Notably, hashing the raw body risks being broken by list signatures; but we can use filters (e.g. PublicInbox::Filter::Vger) to clean the body for imports. content_hash is SHA-256 for now; but can be changed at any time without making DB changes. =back =head1 LOCKING L<flock(2)> locking exclusively locks the empty inbox.lock file for all non-atomic operations. =head1 HEADERS Same handling as with v1, except the Message-ID header will be generated if not provided or conflicting. "Bytes", "Lines" and "Content-Length" headers are stripped and not allowed, they can interfere with further processing. The "Status" mbox header is also stripped as that header makes no sense in a public archive. =head1 THANKS Thanks to the Linux Foundation for sponsoring the development and testing of the v2 format. =head1 COPYRIGHT Copyright 2018-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<http://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<gitrepository-layout(5)>, L<public-inbox-v1-format(5)> ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox-watch.pod���������������������������������������������0000664�0000000�0000000�00000012753�13773461203�0023144�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-watch - mailbox watcher for public-inbox =head1 SYNOPSIS public-inbox-watch In ~/.public-inbox/config: [publicinbox "test"] ; generic public-inbox-config keys: address = test@example.com url = http://example.com/test inboxdir = /path/to/test.example.com.git ; config keys specific to public-inbox-watch: watch = maildir:/path/to/maildirs/.INBOX.test/ ; optional, emails that don't have a header matching ; value will be skipped watchheader = List-Id:<test.example.com> [publicinboxwatch] ; optional, enable use of spamc(1) for checking: spamcheck = spamc ; optional, emails marked as read which appear ; here will be trained as spam and deleted from ; the inboxdirs of any public-inboxes which are ; configured for watch. ; This is global for all publicinbox.* sections watchspam = maildir:/path/to/maildirs/.INBOX.spam =head1 DESCRIPTION public-inbox-watch allows watching a mailbox or newsgroup for the arrival of new messages and automatically importing them into public-inbox git repositories and indices. public-inbox-watch is useful in situations when a user wishes to mirror an existing mailing list, but has no access to run L<public-inbox-mda(1)> on a server. Unlike public-inbox-mda which is invoked once per-message, public-inbox-watch is a persistent process, making it faster for after-the-fact imports of large Maildirs. Upon startup, it scans the mailbox for new messages to be imported while it was not running. As of public-inbox 1.6.0, Maildirs, IMAP folders, and NNTP newsgroups are supported. Previous versions of public-inbox only supported Maildirs. public-inbox-watch should be run inside a L<screen(1)> session or as a L<systemd(1)> service. Errors are emitted to stderr. =head1 OPTIONS public-inbox-watch takes no command-line options. =head1 CONFIGURATION These configuration knobs should be used in the L<public-inbox-config(5)> file =over 8 =item publicinbox.<name>.watch A location to watch. public-inbox 1.5.0 and earlier only supported C<maildir:> paths: [publicinbox "test"] watch = maildir:/path/to/maildirs/.INBOX.test/ public-inbox 1.6.0 supports C<nntp://>, C<nntps://>, C<imap://> and C<imaps://> URLs: watch = nntp://news.example.com/inbox.test.group watch = imaps://user@mail.example.com/INBOX.test This may be specified multiple times to combine several mailboxes into a single public-inbox. URLs requiring authentication will require L<netrc(5)> and/or L<git-credential(1)> to fill in the username and password. Default: none =item publicinbox.<name>.watchheader [publicinbox "test"] watchheader = List-Id:<test.example.com> If specified, L<public-inbox-watch(1)> will only process mail matching the given header. If specified multiple times in public-inbox 1.5 or later, mail will be processed if it matches any of the values. Only the last value was used in public-inbox 1.4 and earlier. Default: none =item publicinboxwatch.spamcheck This may be set to C<spamc> to enable the use of SpamAssassin L<spamc(1)> for filtering spam before it is imported into git history. Other spam filtering backends may be supported in the future. Default: none =item publicinboxwatch.watchspam A Maildir to watch for confirmed spam messages to appear in. Messages which appear in this folder with the (S)een flag will be hidden from all configured inboxes based on Message-ID and content matching. Messages without the (S)een flag are not considered for hiding. This hiding affects all configured public-inboxes in PI_CONFIG. As with C<publicinbox.$NAME.watch>, C<imap://> and C<imaps://> URLs are supported in public-inbox 1.6.0+. Default: none; only for L<public-inbox-watch(1)> users =item imap.Starttls / imap.$URL.Starttls Whether or not to use C<STARTTLS> on plain C<imap://> connections. May be specified for certain URLs via L<git-config(1)/--get-urlmatch> in C<git(1)> 1.8.5+. Default: C<true> =item imap.Compress / imap.$URL.Compress Whether or not to use the IMAP COMPRESS (RFC4978) extension to save bandwidth. This is not supported by all IMAP servers and some advertising this feature may not implement it correctly. May be specified only for certain URLs if L<git(1)> 1.8.5+ is installed to use L<git-config(1)/--get-urlmatch> Default: C<false> =item nntp.Starttls / nntp.$URL.Starttls Whether or not to use C<STARTTLS> on plain C<nntp://> connections. May be specified for certain URLs via L<git-config(1)/--get-urlmatch> in C<git(1)> 1.8.5+. Default: C<false> if the hostname is a Tor C<.onion>, C<true> otherwise =back =head1 SIGNALS =over 8 =item SIGHUP Reload the config file (default: ~/.public-inbox/config) =item SIGUSR1 Rescan all watched mailboxes. This is done automatically after startup. =item SIGQUIT / SIGTERM / SIGINT Gracefully shut down. In-flight messages will be stored and indexed. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG config file. default: ~/.public-inbox/config See L<public-inbox-config(5)> =item PERL_INLINE_DIRECTORY This may affect any public-inbox processes, but is intended for long-lived ones such as C<public-inbox-watch> or network daemons. See L<public-inbox-daemon(8)>. =back =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://hjrcffqmbrq6wope.onion/meta/> =head1 COPYRIGHT Copyright 2016-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-config(5)> ���������������������public-inbox-1.6.1/Documentation/public-inbox-xcpdb.pod���������������������������������������������0000664�0000000�0000000�00000007434�13773461203�0023136�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox-xcpdb - upgrade Xapian DB formats =head1 SYNOPSIS public-inbox-xcpdb [OPTIONS] INBOX_DIR public-inbox-xcpdb [OPTIONS] --all =head1 DESCRIPTION public-inbox-xcpdb is similar to L<copydatabase(1)> for upgrading to the latest database format supported by Xapian (e.g. "glass" or "honey"), but is designed to tolerate and accept parallel Xapian database modifications from L<public-inbox-watch(1)>, L<public-inbox-mda(1)>, L<public-inbox-learn(1)>, and L<public-inbox-index(1)>. This command is rarely used, as Xapian DB formats rarely change. =head1 OPTIONS =over =item --all Copy all inboxes configured in ~/.public-inbox/config. This is an alternative to specifying individual inboxes directories on the command-line. =item -c, --compact In addition to performing the copy operation, run L<xapian-compact(1)> on each Xapian shard after copying but before finalizing it. Compared to the cost of copying a Xapian database, compacting a Xapian database takes only around 5% of the time required to copy. Compared to L<public-inbox-compact(1)>, use of this option is preferable for gigantic inboxes where the coarse-grained lock currently required for L<public-inbox-compact(1)> can cause the compaction to take hours at-a-time. =item --reshard=N / -R N Reshard the Xapian database on a L<v2|public-inbox-v2-format(5)> inbox to C<N> shards . Since L<xapian-compact(1)> is not suitable for merging, users can rely on this switch to reshard the existing Xapian database(s) to any positive value of C<N>. This is useful in case the Xapian DB was created with too few or too many shards given the capabilities of the current hardware. =item --blocksize / --no-full / --fuller These options are passed directly to L<xapian-compact(1)> when used with C<--compact>. =item --no-fsync Disable L<fsync(2)> and L<fdatasync(2)>. See L<public-inbox-index(1)/--no-fsync> for caveats. Available in public-inbox 1.6.0+. =item --sequential-shard Copy each shard sequentially, ignoring C<--jobs>. This also affects indexing done at the end of a run. =item --batch-size=BYTES =item --max-size=BYTES See L<public-inbox-index(1)> for a description of these options. These indexing options indexing at the end of a run. C<public-inbox-xcpdb> may run in parallel with with L<public-inbox-index(1)>, and C<public-inbox-xcpdb> needs to reindex changes made to the old Xapian DBs by L<public-inbox-index(1)> while it was running. =back =head1 ENVIRONMENT =over 8 =item PI_CONFIG The default config file, normally "~/.public-inbox/config". See L<public-inbox-config(5)> =item XAPIAN_FLUSH_THRESHOLD The number of documents to update before committing changes to disk. This environment is handled directly by Xapian, refer to Xapian API documentation for more details. Default: 10000 =back =head1 UPGRADING This tool is intended for admins upgrading Xapian search databases used by public-inbox, NOT users upgrading public-inbox itself. In particular, it DOES NOT upgrade the schema used by the PSGI search interface (see L<public-inbox-index(1)>). =head1 LIMITATIONS Do not use L<public-inbox-purge(1)> or L<public-inbox-edit(1)> while this is running; old (purged or edited data) may show up. Normal invocations L<public-inbox-index(1)> can safely run while this is running, too. However, reindexing via the L<public-inbox-index(1)/--reindex> switch will be a waste of computing resources. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://hjrcffqmbrq6wope.onion/meta/> =head1 COPYRIGHT Copyright 2019-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<copydatabase(1)>, L<xapian-compact(1)>, L<public-inbox-index(1)> ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/public-inbox.cgi.pod�����������������������������������������������0000664�0000000�0000000�00000001676�13773461203�0022603�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������=head1 NAME public-inbox.cgi - CGI wrapper for PublicInbox::WWW =head1 SYNOPSIS You generally want to run public-inbox-httpd, instead =head1 DESCRIPTION public-inbox.cgi provides a CGI interface wrapper on top of the PSGI/Plack L<PublicInbox::WWW> module. It is only provided for compatibility reasons and NOT recommended. CGI with Perl is slow due to code loading overhead and web servers lack the scheduling fairness of L<public-inbox-httpd(1)> for handling git clones and streaming large mbox downloads. =head1 CONTACT Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org> The mail archives are hosted at L<https://public-inbox.org/meta/> and L<http://hjrcffqmbrq6wope.onion/meta/> =head1 COPYRIGHT Copyright 2019-2020 all contributors L<mailto:meta@public-inbox.org> License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO L<public-inbox-httpd(1)>, L<PublicInbox::WWW>, L<public-inbox-daemon(8)>, ������������������������������������������������������������������public-inbox-1.6.1/Documentation/reproducibility.txt������������������������������������������������0000664�0000000�0000000�00000002210�13773461203�0022676�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������reproducibility => forkability ------------------------------ The ability to fork a project is a checks and balances system for free software projects. Reproducibility is key to forkability since every mirror is potential fork. git makes the code history of projects fully reproducible. public-inbox uses git to make the email history of projects reproducible. Keeping all communications as email ensures the full history of the entire project can be mirrored by anyone with the resources to do so. Compact, low-complexity data requires less resources to mirror, so sticking with plain-text ensures more parties can mirror and potentially fork the project with all its data. Any private or irreproducible data is a barrier to forking. These include mailing list subscriber information and non-federated user identities. The "pull" subscriber model of NNTP and Atom feeds combined with open-to-all posting means there's no need for private data. If these things make power hungry project leaders and admins uncomfortable, good. That was the point. It's how checks and balances ought to work. Comments, corrections, etc welcome: meta@public-inbox.org ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/standards.perl�����������������������������������������������������0000775�0000000�0000000�00000006001�13773461203�0021600�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/perl -w use strict; # Copyright 2019-2020 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> print <<EOF; Relevant standards for public-inbox users and hackers ----------------------------------------------------- Non-exhaustive list of standards public-inbox software attempts or intends to implement. This list is intended to be a quick reference for hackers and users. Given the goals of interoperability and accessibility; strict conformance to standards is not always possible, but rather best-effort taking into account real-world cases. In particular, "obsolete" standards remain relevant as long as clients and data exists. IETF RFCs --------- EOF my $rfcs = [ 3977 => 'NNTP', 977 => 'NNTP (old)', 1036 => 'Standard for Interchange of USENET Messages', 5536 => 'Netnews Article Format', 5537 => 'Netnews Architecture and Protocols', 6048 => 'NNTP additions to LIST command (TODO)', 8054 => 'NNTP compression', 4642 => 'NNTP TLS', 8143 => 'NNTP TLS', 2980 => 'NNTP extensions (obsolete, but NOT irrelevant)', 4287 => 'Atom syndication', 4685 => 'Atom threading extensions', 2919 => 'List-Id mail header', 5064 => 'Archived-At mail header', 3986 => 'URI escaping', 1521 => 'MIME extensions', 2616 => 'HTTP/1.1 (newer updates should apply, too)', 7230 => 'HTTP/1.1 message syntax and routing', 7231 => 'HTTP/1.1 semantics and content', 822 => 'Internet message format (1982)', 2822 => 'Internet message format (2001)', 5322 => 'Internet message format (2008)', 3501 => 'IMAP4rev1', 2177 => 'IMAP IDLE', 2683 => 'IMAP4 Implementation Recommendations', # 5032 = 'WITHIN search extension for IMAP', 4978 => 'IMAP COMPRESS Extension', # 5182 = 'IMAP Extension for Referencing the Last SEARCH Result', # 5256 => 'IMAP SORT and THREAD extensions', # 5738 => 'IMAP Support for UTF-8', # 8474 => 'IMAP Extension for Object Identifiers', # 8620 => JSON Meta Application Protocol (JMAP) # 8621 => JSON Meta Application Protocol (JMAP) for Mail # ... # TODO: flesh this out ]; my @rfc_urls = qw(tools.ietf.org/html/rfc%d www.rfc-editor.org/errata_search.php?rfc=%d); for (my $i = 0; $i < $#$rfcs;) { my $num = $rfcs->[$i++]; my $txt = $rfcs->[$i++]; print "rfc$num\t- $txt\n"; printf "\thttps://$_\n", $num foreach @rfc_urls; print "\n"; } print <<'EOF' Other relevant documentation ---------------------------- * IMAP capabilities registry and response codes: https://www.iana.org/assignments/imap-capabilities https://www.iana.org/assignments/imap-response-codes * Documentation/technical/http-protocol.txt in git source code: https://public-inbox.org/git/9c5b6f0fac/s * Various mbox formats (we currently emit and parse mboxrd) https://en.wikipedia.org/wiki/Mbox * PSGI/Plack specifications (as long as our web frontend uses Perl5) git clone https://github.com/plack/psgi-specs.git Copyright --------- Copyright (C) 2019-2020 all contributors <meta@public-inbox.org> License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> EOF �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/technical/���������������������������������������������������������0000775�0000000�0000000�00000000000�13773461203�0020663�5����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/technical/data_structures.txt��������������������������������������0000664�0000000�0000000�00000017200�13773461203�0024640�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������Internal data structures of public-inbox This is a guide for hackers new to our code base. Do not consider our internal data structures stable for external consumers, this document should be updated when internals change. I recommend reading this document from the source tree, with the source code easily accessible if you need examples. This mainly documents in-memory data structures. If you're interested in the stable on-filesystem formats, see the public-inbox-config(5), public-inbox-v1-format(5) and public-inbox-v2-format(5) manpages. Common abbreviations when used outside of their packages are documented. `$self' is the common variable name when used within their package. PublicInbox::Config ------------------- PublicInbox::Config is the root class which loads a public-inbox-config file and instantiates PublicInbox::Inbox, PublicInbox::WWW, PublicInbox::NNTPD, and other top-level classes. Outside of tests, this is typically a singleton. Per-message classes ------------------- * PublicInbox::Eml - Email::MIME-like class Common abbreviation: $mime, $eml Used by: PublicInbox::WWW, PublicInbox::SearchIdx An representation of an entire email, multipart or not. An option to use libgmime or libmailutils may be supported in the future for performance and memory use. This can be a memory hog with big messages and giant attachments, so our PublicInbox::WWW interface only keeps one object of this class in memory at-a-time. In other words, this is the "meat" of the message, whereas $smsg (below) is just the "skeleton". Our PublicInbox::V2Writable class may have two objects of this type in memory at-a-time for deduplication. In public-inbox 1.4 and earlier, Email::MIME and its subclass, PublicInbox::MIME were used. Despite still slurping, PublicInbox::Eml is faster and uses less memory due to lazy header parsing and lazy subpart instantiation with shorter object lifetimes. * PublicInbox::Smsg - small message skeleton Used by: PublicInbox::{NNTP,WWW,SearchIdx} Common abbreviation: $smsg Represents headers shown in NNTP overview and PSGI message summaries (thread skeleton). This is loaded from either the overview DB (over.sqlite3) or the Xapian DB (docdata.glass), though the Xapian docdata is won't hold NNTP-only fields (Cc:/To:) There may be hundreds or thousands of these objects in memory at-a-time, so fields are pruned if unneeded. * PublicInbox::SearchThread::Msg - subclass of Smsg Common abbreviation: $cont or $node Used by: PublicInbox::WWW The structure we use for a non-recursive[1] variant of JWZ's algorithm: <https://www.jwz.org/doc/threading.html>. Nowadays, this is a re-blessed $smsg with additional fields. As with $smsg objects, there may be hundreds or thousands of these objects in memory at-a-time. We also do not use a linked-list for storing children as JWZ describes, but instead a Perl hashref for {children} which becomes an arrayref upon sorting. [1] https://rt.cpan.org/Ticket/Display.html?id=116727 Per-inbox classes ----------------- * PublicInbox::Inbox - represents a single public-inbox Common abbreviation: $ibx Used everywhere This represents a "publicinbox" section in the config file, see public-inbox-config(5) for details. * PublicInbox::Git - represents a single git repository Common abbreviation: $git, $ibx->git Used everywhere. Each configured "publicinbox" or "coderepo" has one of these. * PublicInbox::Msgmap - msgmap.sqlite3 read-write interface Common abbreviation: $mm, $ibx->mm Used everywhere if SQLite is available. Each indexed inbox has one of these, see public-inbox-v1-format(5) and public-inbox-v2-format(5) manpages for details. * PublicInbox::Over - over.sqlite3 read-only interface Common abbreviation: $over, $ibx->over Used everywhere if SQLite is available. Each indexed inbox has one of these, see public-inbox-v1-format(5) and public-inbox-v2-format(5) manpages for details. * PublicInbox::Search - Xapian read-only interface Common abbreviation: $srch, $ibx->search Used everywhere if Search::Xapian (or Xapian.pm) is available. Each indexed inbox has one of these, see public-inbox-v1-format(5) and public-inbox-v2-format(5) manpages for details. PublicInbox::WWW ---------------- The main PSGI web interface, uses several other packages to form our web interface. PublicInbox::SolverGit ---------------------- This is instantiated from the $INBOX/$BLOB_OID/s/ WWW endpoint and represents the stages and states for "solving" a blob by searching for and applying patches. See the code and comments in PublicInbox/SolverGit.pm PublicInbox::Qspawn ------------------- This is instantiated from various WWW endpoints and represents the stages and states for running and managing subprocesses in a way which won't exceed configured process limits defined via "publicinboxlimiter.*" directives in public-inbox-config(5). ad-hoc structures shared across packages ---------------------------------------- * $ctx - PublicInbox::WWW app request context This holds the PSGI $env as well as any internal variables used by various modules of PublicInbox::WWW. As with the PSGI $env, there is one per-active WWW request+response cycle. It does not exist for idle HTTP clients. daemon classes -------------- * PublicInbox::NNTP - a NNTP client socket Common abbreviation: $nntp Used by: PublicInbox::DS, public-inbox-nntpd Unlike PublicInbox::HTTP, all of the NNTP client logic for serving to NNTP clients is here, including what would be in $ctx on the HTTP or WWW side. There may be thousands of these since we support thousands of NNTP clients. * PublicInbox::HTTP - a HTTP client socket Common abbreviation: $http Used by: PublicInbox::DS, public-inbox-httpd Unlike PublicInbox::NNTP, this class no knowledge of any of the email or git-specific parts of public-inbox, only PSGI. However, it supports APIs and behaviors (e.g. streaming large responses) which PublicInbox::WWW may take advantage of. There may be thousands of these since we support thousands of HTTP clients. * PublicInbox::Listener - a SOCK_STREAM listen socket (TCP or Unix) Used by: PublicInbox::DS, public-inbox-httpd, public-inbox-nntpd Common abbreviation: @listeners in PublicInbox::Daemon This class calls non-blocking accept(2) or accept4(2) on a listen socket to create new PublicInbox::HTTP and PublicInbox::HTTP instances. * PublicInbox::HTTPD Common abbreviation: $httpd Represents an HTTP daemon which creates PublicInbox::HTTP wrappers around client sockets accepted from PublicInbox::Listener. Since the SERVER_NAME and SERVER_PORT PSGI variables needs to be exposed for HTTP/1.0 requests when Host: headers are missing, this is per-Listener socket. * PublicInbox::HTTPD::Async Common abbreviation: $async Used for implementing an asynchronous "push" interface for slow, expensive responses which may require spawning git-httpd-backend(1), git-apply(1) or other commands. This will also be used for dealing with future asynchronous operations such as HTTP reverse proxying and slow storage retrieval operations. * PublicInbox::NNTPD Common abbreviation: $nntpd Represents an NNTP daemon which creates PublicInbox::NNTP wrappers around client sockets accepted from PublicInbox::Listener. This is currently a singleton, but it is associated with a given PublicInbox::Config which may be instantiated more than once in the future. * PublicInbox::ParentPipe Per-worker process class to detect shutdown of master process. This is not used if using -W0 to disable worker processes in public-inbox-httpd or public-inbox-nntpd. This is a per-worker singleton. ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/technical/ds.txt���������������������������������������������������0000664�0000000�0000000�00000011600�13773461203�0022030�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������PublicInbox::DS - event loop and async I/O base class Our PublicInbox::DS event loop which powers public-inbox-nntpd and public-inbox-httpd diverges significantly from the unmaintained Danga::Socket package we forked from. In fact, it's probably different from most other event loops out there. Most notably: * There is one and only one callback: ->event_step. Unlike other event loops, there are no separate callbacks for read, write, error or hangup events. In fact, we never care which kevent filter or poll/epoll event flag (e.g. POLLIN/POLLOUT/POLLHUP) triggers a call. The lack of read/write callback distinction is driven by the fact TLS libraries (e.g. OpenSSL via IO::Socket::SSL) may declare SSL_WANT_READ on SSL_write(), and SSL_WANT_READ on SSL_read(). So we end up having to let each user object decide whether it wants to make read or write calls depending on its internal state, completely independent of the event loop. Error and hangup (POLLERR and POLLHUP) callbacks are redundant and only triggered in rare cases. They're redundant because the result of every read and write call in ->event_step must be checked, anyways. At best, callbacks for POLLHUP and POLLERR can save one syscall per socket lifetime and not worth the extra code it imposes. Reducing the user-supplied code down to a single callback allows subclasses to keep their logic self-contained. The combination of this change and one-shot wakeups (see below) for bidirectional data flows make asynchronous code easier to reason about. Other divergences: * ->write buffering uses temporary files whereas Danga::Socket used the heap. The rationale for this is the kernel already provides ample (and configurable) space for socket buffers. Modern kernels also cache FS operations aggressively, so systems with ample RAM are unlikely to notice degradation, while small systems are less likely to suffer unpredictable heap fragmentation, swap and OOM penalties. In the future, we may introduce sendfile and mmap+SSL_write to reduce data copies, and use FALLOC_FL_PUNCH_HOLE on Linux to release space after the buffer is partially cleared. Augmented features: * obj->write(CODEREF) passes the object itself to the CODEREF Being able to enqueue subroutine calls is a powerful feature in Danga::Socket for keeping linear logic in an asynchronous environment. Unfortunately, each subroutine takes several kilobytes of memory. One small change to Danga::Socket is to pass the receiver object (aka "$self") to the CODEREF. $self can store any necessary state it needs for a normal (named) subroutine. This allows us to put the same sub into multiple queues without paying a large memory penalty for each one. This idea is also more easily ported to C or other languages which lack anonymous subroutines (aka "closures"). * ->requeue support. An optimization of the AddTimer(0, ...) idiom for immediately dispatching code at the next event loop iteration. public-inbox uses this for fairly generating large responses iteratively (see PublicInbox::NNTP::long_response or git_async_cat for blob retrievals). New features * One-shot wakeups allowed via EPOLLONESHOT or EV_DISPATCH. These flags allow us to simplify code in ->event_step callbacks for bidirectional sockets (NNTP and HTTP). Instead of merely reacting to events, control is handed over at ->event_step in one-shot scenarios. The event_step caller (NNTP || HTTP) then becomes proactive in declaring which (if any) events it's interested in for the next loop iteration. * Edge-triggering available via EPOLLET or EV_CLEAR. These reduce wakeups for unidirectional classes (e.g. PublicInbox::Listener sockets, and pipes via PublicInbox::HTTPD::Async). * IO::Socket::SSL support (for NNTPS, STARTTLS+NNTP, HTTPS) * dwaitpid (waitpid wrapper) support for reaping dead children * reliable signal wakeups are supported via signalfd on Linux, EVFILT_SIGNAL on *BSDs via IO::KQueue. Removed features * Many fields removed or moved to subclasses, so the underlying hash is smaller and suitable for FDs other than stream sockets. Some fields we enforce (e.g. wbuf, wbuf_off) are autovivified on an as-needed basis to save memory when they're not needed. * TCP_CORK support removed, instead we use MSG_MORE on non-TLS sockets and we may use vectored I/O support via GnuTLS in the future for TLS sockets. * per-FD PLCMap (post-loop callback) removed, we got ->requeue support where no extra hash lookups or assignments are necessary. * read push backs removed. Some subclasses use a read buffer ({rbuf}) but they control it, not this event loop. * Profiling and debug logging removed. Perl and OS-specific tracers and profilers are sufficient. * ->AddOtherFds support removed, everything watched is a subclass of PublicInbox::DS, but we've slimmed down the fields to eliminate the memory penalty for objects. ��������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/technical/memory.txt�����������������������������������������������0000664�0000000�0000000�00000003622�13773461203�0022737�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������semi-automatic memory management in public-inbox ------------------------------------------------ The majority of public-inbox is implemented in Perl 5, a language and interpreter not particularly known for being memory-efficient. We strive to keep processes small to improve locality, allow the kernel to cache more files, and to be a good neighbor to other processes running on the machine. Taking advantage of automatic reference counting (ARC) in Perl allows us deterministically release memory back to the heap. We start with a simple data model with few circular references. This both eases human understanding and reduces the likelyhood of bugs. Knowing the relative sizes and quantities of our data structures, we limit the scope of allocations as much as possible and keep large allocations shortest-lived. This minimizes both the cognitive overhead on humans in addition to reducing memory pressure on the machine. Short-lived non-immortal closures (aka "anonymous subs") are avoided in long-running daemons unless required for compatibility with PSGI. Closures are memory-intensive and may make allocation lifetimes less obvious to humans. They are also the source of memory leaks in older versions of Perl, including 5.16.3 found in enterprise distros. We also use Perl's `delete' and `undef' built-ins to drop reference counts sooner than scope allows. These functions are required to break the few reference cycles we have that would otherwise lead to leaks. Of note, `undef' may be used in two ways: 1. to free(3) the underlying buffer: undef $scalar; 2. to reset a buffer but reduce realloc(3) on subsequent growth: $scalar = ""; # useful when repeated appending $scalar = undef; # usually not needed In the future, our internal data model will be further flattened and simplified to reduce the overhead imposed by small objects. Large allocations may also be avoided by optionally using Inline::C. ��������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/technical/whyperl.txt����������������������������������������������0000664�0000000�0000000�00000014504�13773461203�0023122�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������why public-inbox is currently implemented in Perl 5 --------------------------------------------------- While Perl has many detractors and there's a lot not to like about Perl, we use it anyways because it offers benefits not (yet) available from other languages. This document is somewhat inspired by https://sqlite.org/whyc.html Other languages and runtimes may eventually be a possibility for us, and this document can serve as our requirements list for possible replacements. As always, comments and corrections and additions welcome at <meta@public-inbox.org>. We're not Perl experts, either. Good Things ----------- * Availability Perl 5 is installed on many, if not most GNU/Linux and BSD-based servers and workstations. It is likely the most widely-installed programming environment that offers a significant amount of POSIX functionality. Users won't have to waste bandwidth or space with giant toolchains or architecture-specific binaries. Furthermore, Perl documentation is typically installed locally as manpages, allowing users to quickly refer to documentation as needed. * Scripted, always editable by the end user Users cannot lose access to the source code. Code written entirely in any scripting language automatically satisfies the GPL-2.0, making it easier to satisfy the AGPL-3.0. Use of a scripting language improves auditability for malicious changes. It also reduces storage and bandwidth requirements for distributors, as the same scripts can be shared across multiple OSes and architectures. Perl's availability and the low barrier to entry of scripting ensures it's easy for users to exercise their software freedom. * Predictable performance While Perl is neither fast or memory-efficient, its performance and memory use are predictable and does not require GC tuning by the user. public-inbox is developed for (and mostly on) old hardware. Perl was fast enough to power the web of the late 1990s, and any cheap VPS today has more than enough RAM and CPU for handling plain-text email. Low hardware requirements increases the reach of our software to more users, improving centralization resistance. * Compatibility Unlike similarly powerful scripting languages, there is no forced migration to a major new version. From 2000-2020, Perl had fewer breaking changes than Python or Ruby; we expect that trend to continue given the inertia of Perl 5. Note: this document was written before the Perl 7 announcement. We'll continue to monitor and adapt to the situation around what distros are doing in regard to maintaining compatibility. * Built for text processing Our focus is plain-text mail, and Perl has many built-ins optimized for text processing. It also has good support for UTF-8 and legacy encodings found in old mail archives. * Integration with distros and non-Perl libraries Perl modules and bindings to common libraries such as SQLite and Xapian are already distributed by many GNU/Linux distros and BSD ports. There should be no need to rely on language-specific package managers such as cpan(1), those systems increase the learning curve for users and systems administrators. * Compactness and terseness Less code generally means less bugs. We try to avoid the "line noise" stereotype of some Perl codebases, yet still manage to write less code than one would with non-scripting languages. * Performance ceiling and escape hatch With optional Inline::C, we can be "as fast as C" in some cases. Inline::C is widely-packaged by distros and it gives us an escape hatch for dealing with missing bindings or performance problems should they arise. Inline::C use (as opposed to XS) also preserves the software freedom and auditability benefits to all users. Unfortunately, most C toolchains are big; so Inline::C will always be optional for users who cannot afford the bandwidth or space. Bad Things ---------- * Slow startup time. Tokenization, parsing, and compilation of pure Perl is not cached. Inline::C does cache its results, however. We work around slow startup times in tests by preloading code, similar to how mod_perl works for CGI. * High space overhead and poor locality of small data structures, including the optree. This may not be fixable in Perl itself given compatibility requirements of the C API. These problems are exacerbated on modern 64-bit platforms, though the Linux x32 ABI offers promise. * Lack of vectored I/O support (writev, sendmmsg, etc. syscalls) and "newer" POSIX functions in general. APIs end up being slurpy, favoring large buffers and memory copies for concatenation rather than rope (aka "cord") structures. * While mmap(2) is available via PerlIO::mmap, string ops (m//, substr(), index(), etc.) still require memory copies into userspace, negating a benefit of zero-copy. * The XS/C API make it difficult to improve internals while preserving compatibility. * Lack of optional type checking. This may be a blessing in disguise, though, as it encourages us to simplify our data models and lowers cognitive overhead. * SMP support is mostly limited to fork(), since many libraries (including much of the standard library) are not thread-safe. Even with threads.pm, sharing data between interpreters within the same process is inefficient due to the lack of lock-free and wait-free data structures from projects such as Userspace RCU. * Process spawning speed degrades as memory use increases. We work around this optionally via Inline::C and vfork(2), since Perl lacks an approximation of posix_spawn(3). We also use `undef' and `delete' ops to free large buffers as soon as we're done using them to save memory. Red herrings to ignore when evaluating other runtimes ----------------------------------------------------- These don't discount a language or runtime from being being used, they're just not interesting. * Lightweight threading While lightweight threading implementations are convenient, they tend to be significantly heavier than a pure event-loop systems (or multi-threaded event-loop systems) Lightweight threading implementations have stack overhead and growth typically measured in kilobytes. The userspace state overhead of event-based systems is an order of magnitude less, and a sunk cost regardless of concurrency model. ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/Documentation/txt2pre������������������������������������������������������������0000775�0000000�0000000�00000006745�13773461203�0020303�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env perl # Copyright (C) 2014-2020 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Stupid script to make HTML from preformatted, utf-8 text versions, # only generating links for http(s). Markdown does too much # and requires indentation to output preformatted text. use strict; use warnings; use PublicInbox::Linkify; use PublicInbox::Hval qw(ascii_html); my %xurls; for (qw[public-inbox.cgi(1) public-inbox-compact(1) public-inbox-config(5) public-inbox-convert(1) public-inbox-daemon(8) public-inbox-edit(1) public-inbox-httpd(1) public-inbox-imapd(1) public-inbox-index(1) public-inbox-init(1) public-inbox-learn(1) public-inbox-mda(1) public-inbox-nntpd(1) public-inbox-overview(7) public-inbox-purge(1) public-inbox-v1-format(5) public-inbox-v2-format(5) public-inbox-watch(1) public-inbox-xcpdb(1) ]) { my ($n) = (/([\w\-\.]+)/); $xurls{$_} = "$n.html"; $xurls{$n} = "$n.html"; } for (qw[copydatabase(1) xapian-compact(1)]) { my ($n) = (/([\w\-\.]+)/); $xurls{$_} = ".$n.1.html" } for (qw[make(1) flock(2) setrlimit(2) vfork(2) tmpfs(5)]) { my ($n, $s) = (/([\w\-]+)\((\d)\)/); $xurls{$_} = "http://www.man7.org/linux/man-pages/man$s/$n.$s.html"; } for (qw[git(1) git-am(1) git-apply(1) git-config(1) git-credential(1) git-daemon(1) git-fast-import(1) git-fetch(1) git-filter-branch(1) git-format-patch(1) git-gc(1) git-http-backend(1) git-imap-send(1) git-init(1) git-send-email(1) gitrepository-layout(5) ]) { my ($n) = (/([\w\-\.]+)/); $xurls{$_} = "https://kernel.org/pub/software/scm/git/docs/$n.html" } for (qw[ sd_listen_fds(3) systemd(1) systemd.unit(5) systemd.socket(5) ]) { my ($n) = (/([\w\-\.]+)/); $xurls{$_} = "https://www.freedesktop.org/software/systemd/man/$n.html"; } $xurls{'netrc(5)'} = 'https://manpages.debian.org/stable/ftp/netrc.5.en.html'; $xurls{'mbsync(1)'} = 'https://manpages.debian.org/stable/isync/mbsync.1.en.html'; $xurls{'offlineimap(1)'} = 'https://manpages.debian.org/stable/offlineimap/offlineimap.1.en.html'; $xurls{'spamc(1)'} = 'https://spamassassin.apache.org/full/3.4.x/doc/spamc.html'; $xurls{'grok-pull'} = 'https://git.kernel.org/pub/scm/utils/grokmirror/grokmirror.git' . '/tree/man/grok-pull.1.rst'; $xurls{'git-filter-repo(1)'} = 'https://github.com/newren/git-filter-repo'. '/blob/master/Documentation/git-filter-repo.txt'; $xurls{'ssoma(1)'} = 'https://ssoma.public-inbox.org/ssoma.txt'; $xurls{'cgitrc(5)'} = 'https://git.zx2c4.com/cgit/tree/cgitrc.5.txt'; $xurls{'prove(1)'} = 'https://perldoc.perl.org/prove.html'; my $str = do { local $/; <STDIN> }; my ($title) = ($str =~ /\A([^\n]+)/); if ($str =~ /^NAME\n\s+([^\n]+)/sm) { # don't link to ourselves $title = $1; if ($title =~ /([\w\.\-]+)/) { delete $xurls{$1}; } } $title = ascii_html($title); my $l = PublicInbox::Linkify->new; $str = $l->linkify_1($str); $str = ascii_html($str); # longest matches, first my @keys = sort { length($b) <=> length($a) } keys %xurls; my $xkeys = join('|', map { quotemeta } @keys); $str =~ s,(?<![>\w_])($xkeys)(?!(?:[\w<\-]|\.html)), qq(<a\nhref=").$xurls{$1}.qq(">$1).($2//'').'</a>',sge; $str = $l->linkify_2($str); print '<html><head>', qq(<meta\nhttp-equiv="Content-Type"\ncontent="text/html; charset=utf-8"\n/>), "<title>$title", "
",  $str , '
'; STDOUT->flush; # keep mtime on website consistent so clients can cache if (-f STDIN && -f STDOUT) { my @st = stat(STDIN); utime($st[8], $st[9], \*STDOUT); } public-inbox-1.6.1/HACKING000066400000000000000000000065711377346120300151200ustar00rootroot00000000000000hacking public-inbox -------------------- Send all patches and "git request-pull"-formatted emails to our self-hosting inbox at meta@public-inbox.org It is archived at: https://public-inbox.org/meta/ and http://hjrcffqmbrq6wope.onion/meta/ (using Tor) Contributions are email-driven, just like contributing to git itself or the Linux kernel; however anonymous and pseudonymous contributions will always be welcome. Please consider our goals in mind: Decentralization, Accessibility, Compatibility, Performance These goals apply to everyone: users viewing over the web or NNTP, sysadmins running public-inbox, and other hackers working public-inbox. We will reject any feature which advocates or contributes to any particular instance of a public-inbox becoming a single point of failure. Things we've considered but rejected include: * exposing article serial numbers outside of NNTP * allowing readers to inject metadata (e.g. votes) We care about being accessible to folks with vision problems and/or lack the computing resources to view so-called "modern" websites. This includes folks on slow connections and ancient browsers which may be too difficult to upgrade due to resource demands. Only depend on Free Software packages which exist in the "main" section of Debian "stable" distribution. That is Debian 9.x ("stretch") as of this writing, but "oldstable" (8.x, "jessie") remains supported for v1 inboxes. In general, we favor mature and well-tested old things rather than the shiny new. Avoid relying on compiled modules too much. Even if it is Free, compiled code makes packages more expensive to audit, build, distribute and verify. public-inbox itself will only be implemented in scripting languages (currently Perl 5) and optional JIT-compiled C (via Inline::C) Do not recurse on user-supplied data. Neither Perl or C handle deep recursion gracefully. See lib/PublicInbox/SearchThread.pm and lib/PublicInbox/MsgIter.pm for examples of non-recursive alternatives to previously-recursive algorithms. Performance should be reasonably good for server administrators, too, and we will sacrifice features to achieve predictable performance. Encouraging folks to self-host will be easier with lower hardware requirements. See design_www.txt and design_notes.txt in the Documentation/ directory for design decisions made during development. See Documentation/technical/ in the source tree for more details on specific topics, in particular data_structures.txt Faster tests ------------ The `make test' target provided by MakeMaker does not run in parallel. Our `make check' target supports parallel runs, and it also creates a `.prove' file to optimize `make check-run'. The prove(1) command (distributed with Perl) may also be used for finer-grained testing: prove -bvw t/foo.t If using a make(1) (e.g. GNU make) with `include' support, the `config.mak' Makefile snippet can be used to set environment variables such as PERL_INLINE_DIRECTORY and TMPDIR. With PERL_INLINE_DIRECTORY set to enable Inline::C support and TMPDIR pointed to a tmpfs(5) mount, `make check-run' takes 6-10s (load-dependent) on a busy workstation built in 2010. Perl notes ---------- * \w, \s, \d character classes all match Unicode characters; so write out class ranges (e.g "[0-9]") if you only intend to match ASCII. Do not use the "/a" (ASCII) modifier, that requires Perl 5.14 and we're only depending on 5.10.1 at the moment. public-inbox-1.6.1/INSTALL000066400000000000000000000204611377346120300151540ustar00rootroot00000000000000public-inbox (server-side) installation --------------------------------------- This is for folks who want to setup their own public-inbox instance. Clients should use normal git-clone/git-fetch, or NNTP clients if they want to import mail into their personal inboxes. public-inbox is developed on Debian GNU/Linux systems and will never depend on packages outside of the "main" component of the "stable" distribution, currently Debian 10.x ("buster"), but older versions of Debian remain supported. Most packages are available in other GNU/Linux distributions and FreeBSD. TODO: this still needs to be documented better, also see the scripts/ and sa_config/ directories in the source tree Requirements ------------ public-inbox requires a number of other packages to access its full functionality. The core tools are, of course: * Git (1.8.0+, 2.6+ for writing v2 inboxes) * Perl 5.10.1+ * DBD::SQLite (needed for NNTP, message threading, and v2 inboxes) To accept incoming mail into a public inbox, you'll likely want: * MTA - postfix is recommended (for public-inbox-mda) * SpamAssassin (spamc/spamd) (for public-inbox-watch/public-inbox-mda) Beyond that, there is a long list of Perl modules required, starting with: * Digest::SHA typically installed with Perl rpm: perl-Digest-SHA * URI::Escape deb: liburi-perl pkg: p5-URI rpm: perl-URI (for HTML/Atom generation) Email::MIME will be optional as of public-inbox v1.5.0, it may still be used in maintainer comparison tests: * Email::MIME deb: libemail-mime-perl pkg: p5-Email-MIME rpm: perl-Email-MIME Plack and Date::Parse are optional as of public-inbox v1.3.0, but required for older releases: * Plack deb: libplack-perl pkg: p5-Plack rpm: perl-Plack, perl-Plack-Test, (for HTML/Atom generation) - Date::Parse deb: libtimedate-perl pkg: p5-TimeDate rpm: perl-TimeDate (for broken, mostly historical emails) Where "deb" indicates package names for Debian-derived distributions, "pkg" is for the FreeBSD package (maybe other common BSDs, too), and "rpm" is for RPM-based distributions (only known to work on Fedora). Numerous optional modules are likely to be useful as well: - DBD::SQLite deb: libdbd-sqlite3-perl pkg: p5-DBD-SQLite rpm: perl-DBD-SQLite (for v2, NNTP, or gzipped mboxes) - Search::Xapian deb: libsearch-xapian-perl pkg: p5-Search-Xapian rpm: perl-Search-Xapian (HTTP search) - Net::Server deb: libnet-server-perl pkg: pkg-Net-Server rpm: perl-Net-Server (for HTTP/NNTP background daemons, not needed as systemd services or foreground servers) - Inline::C deb: libinline-c-perl pkg: pkg-Inline-C (speeds up process spawning on Linux, see public-inbox-daemon(8)) - Email::Address::XS deb: libemail-address-xs-perl pkg: pkg-Email-Address-XS (correct parsing of tricky email addresses, phrases and comments) - Plack::Middleware::ReverseProxy deb: libplack-middleware-reverseproxy-perl pkg: p5-Plack-Middleware-ReverseProxy rpm: perl-Plack-Middleware-ReverseProxy (ensures redirects are correct when running behind nginx or Varnish) * highlight deb: libhighlight-perl (for syntax highlighting with coderepo) * xapian-compact (tool) deb: xapian-tools pkg: xapian-core rpm: xapian-core (optional, for public-inbox-compact(1)) The following modules are typically pulled in by dependencies listed above, so there is no need to explicitly install them: * Encode deb: libperl5.$MINOR (or libencode-perl) pkg: perl5 rpm: perl-Encode (likely installed with Perl) - DBI deb: libdbi-perl pkg: p5-DBI rpm: perl-DBI (pulled in by DBD::SQLite) * Devel::Peek deb: libperl5.$MINOR (e.g. libperl5.24) pkg: perl5 rpm: perl-Devel-Peek (optional for stale FD cleanup in daemons, typically installed alongside Perl5) - Linux::Inotify2 deb: liblinux-inotify2-perl rpm: perl-Linux-Inotify2 (for public-inbox-watch on Linux) - IO::Compress (::Gzip) deb: perl-modules (or libio-compress-perl) pkg: perl5 rpm: perl-IO-Compress (for gzipped mbox over HTTP, v2 format) Uncommonly needed modules: - Socket6 deb: libsocket6-perl pkg: p5-Socket6 rpm: perl-Socket6 (pulled in by SpamAssassin and Net::Server, only necessary if using IPv6 with Plack::Middleware::AccessLog or similar on Perl <= 5.12) - Crypt::CBC deb: libcrypt-cbc-perl pkg: p5-Crypt-CBC (for PublicInbox::Unsubscribe (rarely used)) Optional packages testing and development: - Plack::Test deb: libplack-test-perl pkg: p5-Plack rpm: perl-Plack-Test - Test::Simple deb: perl-modules-5.$MINOR pkg: perl5 rpm: perl-Test-Simple - XML::TreePP deb: libxml-treepp-perl pkg: p5-XML-TreePP rpm: perl-XML-TreePP standard MakeMaker installation (Perl) -------------------------------------- To use MakeMaker, you need to ensure ExtUtils::MakeMaker is available. This is typically installed with Perl, but RPM-based systems will likely need to install the `perl-ExtUtils-MakeMaker' package. Once the dependencies are installed, you should be able to build and install the system (into /usr/local) with: perl Makefile.PL make make test # see HACKING for faster tests for hackers make install # root permissions may be needed When installing Search::Xapian, make sure the underlying Xapian installation is not affected by an index corruption bug: https://bugs.debian.org/808610 For Debian 8.x (jessie), this means using Debian 8.5 or later. public-inbox will never store unregeneratable data in Xapian or any other search database we might use; Xapian corruption will not destroy critical data. See the public-inbox-overview(7) man page for the next steps once the installation is complete. Copyright --------- Copyright 2013-2020 all contributors License: AGPL-3.0+ public-inbox-1.6.1/MANIFEST000066400000000000000000000221431377346120300152530ustar00rootroot00000000000000.gitattributes .gitignore AUTHORS COPYING Documentation/.gitignore Documentation/RelNotes/v1.0.0.eml Documentation/RelNotes/v1.1.0-pre1.eml Documentation/RelNotes/v1.2.0.eml Documentation/RelNotes/v1.3.0.eml Documentation/RelNotes/v1.4.0.eml Documentation/RelNotes/v1.5.0.eml Documentation/RelNotes/v1.6.0.eml Documentation/RelNotes/v1.6.1.eml Documentation/RelNotes/v1.7.0.wip Documentation/clients.txt Documentation/dc-dlvr-spam-flow.txt Documentation/design_notes.txt Documentation/design_www.txt Documentation/extman.perl Documentation/flow.ge Documentation/flow.txt Documentation/hosted.txt Documentation/include.mk Documentation/marketing.txt Documentation/mknews.perl Documentation/public-inbox-compact.pod Documentation/public-inbox-config.pod Documentation/public-inbox-convert.pod Documentation/public-inbox-daemon.pod Documentation/public-inbox-edit.pod Documentation/public-inbox-httpd.pod Documentation/public-inbox-imapd.pod Documentation/public-inbox-index.pod Documentation/public-inbox-init.pod Documentation/public-inbox-learn.pod Documentation/public-inbox-mda.pod Documentation/public-inbox-nntpd.pod Documentation/public-inbox-overview.pod Documentation/public-inbox-purge.pod Documentation/public-inbox-tuning.pod Documentation/public-inbox-v1-format.pod Documentation/public-inbox-v2-format.pod Documentation/public-inbox-watch.pod Documentation/public-inbox-xcpdb.pod Documentation/public-inbox.cgi.pod Documentation/reproducibility.txt Documentation/standards.perl Documentation/technical/data_structures.txt Documentation/technical/ds.txt Documentation/technical/memory.txt Documentation/technical/whyperl.txt Documentation/txt2pre HACKING INSTALL MANIFEST Makefile.PL README TODO certs/.gitignore certs/create-certs.perl ci/README ci/deps.perl ci/profiles.sh ci/run.sh contrib/css/216dark.css contrib/css/216light.css contrib/css/README contrib/selinux/el7/publicinbox.fc contrib/selinux/el7/publicinbox.te examples/README examples/README.unsubscribe examples/apache2_cgi.conf examples/apache2_perl.conf examples/apache2_perl_old.conf examples/cgi-webrick.rb examples/cgit-commit-filter.lua examples/cgit-wwwhighlight-filter.lua examples/cgit.psgi examples/grok-pull.post_update_hook.sh examples/highlight.psgi examples/lib/.gitignore examples/logrotate.conf examples/newswww.psgi examples/nginx_proxy examples/public-inbox-config examples/public-inbox-httpd.socket examples/public-inbox-httpd@.service examples/public-inbox-imap-onion.socket examples/public-inbox-imapd.socket examples/public-inbox-imapd@.service examples/public-inbox-imaps.socket examples/public-inbox-nntpd.socket examples/public-inbox-nntpd@.service examples/public-inbox-nntps.socket examples/public-inbox-watch.service examples/public-inbox.psgi examples/unsubscribe-milter.socket examples/unsubscribe-milter@.service examples/unsubscribe-psgi.socket examples/unsubscribe-psgi@.service examples/unsubscribe.milter examples/unsubscribe.psgi examples/varnish-4.vcl lib/PublicInbox/Address.pm lib/PublicInbox/AddressPP.pm lib/PublicInbox/Admin.pm lib/PublicInbox/AdminEdit.pm lib/PublicInbox/AltId.pm lib/PublicInbox/Cgit.pm lib/PublicInbox/CompressNoop.pm lib/PublicInbox/Config.pm lib/PublicInbox/ConfigIter.pm lib/PublicInbox/ContentHash.pm lib/PublicInbox/DS.pm lib/PublicInbox/DSKQXS.pm lib/PublicInbox/DSPoll.pm lib/PublicInbox/Daemon.pm lib/PublicInbox/DirIdle.pm lib/PublicInbox/DummyInbox.pm lib/PublicInbox/EOFpipe.pm lib/PublicInbox/Emergency.pm lib/PublicInbox/Eml.pm lib/PublicInbox/EmlContentFoo.pm lib/PublicInbox/ExtMsg.pm lib/PublicInbox/FakeInotify.pm lib/PublicInbox/Feed.pm lib/PublicInbox/Filter/Base.pm lib/PublicInbox/Filter/Gmane.pm lib/PublicInbox/Filter/Mirror.pm lib/PublicInbox/Filter/RubyLang.pm lib/PublicInbox/Filter/SubjectTag.pm lib/PublicInbox/Filter/Vger.pm lib/PublicInbox/GetlineBody.pm lib/PublicInbox/Git.pm lib/PublicInbox/GitAsyncCat.pm lib/PublicInbox/GitCredential.pm lib/PublicInbox/GitHTTPBackend.pm lib/PublicInbox/GzipFilter.pm lib/PublicInbox/HTTP.pm lib/PublicInbox/HTTPD.pm lib/PublicInbox/HTTPD/Async.pm lib/PublicInbox/HlMod.pm lib/PublicInbox/Hval.pm lib/PublicInbox/IMAP.pm lib/PublicInbox/IMAPClient.pm lib/PublicInbox/IMAPD.pm lib/PublicInbox/IMAPTracker.pm lib/PublicInbox/IMAPdeflate.pm lib/PublicInbox/IMAPsearchqp.pm lib/PublicInbox/IdxStack.pm lib/PublicInbox/Import.pm lib/PublicInbox/In2Tie.pm lib/PublicInbox/Inbox.pm lib/PublicInbox/InboxIdle.pm lib/PublicInbox/InboxWritable.pm lib/PublicInbox/KQNotify.pm lib/PublicInbox/Linkify.pm lib/PublicInbox/Listener.pm lib/PublicInbox/Lock.pm lib/PublicInbox/MDA.pm lib/PublicInbox/MID.pm lib/PublicInbox/MIME.pm lib/PublicInbox/ManifestJsGz.pm lib/PublicInbox/Mbox.pm lib/PublicInbox/MboxGz.pm lib/PublicInbox/MsgIter.pm lib/PublicInbox/MsgTime.pm lib/PublicInbox/Msgmap.pm lib/PublicInbox/NDC_PP.pm lib/PublicInbox/NNTP.pm lib/PublicInbox/NNTPD.pm lib/PublicInbox/NNTPdeflate.pm lib/PublicInbox/NewsWWW.pm lib/PublicInbox/Over.pm lib/PublicInbox/OverIdx.pm lib/PublicInbox/ProcessPipe.pm lib/PublicInbox/Qspawn.pm lib/PublicInbox/Reply.pm lib/PublicInbox/SaPlugin/ListMirror.pm lib/PublicInbox/SaPlugin/ListMirror.pod lib/PublicInbox/Search.pm lib/PublicInbox/SearchIdx.pm lib/PublicInbox/SearchIdxShard.pm lib/PublicInbox/SearchQuery.pm lib/PublicInbox/SearchThread.pm lib/PublicInbox/SearchView.pm lib/PublicInbox/Sigfd.pm lib/PublicInbox/Smsg.pm lib/PublicInbox/SolverGit.pm lib/PublicInbox/Spamcheck.pm lib/PublicInbox/Spamcheck/Spamc.pm lib/PublicInbox/Spawn.pm lib/PublicInbox/SpawnPP.pm lib/PublicInbox/Syscall.pm lib/PublicInbox/TLS.pm lib/PublicInbox/TestCommon.pm lib/PublicInbox/Tmpfile.pm lib/PublicInbox/URIimap.pm lib/PublicInbox/Unsubscribe.pm lib/PublicInbox/UserContent.pm lib/PublicInbox/V2Writable.pm lib/PublicInbox/View.pm lib/PublicInbox/ViewDiff.pm lib/PublicInbox/ViewVCS.pm lib/PublicInbox/WWW.pm lib/PublicInbox/WWW.pod lib/PublicInbox/Watch.pm lib/PublicInbox/WwwAltId.pm lib/PublicInbox/WwwAtomStream.pm lib/PublicInbox/WwwAttach.pm lib/PublicInbox/WwwHighlight.pm lib/PublicInbox/WwwListing.pm lib/PublicInbox/WwwStatic.pm lib/PublicInbox/WwwStream.pm lib/PublicInbox/WwwText.pm lib/PublicInbox/Xapcmd.pm sa_config/Makefile sa_config/README sa_config/root/etc/spamassassin/public-inbox.pre sa_config/user/.spamassassin/user_prefs script/public-inbox-compact script/public-inbox-convert script/public-inbox-edit script/public-inbox-httpd script/public-inbox-imapd script/public-inbox-index script/public-inbox-init script/public-inbox-learn script/public-inbox-mda script/public-inbox-nntpd script/public-inbox-purge script/public-inbox-watch script/public-inbox-xcpdb script/public-inbox.cgi scripts/README scripts/dc-dlvr scripts/dc-dlvr.pre scripts/dupe-finder scripts/edit-sa-prefs scripts/import_maildir scripts/import_slrnspool scripts/import_vger_from_mbox scripts/report-spam scripts/slrnspool2maildir scripts/ssoma-replay scripts/xhdr-num2mid t/.gitconfig t/address.t t/admin.t t/altid.t t/altid_v2.t t/cgi.t t/check-www-inbox.perl t/config.t t/config_limiter.t t/content_hash.t t/convert-compact.t t/data/0001.patch t/data/message_embed.eml t/dir_idle.t t/ds-kqxs.t t/ds-leak.t t/ds-poll.t t/edit.t t/emergency.t t/eml.t t/eml_content_disposition.t t/eml_content_type.t t/epoll.t t/fail-bin/spamc t/fake_inotify.t t/feed.t t/filter_base-junk.eml t/filter_base-xhtml.eml t/filter_base.t t/filter_mirror.t t/filter_rubylang.t t/filter_subjecttag.t t/filter_vger.t t/git-http-backend.psgi t/git.fast-import-data t/git.t t/gzip_filter.t t/hl_mod.t t/html_index.t t/httpd-corner.psgi t/httpd-corner.t t/httpd-https.t t/httpd-unix.t t/httpd.t t/hval.t t/idx_stack.t t/imap.t t/imap_searchqp.t t/imap_tracker.t t/imapd-tls.t t/imapd.t t/import.t t/inbox.t t/inbox_idle.t t/index-git-times.t t/indexlevels-mirror-v1.t t/indexlevels-mirror.t t/init.t t/iso-2202-jp.eml t/kqnotify.t t/linkify.t t/main-bin/spamc t/mda-mime.eml t/mda.t t/mda_filter_rubylang.t t/mid.t t/mime.t t/msg_iter-nested.eml t/msg_iter-order.eml t/msg_iter.t t/msgmap.t t/msgtime.t t/multi-mid.t t/nntp.t t/nntpd-tls.t t/nntpd-v2.t t/nntpd.t t/nodatacow.t t/nulsubject.t t/over.t t/plack-2-txt-bodies.eml t/plack-attached-patch.eml t/plack-qp.eml t/plack.t t/precheck.t t/psgi_attach.eml t/psgi_attach.t t/psgi_bad_mids.t t/psgi_mount.t t/psgi_multipart_not.t t/psgi_scan_all.t t/psgi_search.t t/psgi_text.t t/psgi_v2-new.eml t/psgi_v2-old.eml t/psgi_v2.t t/purge.t t/qspawn.t t/replace.t t/reply.t t/run.perl t/search-amsg.eml t/search-thr-index.t t/search.t t/sigfd.t t/solve/0001-simple-mod.patch t/solve/0002-rename-with-modifications.patch t/solver_git.t t/spamcheck_spamc.t t/spawn.t t/thread-cycle.t t/thread-index-gap.t t/time.t t/uri_imap.t t/utf8.eml t/v1-add-remove-add.t t/v1reindex.t t/v2-add-remove-add.t t/v2dupindex.t t/v2mda.t t/v2mirror.t t/v2reindex.t t/v2writable.t t/view.t t/watch_filter_rubylang.t t/watch_imap.t t/watch_maildir.t t/watch_maildir_v2.t t/watch_multiple_headers.t t/watch_nntp.t t/www_altid.t t/www_listing.t t/www_static.t t/x-unknown-alpine.eml t/xcpdb-reshard.t xt/cmp-msgstr.t xt/cmp-msgview.t xt/eml_check_limits.t xt/git-http-backend.t xt/git_async_cmp.t xt/httpd-async-stream.t xt/imapd-mbsync-oimap.t xt/imapd-validate.t xt/mem-imapd-tls.t xt/mem-msgview.t xt/msgtime_cmp.t xt/nntpd-validate.t xt/perf-msgview.t xt/perf-nntpd.t xt/perf-threading.t xt/solver.t public-inbox-1.6.1/Makefile.PL000066400000000000000000000152731377346120300161020ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2013-2020 all contributors # License: AGPL-3.0+ use strict; use ExtUtils::MakeMaker; open my $m, '<', 'MANIFEST' or die "open(MANIFEST): $!\n"; chomp(my @manifest = (<$m>)); my @EXE_FILES = grep(m!^script/!, @manifest); my $v = {}; my $t = {}; # do not sort my @RELEASES = qw(v1.6.1 v1.6.0 v1.5.0 v1.4.0 v1.3.0 v1.2.0 v1.1.0-pre1 v1.0.0); $v->{news_deps} = [ map { "Documentation/RelNotes/$_.eml" } @RELEASES ]; $v->{txt} = [ qw(INSTALL README COPYING TODO HACKING) ]; my @dtxt = grep(m!\ADocumentation/[^/]+\.txt\z!, @manifest); push @dtxt, 'Documentation/standards.txt'; push @dtxt, 'Documentation/flow.txt'; push @dtxt, @{$v->{txt}}; for my $txt (@dtxt) { my $html = $txt; $html =~ s/\.txt\z/.html/ or $html .= '.html'; $t->{"$html : $txt"} = [ "\$(txt2pre) <$txt" ]; } $v->{t_slash_star_dot_t} = [ grep(m!\At/.*\.t\z!, @manifest) ]; my @scripts = qw(scripts/ssoma-replay); # legacy my @syn = (@EXE_FILES, grep(m!^lib/.*\.pm$!, @manifest), @scripts); @syn = grep(!/DSKQXS\.pm/, @syn) if !eval { require IO::KQueue }; @syn = grep(!/Unsubscribe\.pm/, @syn) if !eval { require Crypt::CBC }; @syn = grep(!/SaPlugin/, @syn) if !eval { require Mail::SpamAssasin }; $v->{syn_files} = \@syn; $v->{my_syntax} = [map { "$_.syntax" } @syn]; $v->{-m1} = [ map { (split('/'))[-1] } @EXE_FILES ]; $v->{-m5} = [ qw(public-inbox-config public-inbox-v1-format public-inbox-v2-format) ]; $v->{-m7} = [ qw(public-inbox-overview public-inbox-tuning) ]; $v->{-m8} = [ qw(public-inbox-daemon) ]; my @sections = (1, 5, 7, 8); $v->{check_80} = []; $v->{manuals} = []; $v->{mantxt} = []; for my $i (@sections) { my $ary = $v->{"-m$i"}; $v->{"m$i"} = $ary; for my $m (@$ary) { my $pod = "Documentation/$m.pod"; my $txt = "Documentation/$m.txt"; $t->{"$m.$i : $pod"} = [ "\$(podman) -s$i $pod \$@" ]; $t->{"$txt : $pod"} = [ "\$(podtext) $pod \$\@+", "touch -r $pod \$\@+", "mv \$\@+ \$@" ]; $t->{"Documentation/$m.html : $txt"} = [ "\$(txt2pre) <$txt" ]; $t->{".$m.cols : $m.$i"} = [ "\@echo CHECK80 $m.$i;". "COLUMNS=80 \$(MAN) ./$m.$i | \$(check_man)" ]; } push @{$v->{check_80}}, map { ".$_.cols" } @$ary; my $manuals = $v->{"man$i"} = [ map { "$_.$i" } @$ary ]; push @{$v->{manuals}}, @$manuals; push @{$v->{mantxt}}, map { "Documentation/$_.txt" } @$ary; } push @dtxt, @{$v->{mantxt}}; $v->{docs} = [ @dtxt, 'NEWS' ]; $v->{docs_html} = [ map {; my $x = $_; $x =~ s/\.txt\z//; "$x.html" } @{$v->{docs}} ]; $v->{gz_docs} = [ map { "$_.gz" } (@{$v->{docs}},@{$v->{docs_html}}) ]; $v->{rsync_docs} = [ @{$v->{gz_docs}}, @{$v->{docs}}, @{$v->{docs_html}}, qw(NEWS.atom NEWS.atom.gz)]; # external manpages which we host ourselves, since some packages # (currently just Xapian) doesn't host manpages themselves. my @xman = qw(copydatabase.1 xapian-compact.1); $v->{xdocs} = [ map { "Documentation/.$_.txt" } @xman ]; $v->{xdocs_html} = [ map { "Documentation/.$_.html" } @xman ]; for (@{$v->{xdocs}}) { $t->{"$_:"} = [ '$(PERL) -w Documentation/extman.perl $@ >$@+', 'mv $@+ $@' ]; my $html = $_; $html =~ s/\.txt\z/.html/; $t->{"$html : $_"} = [ "\$(txt2pre) <$_" ]; } $v->{gz_xdocs} = [ map { "$_.gz" } (@{$v->{xdocs_html}}, @{$v->{xdocs}}) ]; $v->{rsync_xdocs} = [ @{$v->{gz_xdocs}}, @{$v->{xdocs_html}}, @{$v->{xdocs}} ]; my $TGTS = join("\n", map {; my $tgt_prereq = $_; my $cmds = $t->{$_}; "$tgt_prereq\n".join('', map { "\t$_\n" } @$cmds); } sort keys %$t); my $VARS = join("\n", map {; my $varname = $_; join('', map { "$varname += $_\n" } sort @{$v->{$varname}}); } grep(!/^-/, sort keys %$v)); $VARS .= "\nRELEASES = ".join(' ', @RELEASES)."\n"; # Don't waste user's disk space by installing some pods from # imported code or internal use only my %man3 = map {; # semi-colon tells Perl this is a BLOCK (and not EXPR) my $base = $_; my $mod = $base; $mod =~ s!/!::!g; $mod =~ s/\.\w+\z//; "lib/PublicInbox/$_" => "blib/man3/PublicInbox::$mod.\$(MAN3EXT)" } qw(Git.pm Import.pm WWW.pod SaPlugin/ListMirror.pod); WriteMakefile( NAME => 'PublicInbox', # n.b. camel-case is not our choice # XXX drop "PENDING" in .pod before updating this! VERSION => '1.6.1', AUTHOR => 'Eric Wong ', ABSTRACT => 'public-inbox server infrastructure', EXE_FILES => \@EXE_FILES, PREREQ_PM => { # note: we use spamc(1), NOT the Perl modules # We also depend on git. # Keep this sorted and synced to the INSTALL document # libperl$PERL_VERSION, # `perl5' on FreeBSD # perl-Digest-SHA on RH-based 'Digest::SHA' => 0, # libperl$PERL_VERSION or libencode-perl on Debian, # `perl5' on FreeBSD 'Encode' => 2.35, # 2.35 shipped with 5.10.1 # libperl$PERL_VERSION + perl-modules-$PERL_VERSION 'Compress::Raw::Zlib' => 0, 'Compress::Zlib' => 0, 'IO::Compress::Gzip' => 0, # Plack is needed for public-inbox-httpd and PublicInbox::WWW # 'Plack' => 0, # TODO: this should really be made optional... 'URI::Escape' => 0, # We have more test dependencies, but do not force # users to install them. See INSTALL # All Perl installs I know about have these, but RH-based # distros make them separate even though 'perl' pulls them in 'File::Path' => 0, 'File::Temp' => '0.19', # for ->tmpdir support 'Getopt::Long' => 0, 'Exporter' => 0, # ExtUtils::MakeMaker # this file won't run w/o it... }, MAN3PODS => \%man3, ); sub MY::postamble { my $N = (`{ getconf _NPROCESSORS_ONLN || nproc; } 2>/dev/null` || 1); $N += 1; # account for sleeps in some tests (and makes an IV) <MANIFEST.gen 2>&1; then \\ diff -u MANIFEST MANIFEST.gen; fi check-manifest : MANIFEST \$(check_manifest) # the traditional way running per-*.t processes: check-each :: pure_all \$(EATMYDATA) \$(PROVE) --state=save -bvw -j\$(N) -@\$(check_manifest) # lightly-tested way to run tests, relies "--state=save" in check-each # for best performance check-run :: pure_all check-man \$(EATMYDATA) \$(PROVE) -bvw t/run.perl :: -j\$(N) -@\$(check_manifest) check :: check-each lib/PublicInbox/UserContent.pm :: contrib/css/216dark.css \$(PERL) -I lib \$@ \$? # Ensure new .pm files will always be installed by updating # the timestamp of Makefile.PL which forces Makefile to be remade Makefile.PL : MANIFEST touch -r MANIFEST \$@ \$(PERLRUN) \$@ EOF } public-inbox-1.6.1/README000066400000000000000000000141271377346120300150050ustar00rootroot00000000000000public-inbox - an "archives first" approach to mailing lists ------------------------------------------------------------ public-inbox implements the sharing of an email inbox via git to complement or replace traditional mailing lists. Readers may read via NNTP, Atom feeds or HTML archives. public-inbox spawned around three main ideas: * Publicly accessible and archived communication is essential to Free Software development. * Contributing to Free Software projects should not require the use of non-Free services or software. * Graphical user interfaces should not be required for text-based communication. Users may have broken graphics drivers, limited eyesight, or be unable to afford modern hardware. public-inbox aims to be easy-to-deploy and manage; encouraging projects to run their own instances with minimal overhead. Implementation -------------- public-inbox stores mail in git repositories as documented in https://public-inbox.org/public-inbox-v2-format.txt and https://public-inbox.org/public-inbox-v1-format.txt By storing (and optionally) exposing an inbox via git, it is fast and efficient to host and mirror public-inboxes. Traditional mailing lists use the "push" model. For readers, that requires commitment to subscribe and effort to unsubscribe. New readers may also have difficulty following existing discussions if archives do not expose Message-ID and References headers. List server admins are also burdened with delivery failures. public-inbox uses the "pull" model. Casual readers may follow the list via NNTP, Atom feed or HTML archives. If a reader loses interest, they simply stop following. Since we use git, mirrors are easy-to-setup, and lists are easy-to-relocate to different mail addresses without losing or splitting archives. _Anybody_ may also setup a delivery-only mailing list server to replay a public-inbox git archive to subscribers via SMTP. Features -------- * anybody may participate via plain-text email * stores email in git, readers may have a complete archive of the inbox * Atom feed and NNTP allows casual readers to follow via feed reader * uses only well-documented and easy-to-implement data formats Try it out now, see https://try.public-inbox.org/ Requirements for reading: * any software capable of NNTP or following Atom feeds Any basic web browser will do for the HTML archives. We primarily develop on w3m to maximize accessibility. Requirements (participant) -------------------------- * any MUA which may send text-only emails ("git send-email" works!) Users are strongly encouraged to use the "reply-all" feature of their mailers to reduce the impact of a public-inbox as a single point of failure. * The HTTP web interface exposes mboxrd files, and NNTP clients often feature reply-by-email functionality * participants do not need to install public-inbox, only server admins Requirements (server) --------------------- See https://public-inbox.org/INSTALL Hacking ------- AGPL source code is available via git: git clone https://public-inbox.org/public-inbox.git git clone https://repo.or.cz/public-inbox.git torsocks git clone http://hjrcffqmbrq6wope.onion/public-inbox See below for contact info. Contact ------- We are happy to see feedback of all types via plain-text email. public-inbox discussion is self-hosting on public-inbox.org Please send comments, user/developer discussion, patches, bug reports, and pull requests to our public-inbox address at: meta@public-inbox.org Please Cc: all recipients when replying as we do not require subscription. This also makes it easier to rope in folks of tangentially related projects we depend on (e.g. git developers on git@vger.kernel.org). The archives are readable via NNTP or HTTP: nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta https://public-inbox.org/meta/ And as Tor hidden services: http://hjrcffqmbrq6wope.onion/meta/ nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta You may also clone all messages via git: git clone --mirror https://public-inbox.org/meta/ torsocks git clone --mirror http://hjrcffqmbrq6wope.onion/meta/ Anti-Spam --------- The maintainer of public-inbox has found SpamAssassin a good tool for filtering his personal mail, and it will be the default spam filtering tool in public-inbox. See https://public-inbox.org/dc-dlvr-spam-flow.html for more info. Content Filtering ----------------- To discourage phishing, trackers, exploits and other nuisances, only plain-text emails are allowed and HTML is rejected by default. This improves accessibility, and saves bandwidth and storage as mail is archived forever. As of the 2010s, successful online social networks and forums are the ones which heavily restrict users formatting options; so public-inbox aims to preserve the focus on content, and not presentation. Copyright --------- Copyright 2013-2020 all contributors License: AGPL-3.0+ This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . Additional permission under GNU GPL version 3 section 7: If you modify this program, or any covered work, by linking or combining it with the OpenSSL project's OpenSSL library (or a modified version of that library), containing parts covered by the terms of the OpenSSL or SSLeay licenses, the copyright holder(s) grants you additional permission to convey the resulting work. Corresponding Source for a non-source form of such a combination shall include the source code for the parts of OpenSSL used as well as that of the covered work. public-inbox-1.6.1/TODO000066400000000000000000000144671377346120300146240ustar00rootroot00000000000000TODO items for public-inbox (Not in any particular order, and performance, ease-of-setup, installation, maintainability, etc all need to be considered for everything we introduce) * general performance improvements, but without relying on XS or pre-built modules any more than we currently do. (Optional Inline::C and user-compiled re2c acceptable) * mailmap support (same as git) for remapping expired email addresses * support remapping of expired URLs similar to mailmap (coordinate with git.git with this?) * POP3 server, since some webmail providers support external POP3: https://public-inbox.org/meta/20160411034104.GA7817@dcvr.yhbt.net/ Perhaps make this depend solely the NNTP server and work as a proxy. Meaning users can run this without needing a full copy of the archives in git repositories. * HTTP, IMAP and NNTP proxy support. Allow us to be a frontend for firewalled off (or Tor-exclusive) instances. The use case is for offering a publicly accessible IP with a cheap VPS, yet storing large amounts of data on computers without a public IP behind a home Internet connection. * support HTTP(S) CONNECT proxying to NNTP for users with firewall problems * DHT (distributed hash table) for mapping Message-IDs to various archive locations to avoid SPOF. * optional Cache::FastMmap support so production deployments won't need Varnish (Varnish doesn't protect NNTP or IMAP, either) * dogfood and take advantage of new kernel APIs (while maintaining portability to older Linux, free BSDs and maybe Hurd). * dogfood latest Xapian, Perl5, SQLite, git and various modules to ensure things continue working as they should (or more better) while retaining compatibility with old versions. * Support more of RFC 3977 (NNTP) Is there anything left for read-only support? * Combined "super server" for NNTP/HTTP/POP3/IMAP to reduce memory, process, and FD overhead * Configurable linkification for per-inbox shorthands: "$gmane/123456" could be configured to expand to the appropriate link pointing to the gmane.io list archives, likewise "[Bug #123456]" could be configured to expand to point to some project's bug tracker at http://example.com/bug/123456 * configurable synonym and spelling support in Xapian * Support optional "HTTPS Everywhere" for mapping old HTTP to HTTPS links if (and only if) the user wants to use HTTPS. We may also be able to configure redirects for expired URLs. Note: message bodies rendered as HTML themselves must NOT change, the links should point to an anchor tag within the same page, instead; giving the user options. * configurable constants (index limits, search results) * handle messages with multiple Message-IDs (done for v2, doable for v1) * handle broken double-bracketed References properly (maybe) and totally broken Message-IDs cf. https://public-inbox.org/git/20160814012706.GA18784@starla/ * improve documentation * linkify thread skeletons better https://public-inbox.org/git/6E3699DEA672430CAEA6DEFEDE6918F4@PhilipOakley/ * Further lower mail parser memory usage. We still slurp entire message bodies into memory and incur 2-3x overhead on multipart messages. Inline::C (and maybe gmime) could work. * use REQUEST_URI properly for CGI / mod_perl2 compatibility with Message-IDs which include '%' (done?) * more and better test cases (use git fast-import to speed up creation) * large mbox/Maildir/MH/NNTP spool import (see PublicInbox::Import) * Read-only WebDAV interface to the git repo so it can be mounted via davfs2 or fusedav to avoid full clones. davfs2 needs Range: request support for this to be feasible: https://savannah.nongnu.org/bugs/?33259 https://savannah.nongnu.org/support/?107649 * Contribute something like IMAP IDLE for "git fetch". Inboxes (and any git repos) can be kept up-to-date without relying on polling. * Improve bundle support in git to make it cheaper to host/clone with dumb HTTP(S) servers. * Expose targeted reindexing of individual messages. Sometimes an indexing bug only affects a handful of messages, so it's not worth the trouble of doing a full reindex. * code repository integration (cgit: done, TODO: gitweb, etc...) * migration path to v2 without breaking v1 "git fetch" cronjobs * imperfect scraper importers for obfuscated list archives (e.g. obfuscated Mailman stuff, Google Groups, etc...) * improve performance and avoid head-of-line blocking on slow storage (done for most git blob retrievals, Xapian needs work) * HTTP(S) search API (likely JMAP, but GraphQL could be an option) It should support git-specific prefixes (dfpre:, dfpost:, dfn:, etc) as extensions. If JMAP, it should have HTTP(S) analogues to various IMAP extensions. * search across multiple inboxes, or admin-definable groups of inboxes This will require a new detached Xapian index that can be used in parallel with existing per-inbox indices. Using ->add_database with hundreds of shards is unusable in current Xapian as of August 2020 (acknowledged by Xapian upstream). * scalability to tens/hundreds of thousands of inboxes - pagination for WwwListing - inotify-based manifest.js.gz updates - process/FD reduction (needs to be slow-storage friendly) ... * command-line tool (similar to mairix/notmuch, but solver+git-aware) * consider removing doc_data from Xapian, redundant with over.sqlite3 It's no longer read as of public-inbox 1.6.0, but still written for compatibility. * share "git cat-file --batch" processes across inboxes to avoid bumping into /proc/sys/fs/pipe-user-pages-* limits * make "git cat-file --batch" detect unlinked packfiles so we don't have to restart processes (very long-term) * support searching based on `git-patch-id --stable` to improve bidirectional mapping of commits <=> emails * linter to check validity of config file * linter option and WWW endpoint to graph relationships and flows between inboxes, addresses, Maildirs, coderepos, newsgroups, IMAP mailboxes, etc... * pygments support - via Python script similar to `git cat-file --batch' to avoid startup penalty. pygments.rb (Ruby) can be inspiration, too. * highlighting + linkification for "git format-patch --interdiff" output * highlighting for "git format-patch --range-diff" output (linkification is too expensive, as it requires mirroring) * support UUCP addresses for legacy archives public-inbox-1.6.1/certs/000077500000000000000000000000001377346120300152405ustar00rootroot00000000000000public-inbox-1.6.1/certs/.gitignore000066400000000000000000000000301377346120300172210ustar00rootroot00000000000000*.pem *.der *.enc *.p12 public-inbox-1.6.1/certs/create-certs.perl000077500000000000000000000070121377346120300205100ustar00rootroot00000000000000#!/usr/bin/perl -w # License: GPL-1.0+ or Artistic-1.0-Perl # from IO::Socket::SSL 2.063 / https://github.com/noxxi/p5-io-socket-ssl use strict; use warnings; use IO::Socket::SSL::Utils; use Net::SSLeay; my $dir = -d 'certs' && -f 'Makefile.PL' ? './certs/' : './'; my $now = time(); my $later = 0x7fffffff; # 2038 problems on 32-bit :< Net::SSLeay::SSLeay_add_ssl_algorithms(); my $sha256 = Net::SSLeay::EVP_get_digestbyname('sha256') or die; my $printfp = sub { my ($w,$cert) = @_; print $w.' sha256$'.unpack('H*',Net::SSLeay::X509_digest($cert, $sha256))."\n" }; my %time_valid = (not_before => $now, not_after => $later); my @ca = CERT_create( CA => 1, subject => { CN => 'IO::Socket::SSL Demo CA' }, %time_valid, ); save('test-ca.pem',PEM_cert2string($ca[0])); my @server = CERT_create( CA => 0, subject => { CN => 'server.local' }, purpose => 'server', issuer => \@ca, %time_valid, ); save('server-cert.pem',PEM_cert2string($server[0])); save('server-key.pem',PEM_key2string($server[1])); $printfp->(server => $server[0]); @server = CERT_create( CA => 0, subject => { CN => 'server2.local' }, purpose => 'server', issuer => \@ca, %time_valid, ); save('server2-cert.pem',PEM_cert2string($server[0])); save('server2-key.pem',PEM_key2string($server[1])); $printfp->(server2 => $server[0]); @server = CERT_create( CA => 0, subject => { CN => 'server-ecc.local' }, purpose => 'server', issuer => \@ca, key => KEY_create_ec(), %time_valid, ); save('server-ecc-cert.pem',PEM_cert2string($server[0])); save('server-ecc-key.pem',PEM_key2string($server[1])); $printfp->('server-ecc' => $server[0]); my @client = CERT_create( CA => 0, subject => { CN => 'client.local' }, purpose => 'client', issuer => \@ca, %time_valid, ); save('client-cert.pem',PEM_cert2string($client[0])); save('client-key.pem',PEM_key2string($client[1])); $printfp->(client => $client[0]); my @swc = CERT_create( CA => 0, subject => { CN => 'server.local' }, purpose => 'server', issuer => \@ca, subjectAltNames => [ [ DNS => '*.server.local' ], [ IP => '127.0.0.1' ], [ DNS => 'www*.other.local' ], [ DNS => 'smtp.mydomain.local' ], [ DNS => 'xn--lwe-sna.idntest.local' ] ], %time_valid, ); save('server-wildcard.pem',PEM_cert2string($swc[0]),PEM_key2string($swc[1])); my @subca = CERT_create( CA => 1, issuer => \@ca, subject => { CN => 'IO::Socket::SSL Demo Sub CA' }, %time_valid, ); save('test-subca.pem',PEM_cert2string($subca[0])); @server = CERT_create( CA => 0, subject => { CN => 'server.local' }, purpose => 'server', issuer => \@subca, %time_valid, ); save('sub-server.pem',PEM_cert2string($server[0]).PEM_key2string($server[1])); my @cap = CERT_create( CA => 1, subject => { CN => 'IO::Socket::SSL::Intercept' }, %time_valid, ); save('proxyca.pem',PEM_cert2string($cap[0]).PEM_key2string($cap[1])); sub save { my $file = shift; open(my $fd,'>',$dir.$file) or die $!; print $fd @_; } system(< # License: AGPL-3.0+ # Helper script for installing/uninstalling packages for CI use # Intended for use on non-production chroots or VMs since it # changes installed packages use strict; my $usage = "$0 PKG_FMT PROFILE [PROFILE_MOD]"; my $pkg_fmt = shift; @ARGV or die $usage, "\n"; my @test_essential = qw(Test::Simple); # we actually use Test::More # package profiles my $profiles = { # the smallest possible profile for testing # TODO: trim URI::Escape from this, maybe essential => [ qw( git perl Devel::Peek Digest::SHA Encode ExtUtils::MakeMaker IO::Compress::Gzip URI::Escape ), @test_essential ], # everything optional for normal use optional => [ qw( Date::Parse BSD::Resource DBD::SQLite DBI Inline::C Net::Server Plack Plack::Test Plack::Middleware::ReverseProxy Search::Xapian Socket6 highlight.pm xapian-compact ) ], # optional developer stuff devtest => [ qw( XML::TreePP curl w3m Plack::Test::ExternalServer ) ], }; # account for granularity differences between package systems and OSes my @precious; if ($^O eq 'freebsd') { @precious = qw(perl curl Socket6 IO::Compress::Gzip); } elsif ($pkg_fmt eq 'rpm') { @precious = qw(perl curl); } if (@precious) { my $re = join('|', map { quotemeta($_) } @precious); for my $list (values %$profiles) { @$list = grep(!/\A(?:$re)\z/, @$list); } push @{$profiles->{essential}}, @precious; } # bare minimum for v2 $profiles->{v2essential} = [ @{$profiles->{essential}}, qw(DBD::SQLite DBI) ]; # package names which can't be mapped automatically: my $non_auto = { 'perl' => { pkg => 'perl5' }, 'Date::Parse' => { deb => 'libtimedate-perl', pkg => 'p5-TimeDate', rpm => 'perl-TimeDate', }, 'Devel::Peek' => { deb => 'perl', # libperl5.XX, but the XX varies pkg => 'perl5', }, 'Digest::SHA' => { deb => 'perl', # libperl5.XX, but the XX varies pkg => 'perl5', }, 'Encode' => { deb => 'perl', # libperl5.XX, but the XX varies pkg => 'perl5', rpm => 'perl-Encode', }, 'ExtUtils::MakeMaker' => { deb => 'perl', # perl-modules-5.xx pkg => 'perl5', rpm => 'perl-ExtUtils-MakeMaker', }, 'IO::Compress::Gzip' => { deb => 'perl', # perl-modules-5.xx pkg => 'perl5', rpm => 'perl-IO-Compress', }, 'DBD::SQLite' => { deb => 'libdbd-sqlite3-perl' }, 'Plack::Test' => { deb => 'libplack-perl', pkg => 'p5-Plack', rpm => 'perl-Plack-Test', }, 'URI::Escape' => { deb => 'liburi-perl', pkg => 'p5-URI', rpm => 'perl-URI', }, 'Test::Simple' => { deb => 'perl', # perl-modules-5.XX, but the XX varies pkg => 'perl5', rpm => 'perl-Test-Simple', }, 'highlight.pm' => { deb => 'libhighlight-perl', pkg => [], rpm => [], }, # we call xapian-compact(1) in public-inbox-compact(1) 'xapian-compact' => { deb => 'xapian-tools', pkg => 'xapian-core', rpm => 'xapian-core', # ??? }, # OS-specific 'IO::KQueue' => { deb => [], pkg => 'p5-IO-KQueue', rpm => [], }, }; my (@pkg_install, @pkg_remove, %all); for my $ary (values %$profiles) { $all{$_} = \@pkg_remove for @$ary; } if ($^O eq 'freebsd') { $all{'IO::KQueue'} = \@pkg_remove; } $profiles->{all} = [ keys %all ]; # pseudo-profile for all packages # parse the profile list from the command-line for my $profile (@ARGV) { if ($profile =~ s/-\z//) { # like apt-get, trailing "-" means remove profile2dst($profile, \@pkg_remove); } else { profile2dst($profile, \@pkg_install); } } # fill in @pkg_install and @pkg_remove: while (my ($pkg, $dst_pkg_list) = each %all) { push @$dst_pkg_list, list(pkg2ospkg($pkg, $pkg_fmt)); } my @apt_opts = qw(-o APT::Install-Recommends=false -o APT::Install-Suggests=false); # OS-specific cleanups appreciated if ($pkg_fmt eq 'deb') { my @quiet = $ENV{V} ? () : ('-q'); root('apt-get', @apt_opts, qw(install --purge -y), @quiet, @pkg_install, # apt-get lets you suffix a package with "-" to # remove it in an "install" sub-command: map { "$_-" } @pkg_remove); root('apt-get', @apt_opts, qw(autoremove --purge -y), @quiet); } elsif ($pkg_fmt eq 'pkg') { my @quiet = $ENV{V} ? () : ('-q'); # FreeBSD, maybe other *BSDs are similar? # don't remove stuff that isn't installed: exclude_uninstalled(\@pkg_remove); root(qw(pkg remove -y), @quiet, @pkg_remove) if @pkg_remove; root(qw(pkg install -y), @quiet, @pkg_install) if @pkg_install; root(qw(pkg autoremove -y), @quiet); # TODO: yum / rpm support } elsif ($pkg_fmt eq 'rpm') { my @quiet = $ENV{V} ? () : ('-q'); exclude_uninstalled(\@pkg_remove); root(qw(yum remove -y), @quiet, @pkg_remove) if @pkg_remove; root(qw(yum install -y), @quiet, @pkg_install) if @pkg_install; } else { die "unsupported package format: $pkg_fmt\n"; } exit 0; # map a generic package name to an OS package name sub pkg2ospkg { my ($pkg, $fmt) = @_; # check explicit overrides, first: if (my $ospkg = $non_auto->{$pkg}->{$fmt}) { return $ospkg; } # check common Perl module name patterns: if ($pkg =~ /::/ || $pkg =~ /\A[A-Z]/) { if ($fmt eq 'deb') { $pkg =~ s/::/-/g; $pkg =~ tr/A-Z/a-z/; return "lib$pkg-perl"; } elsif ($fmt eq 'rpm') { $pkg =~ s/::/-/g; return "perl-$pkg" } elsif ($fmt eq 'pkg') { $pkg =~ s/::/-/g; return "p5-$pkg" } else { die "unsupported package format: $fmt for $pkg\n" } } # use package name as-is (e.g. 'curl' or 'w3m') $pkg; } # maps a install profile to a package list (@pkg_remove or @pkg_install) sub profile2dst { my ($profile, $dst_pkg_list) = @_; if (my $pkg_list = $profiles->{$profile}) { $all{$_} = $dst_pkg_list for @$pkg_list; } elsif ($all{$profile}) { # $profile is just a package name $all{$profile} = $dst_pkg_list; } else { die "unrecognized profile or package: $profile\n"; } } sub exclude_uninstalled { my ($list) = @_; my %inst_check = ( pkg => sub { system(qw(pkg info -q), $_[0]) == 0 }, deb => sub { system("dpkg -s $_[0] >/dev/null 2>&1") == 0 }, rpm => sub { system("rpm -qs $_[0] >/dev/null 2>&1") == 0 }, ); my $cb = $inst_check{$pkg_fmt} || die <<""; don't know how to check install status for $pkg_fmt my @tmp; for my $pkg (@$list) { push @tmp, $pkg if $cb->($pkg); } @$list = @tmp; } sub root { print join(' ', @_), "\n"; return if $ENV{DRY_RUN}; return if system(@_) == 0; warn 'command failed: ', join(' ', @_), "\n"; exit($? >> 8); } # ensure result can be pushed into an array: sub list { my ($pkg) = @_; ref($pkg) eq 'ARRAY' ? @$pkg : $pkg; } public-inbox-1.6.1/ci/profiles.sh000077500000000000000000000030531377346120300166760ustar00rootroot00000000000000#!/bin/sh # Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ # Prints OS-specific package profiles to stdout (one per-newline) to use # as command-line args for ci/deps.perl. Called automatically by ci/run.sh # set by os-release(5) or similar ID= VERSION_ID= case $(uname -o) in GNU/Linux) for f in /etc/os-release /usr/lib/os-release do test -f $f || continue . $f # Debian sid (and testing) have no VERSION_ID case $ID--$VERSION_ID in debian--) case $PRETTY_NAME in */sid) VERSION_ID=sid ;; *) echo >&2 "$ID, but no VERSION_ID" echo >&2 "==> $f <==" cat >&2 $f exit 1 ;; esac ;; esac case $ID--$VERSION_ID in -|*--|--*) continue ;; *--*) break ;; esac done ;; FreeBSD) ID=freebsd VERSION_ID=$(uname -r | cut -d . -f 1) test "$VERSION_ID" -lt 11 && { echo >&2 "ID=$ID $(uname -r) too old to support"; exit 1 } esac case $ID in freebsd) PKG_FMT=pkg ;; debian|ubuntu) PKG_FMT=deb ;; centos|redhat|fedora) PKG_FMT=rpm ;; *) echo >&2 "PKG_FMT undefined for ID=$ID in $0" esac case $ID-$VERSION_ID in freebsd-11|freebsd-12) sed "s/^/$PKG_FMT /" < # License: AGPL-3.0+ set -e SUDO=${SUDO-'sudo'} PERL=${PERL-'perl'} MAKE=${MAKE-'make'} DO=${DO-''} set -x if test -f Makefile then $DO $MAKE clean fi ./ci/profiles.sh | while read args do $DO $SUDO $PERL -w ci/deps.perl $args $DO $PERL Makefile.PL $DO $MAKE $DO $MAKE check $DO $MAKE clean done public-inbox-1.6.1/contrib/000077500000000000000000000000001377346120300155605ustar00rootroot00000000000000public-inbox-1.6.1/contrib/css/000077500000000000000000000000001377346120300163505ustar00rootroot00000000000000public-inbox-1.6.1/contrib/css/216dark.css000066400000000000000000000032641377346120300202410ustar00rootroot00000000000000/* * CC0-1.0 * Dark color scheme using 216 web-safe colors, inspired * somewhat by the default color scheme in mutt. * It reduces eyestrain for me, and energy usage for all: * https://en.wikipedia.org/wiki/Light-on-dark_color_scheme */ * { background:#000; color:#ccc } /* * Underlined links add visual noise which make them hard-to-read. * Use colors to make them stand out, instead. */ a:link { color:#69f; text-decoration:none } a:visited { color:#96f } /* quoted text in emails gets a different color */ *.q { color:#09f } /* * these may be used with cgit , too. * (cgit uses
, public-inbox uses ) */ *.add { color:#0ff } /* diff post-image lines */ *.del { color:#f0f } /* diff pre-image lines */ *.head { color:#fff } /* diff header (metainformation) */ *.hunk { color:#c93 } /* diff hunk-header */ /* * highlight 3.x colors (tested 3.18) for displaying blobs. * This doesn't use most of the colors available, as I find too * many colors overwhelming, so the default is commented out. */ .hl.num { color:#f30 } /* number */ .hl.esc { color:#f0f } /* escape character */ .hl.str { color:#f30 } /* string */ .hl.ppc { color:#f0f } /* preprocessor */ .hl.pps { color:#f30 } /* preprocessor string */ .hl.slc { color:#09f } /* single-line comment */ .hl.com { color:#09f } /* multi-line comment */ /* .hl.opt { color:#ccc } */ /* operator */ /* .hl.ipl { color:#ccc } */ /* interpolation */ /* keyword groups kw[a-z] */ .hl.kwa { color:#ff0 } .hl.kwb { color:#0f0 } .hl.kwc { color:#ff0 } /* .hl.kwd { color:#ccc } */ /* line-number (unused by public-inbox) */ /* .hl.lin { color:#ccc } */ public-inbox-1.6.1/contrib/css/216light.css000066400000000000000000000027471377346120300204340ustar00rootroot00000000000000/* * CC0-1.0 * Light color scheme using 216 web-safe colors. * Suitable for print, and blinding people with brightness. * Haphazardly thrown together because bright colors hurt my eyes */ * { background:#fff; color:#333 } /* * Underlined links add visual noise which make them hard-to-read. * Use colors to make them stand out, instead. */ a:link { color:#00f; text-decoration:none } a:visited { color:#808 } /* quoted text gets a different color */ *.q { color:#006 } /* * these may be used with cgit, too * (cgit uses
, public-inbox uses ) */ *.add { color:#060 } *.del {color:#900 } *.head { color:#000 } *.hunk { color:#960 } /* * highlight 3.x colors (tested 3.18) for displaying blobs. * This doesn't use most of the colors available, as I find too * many colors overwhelming, so the default is commented out. */ .hl.num { color:#f30 } /* number */ .hl.esc { color:#f0f } /* escape character */ .hl.str { color:#f30 } /* string */ .hl.ppc { color:#c3c } /* preprocessor */ .hl.pps { color:#f30 } /* preprocessor string */ .hl.slc { color:#099 } /* single-line comment */ .hl.com { color:#099 } /* multi-line comment */ /* .hl.opt { color:#ccc } */ /* operator */ /* .hl.ipl { color:#ccc } */ /* interpolation */ /* keyword groups kw[a-z] */ .hl.kwa { color:#f90 } .hl.kwb { color:#060 } .hl.kwc { color:#f90 } /* .hl.kwd { color:#ccc } */ /* line-number (unused by public-inbox) */ /* .hl.lin { color:#ccc } */ public-inbox-1.6.1/contrib/css/README000066400000000000000000000033301377346120300172270ustar00rootroot00000000000000Example CSS for use with public-inbox. CSS::Minifier or CSS::Minifier::XS will be tried for minimizing CSS at startup if available(*). Multiple CSS files may be configured for user-selectability via the "title" attribute or for different media. Local CSS files are read into memory once at startup. If only one CSS file is given without "title", it will be inlined. Snippet from ~/.public-inbox/config, order matters to browsers. -----8<----- [publicinbox] ; Depending on the browser, the first entry is the default. ; So having "/dev/null" at the top means no colors by default. ; Using the "title" attribute enables `View -> "Page Style"' ; choices in Firefox. css = /dev/null title=default ; git-config supports backslash to continue long lines ; Attributes ('media', 'title') must use single quotes(') ; or no quotes at all, but not double-quotes, as git-config(1) ; won't preserve them: css = /path/to/public-inbox/contrib/css/216dark.css \ title=216dark \ media='screen,(prefers-color-scheme:dark)' ; for tree haters who print web pages :P css = /path/to/public-inbox/contrib/css/216light.css \ title=216light \ media='screen,print,(prefers-color-scheme:light)' ; external CSS may be specified with href. ; Using "//" (protocol-relative) URLs is allowed, as is ; "https://" or "http://" for hosts which only support one protocol. css = href=//example.com/fugly.css title=external All files in these example directory are CC0-1.0 (public domain): To the extent possible under law, Eric Wong has waived all copyright and related or neighboring rights to these examples. https://creativecommons.org/publicdomain/zero/1.0/legalcode (*) "libcss-minifier-perl" or "libcss-minifier-xs-perl" on Debian-based systems public-inbox-1.6.1/contrib/selinux/000077500000000000000000000000001377346120300172475ustar00rootroot00000000000000public-inbox-1.6.1/contrib/selinux/el7/000077500000000000000000000000001377346120300177365ustar00rootroot00000000000000public-inbox-1.6.1/contrib/selinux/el7/publicinbox.fc000066400000000000000000000013041377346120300225640ustar00rootroot00000000000000/usr/(local/)?bin/public-inbox-httpd -- gen_context(system_u:object_r:publicinbox_daemon_exec_t,s0) /usr/(local/)?bin/public-inbox-nntpd -- gen_context(system_u:object_r:publicinbox_daemon_exec_t,s0) /usr/(local/)?bin/public-inbox-watch -- gen_context(system_u:object_r:publicinbox_deliver_exec_t,s0) /usr/(local/)?bin/public-inbox-mda -- gen_context(system_u:object_r:publicinbox_deliver_exec_t,s0) /var/lib/public-inbox(/.*)? gen_context(system_u:object_r:publicinbox_var_lib_t,s0) /var/run/public-inbox(/.*)? gen_context(system_u:object_r:publicinbox_var_run_t,s0) /var/log/public-inbox(/.*)? gen_context(system_u:object_r:publicinbox_log_t,s0) public-inbox-1.6.1/contrib/selinux/el7/publicinbox.te000066400000000000000000000100371377346120300226070ustar00rootroot00000000000000################## # This policy allows running public-inbox-httpd and public-inbox-nntpd # on reasonable ports (119 for nntpd and 80/443/8080 for httpd) # # It also allows delivering mail via postfix-pipe to public-inbox-mda # # Author: Konstantin Ryabitsev # policy_module(publicinbox, 1.0.3) require { type postfix_pipe_t; type spamc_t; type spamd_t; } ################## # Declarations type publicinbox_daemon_t; type publicinbox_daemon_exec_t; init_daemon_domain(publicinbox_daemon_t, publicinbox_daemon_exec_t) type publicinbox_var_lib_t; files_type(publicinbox_var_lib_t) type publicinbox_log_t; logging_log_file(publicinbox_log_t) type publicinbox_var_run_t; files_tmp_file(publicinbox_var_run_t) type publicinbox_tmp_t; files_tmp_file(publicinbox_tmp_t) type publicinbox_deliver_t; type publicinbox_deliver_exec_t; init_daemon_domain(publicinbox_deliver_t, publicinbox_deliver_exec_t) # Uncomment to put these domains into permissive mode #permissive publicinbox_daemon_t; #permissive publicinbox_deliver_t; ################## # Daemons policy domain_use_interactive_fds(publicinbox_daemon_t) files_read_etc_files(publicinbox_daemon_t) miscfiles_read_localization(publicinbox_daemon_t) allow publicinbox_daemon_t self:tcp_socket create_stream_socket_perms; allow publicinbox_daemon_t self:tcp_socket { accept listen }; # Need to be able to manage and exec them for Inline::C manage_files_pattern(publicinbox_daemon_t, publicinbox_var_run_t, publicinbox_var_run_t) exec_files_pattern(publicinbox_daemon_t, publicinbox_var_run_t, publicinbox_var_run_t) # Logging append_files_pattern(publicinbox_daemon_t, publicinbox_log_t, publicinbox_log_t) create_files_pattern(publicinbox_daemon_t, publicinbox_log_t, publicinbox_log_t) setattr_files_pattern(publicinbox_daemon_t, publicinbox_log_t, publicinbox_log_t) logging_log_filetrans(publicinbox_daemon_t, publicinbox_log_t, { file dir }) # Run on httpd and nntp ports (called innd_port_t) corenet_tcp_bind_generic_node(publicinbox_daemon_t) corenet_tcp_bind_http_port(publicinbox_daemon_t) corenet_tcp_bind_http_cache_port(publicinbox_daemon_t) corenet_tcp_bind_innd_port(publicinbox_daemon_t) # Allow reading anything publicinbox_var_lib_t list_dirs_pattern(publicinbox_daemon_t, publicinbox_var_lib_t, publicinbox_var_lib_t) read_files_pattern(publicinbox_daemon_t, publicinbox_var_lib_t, publicinbox_var_lib_t) # The daemon doesn't need to write to this dir dontaudit publicinbox_daemon_t publicinbox_var_lib_t:file write; # Allow executing bin (for git, mostly) corecmd_exec_bin(publicinbox_daemon_t) # Manage our tmp files manage_dirs_pattern(publicinbox_daemon_t, publicinbox_tmp_t, publicinbox_tmp_t) manage_files_pattern(publicinbox_daemon_t, publicinbox_tmp_t, publicinbox_tmp_t) files_tmp_filetrans(publicinbox_daemon_t, publicinbox_tmp_t, { file dir }) ################## # mda/watch policy # # Allow transitioning to deliver_t from postfix pipe domtrans_pattern(postfix_pipe_t, publicinbox_deliver_exec_t, publicinbox_deliver_t) postfix_rw_inherited_master_pipes(publicinbox_deliver_t) postfix_read_spool_files(publicinbox_deliver_t) files_read_etc_files(publicinbox_deliver_t) # Allow managing anything in publicinbox_var_lib_t manage_dirs_pattern(publicinbox_deliver_t, publicinbox_var_lib_t, publicinbox_var_lib_t) manage_files_pattern(publicinbox_deliver_t, publicinbox_var_lib_t, publicinbox_var_lib_t) # Allow executing bin (for git, mostly) corecmd_exec_bin(publicinbox_deliver_t) # git-fast-import wants to access system state and other bits kernel_dontaudit_read_system_state(publicinbox_deliver_t) # Allow using spamc spamassassin_domtrans_client(publicinbox_deliver_t) manage_files_pattern(spamc_t, publicinbox_var_lib_t, publicinbox_var_lib_t) read_files_pattern(spamd_t, publicinbox_var_lib_t, publicinbox_var_lib_t) # Manage our tmp files manage_dirs_pattern(publicinbox_deliver_t, publicinbox_tmp_t, publicinbox_tmp_t) manage_files_pattern(publicinbox_deliver_t, publicinbox_tmp_t, publicinbox_tmp_t) files_tmp_filetrans(publicinbox_deliver_t, publicinbox_tmp_t, { file dir }) public-inbox-1.6.1/examples/000077500000000000000000000000001377346120300157365ustar00rootroot00000000000000public-inbox-1.6.1/examples/README000066400000000000000000000011551377346120300166200ustar00rootroot00000000000000Various example configuration files related to public-inbox ----------------------------------------------------------- For all server admins --------------------- public-inbox-config - configuration file, this maps configured inboxes For PSGI/Plack (HTTP) servers ----------------------------- public-inbox.psgi - starting point for PSGI/Plack users in production and dev For Apache2 users ----------------- apache2_perl.conf - intended to be the basis of a production config Contact ------- Please send any related feedback to public-inbox: meta@public-inbox.org Our public-inbox is: https://public-inbox.org/meta/ public-inbox-1.6.1/examples/README.unsubscribe000066400000000000000000000031371377346120300211450ustar00rootroot00000000000000Unsubscribe endpoints for mlmmj users (and possibly Mailman, too) * examples/unsubscribe.milter filters outgoing messages and appends an HTTPS URL to the List-Unsubscribe header. This List-Unsubscribe header should point to the PSGI described below. Currently, this is only active for a whitelist of test addresses in /etc/unsubscribe-milter.whitelist with one email address per line. * examples/unsubscribe.psgi is a PSGI which needs to run as the mlmmj user with permission to run mlmmj-unsub. This depends on the PublicInbox::Unsubscribe module which may be extracted from the rest of public-inbox. It is strongly recommended to NOT run the rest of the public-inbox WWW code in the same process as this PSGI. (The public-inbox WWW code will never need write permissions to anything besides stderr). * Both the .milter and .psgi examples are bundled with systemd service and socket activation examples. AFAIK no other PSGI server besides public-inbox-httpd supports systemd socket activation. To wire up the milter for postfix, I use the following in /etc/postfix/main.cf: # Milter configuration milter_default_action = accept milter_protocol = 2 # other milters may be chained here (e.g. opendkim) # chroot users will need to adjust this path smtpd_milters = local:/var/spool/postfix/unsubscribe/unsubscribe.sock # This is not needed for mlmmj since mlmmj uses SMTP: # non_smtpd_milters = local:/var/spool/postfix/unsubscribe/unsubscribe.sock Copyright (C) 2016-2020 all contributors License: AGPL-3.0+ public-inbox-1.6.1/examples/apache2_cgi.conf000066400000000000000000000022251377346120300207330ustar00rootroot00000000000000# Example Apache2 configuration using CGI mod_cgi # If possible, use mod_perl (see apache2_perl.conf) or # a standalone PSGI/Plack # server instead of this. # Adjust paths to your installation. ServerName "public-inbox" ServerRoot "/var/www/cgi-bin" DocumentRoot "/var/www/cgi-bin" ErrorLog "/tmp/public-inbox-error.log" PidFile "/tmp/public-inbox.pid" Listen 127.0.0.1:8080 LoadModule cgi_module /usr/lib/apache2/modules/mod_cgi.so LoadModule env_module /usr/lib/apache2/modules/mod_env.so LoadModule rewrite_module /usr/lib/apache2/modules/mod_rewrite.so LoadModule dir_module /usr/lib/apache2/modules/mod_dir.so LoadModule mime_module /usr/lib/apache2/modules/mod_mime.so TypesConfig "/dev/null" Options +ExecCGI AddHandler cgi-script .cgi # we use this hack to ensure "public-inbox.cgi" doesn't show up # in any of our redirects: SetEnv NO_SCRIPT_NAME 1 # our public-inbox.cgi requires PATH_INFO-based URLs with minimal # use of query parameters DirectoryIndex public-inbox.cgi RewriteEngine On RewriteCond %{REQUEST_FILENAME} !-f RewriteCond %{REQUEST_FILENAME} !-d RewriteRule ^.* /public-inbox.cgi/$0 [L,PT] public-inbox-1.6.1/examples/apache2_perl.conf000066400000000000000000000013501377346120300211310ustar00rootroot00000000000000# Example Apache2 configuration using Plack::Handler::Apache2 # Adjust paths to your installation ServerName "public-inbox" ServerRoot "/var/www" DocumentRoot "/var/www" ErrorLog "/tmp/public-inbox-error.log" PidFile "/tmp/public-inbox.pid" Listen 127.0.0.1:8080 LoadModule perl_module /usr/lib/apache2/modules/mod_perl.so # no need to set no rely on HOME if using this: PerlSetEnv PI_CONFIG /home/pi/.public-inbox/config SetHandler perl-script PerlResponseHandler Plack::Handler::Apache2 PerlSetVar psgi_app /path/to/public-inbox.psgi # Optional, preload the application in the parent like startup.pl use Plack::Handler::Apache2; Plack::Handler::Apache2->preload("/path/to/public-inbox.psgi"); public-inbox-1.6.1/examples/apache2_perl_old.conf000066400000000000000000000024301377346120300217670ustar00rootroot00000000000000# Example legacy Apache2 configuration using CGI + mod_perl2 # Consider using Plack::Handler::Apache2 instead (see apache2_perl.conf) # Adjust paths to your installation ServerName "public-inbox" ServerRoot "/var/www/cgi-bin" DocumentRoot "/var/www/cgi-bin" ErrorLog "/tmp/public-inbox-error.log" PidFile "/tmp/public-inbox.pid" Listen 127.0.0.1:8080 LoadModule perl_module /usr/lib/apache2/modules/mod_perl.so LoadModule rewrite_module /usr/lib/apache2/modules/mod_rewrite.so LoadModule dir_module /usr/lib/apache2/modules/mod_dir.so LoadModule mime_module /usr/lib/apache2/modules/mod_mime.so TypesConfig "/dev/null" # PerlPassEnv PATH # this is implicit Options +ExecCGI AddHandler perl-script .cgi PerlResponseHandler ModPerl::Registry PerlOptions +ParseHeaders # we use this hack to ensure "public-inbox.cgi" doesn't show up # in any of our redirects: PerlSetEnv NO_SCRIPT_NAME 1 # no need to set no rely on HOME if using this: PerlSetEnv PI_CONFIG /home/pi/.public-inbox/config # our public-inbox.cgi requires PATH_INFO-based URLs with minimal # use of query parameters DirectoryIndex public-inbox.cgi RewriteEngine On RewriteCond %{REQUEST_FILENAME} !-f RewriteCond %{REQUEST_FILENAME} !-d RewriteRule ^.* /public-inbox.cgi/$0 [L,PT] public-inbox-1.6.1/examples/cgi-webrick.rb000066400000000000000000000015041377346120300204510ustar00rootroot00000000000000#!/usr/bin/env ruby # Sample configuration using WEBrick, mainly intended dev/testing # for folks familiar with Ruby and not various Perl webserver # deployment options. For those familiar with Perl web servers, # plackup(1) is recommended for development and public-inbox-httpd(1) # is our production deployment server. require 'webrick' require 'logger' options = { :BindAddress => '127.0.0.1', :Port => 8080, :Logger => Logger.new($stderr), :CGIPathEnv => ENV['PATH'], # need to run 'git' commands :AccessLog => [ [ Logger.new($stdout), WEBrick::AccessLog::COMBINED_LOG_FORMAT ] ], } server = WEBrick::HTTPServer.new(options) server.mount("/", WEBrick::HTTPServlet::CGIHandler, "/var/www/cgi-bin/public-inbox.cgi") ['INT', 'TERM'].each do |signal| trap(signal) {exit!(0)} end server.start public-inbox-1.6.1/examples/cgit-commit-filter.lua000066400000000000000000000027461377346120300221510ustar00rootroot00000000000000-- Copyright (C) 2015-2020 all contributors -- License: GPLv2 or later -- This commit filter maps a subject line to a search URL of a public-inbox -- disclaimer: written by someone who does not know Lua. -- -- This requires cgit linked with Lua -- Usage (in your cgitrc(5) config file): -- -- commit-filter=lua:/path/to/this/script.lua -- -- Example site: https://80x24.org/public-inbox.git/ local urls = {} urls['public-inbox.git'] = 'https://public-inbox.org/meta/' -- additional URLs here... -- TODO we should be able to auto-generate this based on "coderepo" -- directives in the public-inbox config file; but keep in mind -- the mapping is M:N between inboxes and coderepos function filter_open(...) lineno = 0 buffer = "" end function filter_close() -- cgit opens and closes this filter for the commit subject -- and body separately, and we only generate the link based -- on the commit subject: if lineno == 1 and string.find(buffer, "\n") == nil then u = urls[os.getenv('CGIT_REPO_URL')] if u == nil then html(buffer) else html('') html_txt(buffer) html('') end else -- pass the body-through as-is -- TODO: optionally use WwwHighlight for linkification like -- cgit-wwwhighlight-filter.lua html(buffer) end return 0 end function filter_write(str) lineno = lineno + 1 buffer = buffer .. str end public-inbox-1.6.1/examples/cgit-wwwhighlight-filter.lua000066400000000000000000000045051377346120300233700ustar00rootroot00000000000000-- Copyright (C) 2019-2020 all contributors -- License: GPL-2.0+ -- -- This filter accesses the PublicInbox::WwwHighlight PSGI endpoint -- (see examples/highlight.psgi) -- -- Dependencies: lua-http -- -- disclaimer: written by someone who does not know Lua. -- -- This requires cgit linked with Lua -- Usage (in your cgitrc(5) config file): -- -- source-filter=lua:/path/to/this/script.lua -- about-filter=lua:/path/to/this/script.lua -- local wwwhighlight_url = 'http://127.0.0.1:9090/' local req_timeout = 10 local too_big = false -- match $PublicInbox::HTTP::MAX_REQUEST_BUFFER local max_len = 10 * 1024 * 1024 -- about-filter needs surrounding
 tags if all we do is
-- highlight and linkify
local pre = true

function filter_open(...)
	req_body = ""

	-- detect when we're used in an about-filter
	local repo_url = os.getenv('CGIT_REPO_URL')
	if repo_url then
		local path_info = os.getenv('PATH_INFO')
		rurl = path_info:match("^/(.+)/about/?$")
		pre = rurl == repo_url
	end

	-- hand filename off for language detection
	local fn = select(1, ...)
	if fn then
		local http_util = require 'http.util'
		wwwhighlight_url = wwwhighlight_url .. http_util.encodeURI(fn)
	end
end

-- try to buffer the entire source in memory
function filter_write(str)
	if too_big then
		html(str)
	elseif (req_body:len() + str:len()) > max_len then
		too_big = true
		req_body = ""
		html(req_body)
		html(str)
	else
		req_body = req_body .. str
	end
end

function fail(err)
	io.stderr:write(tostring(err), "\n")
	if pre then
		html("
")
	end
	html_txt(req_body)
	if pre then
		html("
") end return 1 end function filter_close() if too_big then return 0 end local request = require 'http.request' local req = request.new_from_uri(wwwhighlight_url) req.headers:upsert(':method', 'PUT') req:set_body(req_body) -- don't wait for 100-Continue message from the PSGI app req.headers:delete('expect') local headers, stream = req:go(req_timeout) if headers == nil then return fail(stream) end local status = headers:get(':status') if status ~= '200' then return fail('status ' .. status) end local body, err = stream:get_body_as_string() if not body and err then return fail(err) end if pre then html("
")
	end
	html(body)
	if pre then
		html("
") end return 0 end public-inbox-1.6.1/examples/cgit.psgi000066400000000000000000000012631377346120300175520ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2019-2020 all contributors # License: GPL-3.0+ # # PublicInbox::Cgit may be used independently of WWW. # # Usage (development, with auto-reload): # plackup -I lib -o 127.0.0.1 -R lib -r examples/cgit.psgi # # Usage (production, with public-inbox-httpd(1)): # public-inbox-httpd [OPTIONS] /path/to/examples/cgit.psgi use strict; use warnings; use Plack::Builder; use PublicInbox::Cgit; use PublicInbox::Config; my $pi_config = PublicInbox::Config->new; my $cgit = PublicInbox::Cgit->new($pi_config); builder { eval { enable 'ReverseProxy' }; enable 'Head'; sub { $cgit->call($_[0]) } } public-inbox-1.6.1/examples/grok-pull.post_update_hook.sh000077500000000000000000000101161377346120300235560ustar00rootroot00000000000000#!/bin/sh # use flock(1) from util-linux to avoid seek contention on slow HDDs # when using multiple `pull_threads' with grok-pull: # [ "${FLOCKER}" != "$0" ] && exec env FLOCKER="$0" flock "$0" "$0" "$@" || : # post_update_hook for repos.conf as used by grok-pull, takes a full # git repo path as it's first and only arg. full_git_dir="$1" url_base=http://127.0.0.1:8080/ # same default as other public-inbox-* tools PI_CONFIG=${PI_CONFIG-~/.public-inbox/config} # FreeBSD expr(1) only supports BRE, so no '+' EPOCH2MAIN='\(..*\)/git/[0-9][0-9]*\.git' # see if it's v2 or v1 based on tree contents, since somebody could # theoretically name a v1 inbox with a path that looks like a v2 epoch if git --git-dir="$full_git_dir" ls-tree --name-only HEAD | \ grep -E '^(m|d)$' >/dev/null then inbox_fmt=2 inbox_dir=$(expr "$full_git_dir" : "$EPOCH2MAIN") inbox_name=$(basename "$inbox_dir") msgmap="$inbox_dir"/msgmap.sqlite3 inbox_lock="$inbox_dir"/inbox.lock else inbox_fmt=1 inbox_dir="$full_git_dir" inbox_name=$(basename "$inbox_dir" .git) msgmap="$inbox_dir"/public-inbox/msgmap.sqlite3 inbox_lock="$inbox_dir"/ssoma.lock fi # run public-inbox-init iff unconfigured cfg_dir=$(git config -f "$PI_CONFIG" publicinbox."$inbox_name".inboxdir) # check legacy name for "inboxdir" case $cfg_dir in '') cfg_dir=$(git config -f "$PI_CONFIG" publicinbox."$inbox_name".mainrepo) ;; esac case $cfg_dir in '') remote_git_url=$(git --git-dir="$full_git_dir" config remote.origin.url) case $remote_git_url in '') echo >&2 "remote.origin.url unset in $full_git_dir/config" exit 1 ;; esac case $inbox_fmt in 1) remote_inbox_url="$remote_git_url" ;; 2) remote_inbox_url=$(expr "$remote_git_url" : "$EPOCH2MAIN") ;; esac config_url="$remote_inbox_url"/_/text/config/raw remote_config="$inbox_dir"/remote.config.$$ infourls= trap 'rm -f "$remote_config"' EXIT if curl --compressed -sSf -v "$config_url" >"$remote_config" then # n.b. inbox_name on the remote may not match our local # inbox_name, so we match all addresses in the remote config addresses=$(git config -f "$remote_config" -l | \ sed -ne 's/^publicinbox\..\+\.address=//p') case $addresses in '') echo >&2 'unable to extract address(es) from ' \ "$remote_config" exit 1 ;; esac newsgroups=$(git config -f "$remote_config" -l | \ sed -ne 's/^publicinbox\..\+\.newsgroup=//p') infourls=$(git config -f "$remote_config" -l | \ sed -ne 's/^publicinbox\..\+.infourl=//p') else newsgroups= addresses="$inbox_name@$$.$(hostname).example.com" echo >&2 "E: curl $config_url failed" echo >&2 "E: using bogus <$addresses> for $inbox_dir" fi local_url="$url_base$inbox_name" public-inbox-init -V$inbox_fmt "$inbox_name" \ "$inbox_dir" "$local_url" $addresses if test $? -ne 0 then echo >&2 "E: public-inbox-init failed on $inbox_dir" exit 1 fi for ng in $newsgroups do git config -f "$PI_CONFIG" \ "publicinbox.$inbox_name.newsgroup" "$ng" # only one newsgroup per inbox break done for url in $infourls do git config -f "$PI_CONFIG" \ "publicinbox.$inbox_name.infourl" "$url" done curl -sSfv "$remote_inbox_url"/description >"$inbox_dir"/description echo "I: $inbox_name at $inbox_dir ($addresses) $local_url" ;; esac # only run public-inbox-index if an index exists and has messages, # since epochs may be cloned out-of-order by grokmirror and we also # don't know what indexlevel a user wants if test -f "$msgmap" then # We need to use flock(1) (from util-linux) to avoid timeouts # and SQLite locking problems. # FreeBSD has a similar lockf(1) utility, but it unlinks by # default so we use `-k' to keep the lock on the FS. FLOCK=flock case $(uname -s) in FreeBSD) FLOCK='lockf -k' ;; # ... other OSes here esac n=$(echo 'SELECT COUNT(*) FROM msgmap' | \ $FLOCK $inbox_lock sqlite3 -readonly "$msgmap") case $n in 0|'') : v2 inboxes may be init-ed with an empty msgmap ;; *) # if on HDD and limited RAM, add `--sequential-shard' # and possibly a large `--batch-size' if you have much # memory in public-inbox 1.6.0+ $EATMYDATA public-inbox-index -v "$inbox_dir" ;; esac fi public-inbox-1.6.1/examples/highlight.psgi000066400000000000000000000011661377346120300205750ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ # # Usage: plackup [OPTIONS] /path/to/this/file # A startup command for development which monitors changes: # plackup -I lib -o 127.0.0.1 -R lib -r examples/highlight.psgi # # .psgi paths may also be passed to public-inbox-httpd(1) for # production deployments: # public-inbox-httpd [OPTIONS] /path/to/examples/highlight.psgi use strict; use warnings; use PublicInbox::WwwHighlight; use Plack::Builder; my $hl = PublicInbox::WwwHighlight->new; builder { sub { $hl->call(@_) }; } public-inbox-1.6.1/examples/lib/000077500000000000000000000000001377346120300165045ustar00rootroot00000000000000public-inbox-1.6.1/examples/lib/.gitignore000066400000000000000000000002511377346120300204720ustar00rootroot00000000000000# empty directory to placate newer versions of plackup -r/--reload # (or dependent modules) which fail on missing "lib" directory # relative to the .psgi file being run public-inbox-1.6.1/examples/logrotate.conf000066400000000000000000000012751377346120300206120ustar00rootroot00000000000000# ==> /etc/logrotate.d/public-inbox <== # # See the logrotate(8) manpage for more information: # http://linux.die.net/man/8/logrotate /var/log/public-inbox/*.log { weekly missingok rotate 52 compress delaycompress notifempty sharedscripts dateext # note the lack of the racy "copytruncate" option in this # config. public-inbox-*d supports the USR1 signal and # we send it as our "lastaction": lastaction # systemd users do not need PID files, # only signal the @1 process since the @2 is short-lived # For systemd users, assuming you use two services systemctl kill -s SIGUSR1 public-inbox-httpd@1.service systemctl kill -s SIGUSR1 public-inbox-nntpd@1.service endscript } public-inbox-1.6.1/examples/newswww.psgi000066400000000000000000000025371377346120300203520ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2019-2020 all contributors # License: GPL-3.0+ # # NewsWWW may be used independently of WWW. This can be useful # for mapping HTTP/HTTPS requests to the hostname of an NNTP server # to redirect users to the proper HTTP/HTTPS endpoint for a given # inbox. NewsWWW exists because people (or software) can mishandle # "nntp://" or "news://" URLs as "http://" (or "https://") # # Usage (development, with auto-reload): # plackup -I lib -o 127.0.0.1 -R lib -r examples/newswww.psgi # # Usage (production, with public-inbox-httpd(1)): # public-inbox-httpd [OPTIONS] /path/to/examples/newsww.psgi use strict; use warnings; use Plack::Builder; use PublicInbox::WWW; use PublicInbox::NewsWWW; my $newswww = PublicInbox::NewsWWW->new; # Optional, (you may drop the "mount '/'" section below) my $www = PublicInbox::WWW->new; $www->preload; builder { # HTTP/1.1 requests to "Host: news.example.com" will hit this: mount 'http://news.example.com/' => builder { enable 'Head'; sub { $newswww->call($_[0]) }; }; # rest of requests will hit this (optional) part for the # regular PublicInbox::WWW code: # see comments in examples/public-inbox.psgi for more info: mount '/' => builder { eval { enable 'ReverseProxy' }; enable 'Head'; sub { $www->call($_[0]) } }; } public-inbox-1.6.1/examples/nginx_proxy000066400000000000000000000013711377346120300202470ustar00rootroot00000000000000# Example NGINX configuration to proxy-pass requests # to public-inbox-httpd or to a standalone PSGI/Plack server. # The daemon is assumed to be running locally on port 8001. # Adjust ssl certificate paths if you use any, or remove # the ssl configuration directives if you don't. server { server_name _; listen 80; access_log /var/log/nginx/public-inbox-httpd_access.log; error_log /var/log/nginx/public-inbox-httpd_error.log; location ~* ^/(.*)$ { proxy_set_header HOST $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-Proto $scheme; proxy_pass http://127.0.0.1:8001$request_uri; } listen 443 ssl; ssl_certificate /path/to/certificate.pem; ssl_certificate_key /path/to/certificate_key.pem; } public-inbox-1.6.1/examples/public-inbox-config000066400000000000000000000010471377346120300215210ustar00rootroot00000000000000# this usually in ~/.public-inbox/config and parseable with git-config(1) # update t/config.t if changing this, that test relies on this [publicinbox "test"] address = try@public-inbox.org address = sandbox@public-inbox.org address = test@public-inbox.org ; note: "mainrepo" is the old name for "inboxdir", both ; remain supported for backwards compatibility. inboxdir = /home/pi/test-main.git url = http://example.com/test [publicinbox "meta"] address = meta@public-inbox.org inboxdir = /home/pi/meta-main.git url = http://example.com/meta public-inbox-1.6.1/examples/public-inbox-httpd.socket000066400000000000000000000003171377346120300226650ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-httpd.socket <== [Unit] Description = public-inbox-httpd socket [Socket] ListenStream = 80 Service = public-inbox-httpd@1.service [Install] WantedBy = sockets.target public-inbox-1.6.1/examples/public-inbox-httpd@.service000066400000000000000000000021421377346120300231330ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-httpd@.service <== # Since SIGUSR2 upgrades do not work under systemd, this service file # allows starting two simultaneous services during upgrade time # (e.g. public-inbox-httpd@1 public-inbox-httpd@2) with the intention # that they take turns running in-between upgrades. This should # allow upgrading without downtime. [Unit] Description = public-inbox PSGI server %i Wants = public-inbox-httpd.socket After = public-inbox-httpd.socket [Service] Environment = PI_CONFIG=/home/pi/.public-inbox/config \ PATH=/usr/local/bin:/usr/bin:/bin \ PERL_INLINE_DIRECTORY=/tmp/.pub-inline LimitNOFILE = 30000 ExecStartPre = /bin/mkdir -p -m 1777 /tmp/.pub-inline ExecStart = /usr/local/bin/public-inbox-httpd \ -1 /var/log/public-inbox/httpd.out.log StandardError = syslog # NonBlocking is REQUIRED to avoid a race condition if running # simultaneous services NonBlocking = true Sockets = public-inbox-httpd.socket KillSignal = SIGQUIT User = nobody Group = nogroup ExecReload = /bin/kill -HUP $MAINPID TimeoutStopSec = 86400 KillMode = process [Install] WantedBy = multi-user.target public-inbox-1.6.1/examples/public-inbox-imap-onion.socket000066400000000000000000000005271377346120300236130ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-imap-onion.socket <== # This unit is for the corresponding line in torrc(5): # HiddenServicePort 143 unix:/run/imapd.onion.sock [Unit] Description = public-inbox-imap .onion socket [Socket] ListenStream = /run/imapd.onion.sock Service = public-inbox-imapd@1.service [Install] WantedBy = sockets.target public-inbox-1.6.1/examples/public-inbox-imapd.socket000066400000000000000000000004111377346120300226270ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-imapd.socket <== [Unit] Description = public-inbox-imapd socket [Socket] ListenStream = 0.0.0.0:143 BindIPv6Only = ipv6-only ListenStream = [::]:143 Service = public-inbox-imapd@1.service [Install] WantedBy = sockets.target public-inbox-1.6.1/examples/public-inbox-imapd@.service000066400000000000000000000025721377346120300231110ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-imapd@.service <== # Since SIGUSR2 upgrades do not work under systemd, this service file # allows starting two simultaneous services during upgrade time # (e.g. public-inbox-imapd@1 public-inbox-imapd@2) with the intention # that they take turns running in-between upgrades. This should # allow upgrading without downtime. [Unit] Description = public-inbox-imapd IMAP server %i Wants = public-inbox-imapd.socket public-inbox-imaps.socket \ public-inbox-imap-onion.socket After = public-inbox-imapd.socket public-inbox-imaps.socket \ public-inbox-imap-onion.socket [Service] Environment = PI_CONFIG=/home/pi/.public-inbox/config \ PATH=/usr/local/bin:/usr/bin:/bin \ PERL_INLINE_DIRECTORY=/tmp/.pub-inline LimitNOFILE = 30000 ExecStartPre = /bin/mkdir -p -m 1777 /tmp/.pub-inline ExecStart = /usr/local/bin/public-inbox-imapd -W0 \ -1 /var/log/public-inbox/imapd.out.log \ --cert /etc/ssl/certs/news.example.com.pem \ --key /etc/ssl/private/news.example.com.key StandardError = syslog # NonBlocking is REQUIRED to avoid a race condition if running # simultaneous services NonBlocking = true Sockets = public-inbox-imapd.socket public-inbox-imaps.socket \ public-inbox-imap-onion.socket KillSignal = SIGQUIT User = nobody Group = ssl-cert ExecReload = /bin/kill -HUP $MAINPID TimeoutStopSec = 86400 KillMode = process [Install] WantedBy = multi-user.target public-inbox-1.6.1/examples/public-inbox-imaps.socket000066400000000000000000000004111377346120300226460ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-imaps.socket <== [Unit] Description = public-inbox-imaps socket [Socket] ListenStream = 0.0.0.0:993 BindIPv6Only = ipv6-only ListenStream = [::]:993 Service = public-inbox-imapd@1.service [Install] WantedBy = sockets.target public-inbox-1.6.1/examples/public-inbox-nntpd.socket000066400000000000000000000003201377346120300226570ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-nntpd.socket <== [Unit] Description = public-inbox-nntpd socket [Socket] ListenStream = 119 Service = public-inbox-nntpd@1.service [Install] WantedBy = sockets.target public-inbox-1.6.1/examples/public-inbox-nntpd@.service000066400000000000000000000024151377346120300231360ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-nntpd@.service <== # Since SIGUSR2 upgrades do not work under systemd, this service file # allows starting two simultaneous services during upgrade time # (e.g. public-inbox-nntpd@1 public-inbox-nntpd@2) with the intention # that they take turns running in-between upgrades. This should # allow upgrading without downtime. [Unit] Description = public-inbox NNTP server %i Wants = public-inbox-nntpd.socket public-inbox-nntps.socket After = public-inbox-nntpd.socket public-inbox-nntps.socket [Service] Environment = PI_CONFIG=/home/pi/.public-inbox/config \ PATH=/usr/local/bin:/usr/bin:/bin \ PERL_INLINE_DIRECTORY=/tmp/.pub-inline LimitNOFILE = 30000 ExecStartPre = /bin/mkdir -p -m 1777 /tmp/.pub-inline ExecStart = /usr/local/bin/public-inbox-nntpd \ -1 /var/log/public-inbox/nntpd.out.log \ --cert /etc/ssl/certs/news.example.com.pem \ --key /etc/ssl/private/news.example.com.key StandardError = syslog # NonBlocking is REQUIRED to avoid a race condition if running # simultaneous services NonBlocking = true Sockets = public-inbox-nntpd.socket public-inbox-nntps.socket KillSignal = SIGQUIT User = nobody Group = ssl-cert ExecReload = /bin/kill -HUP $MAINPID TimeoutStopSec = 86400 KillMode = process [Install] WantedBy = multi-user.target public-inbox-1.6.1/examples/public-inbox-nntps.socket000066400000000000000000000004111377346120300226770ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-nntps.socket <== [Unit] Description = public-inbox-nntps socket [Socket] ListenStream = 0.0.0.0:563 BindIPv6Only = ipv6-only ListenStream = [::]:563 Service = public-inbox-nntpd@1.service [Install] WantedBy = sockets.target public-inbox-1.6.1/examples/public-inbox-watch.service000066400000000000000000000007551377346120300230260ustar00rootroot00000000000000# ==> /etc/systemd/system/public-inbox-watch.service <== [Unit] Description = public-inbox Maildir watch After = spamassassin.service [Service] Environment = PI_CONFIG=/home/pi/.public-inbox/config \ PATH=/usr/local/bin:/usr/bin:/bin ExecStart = /usr/local/bin/public-inbox-watch StandardOutput = syslog StandardError = syslog ExecReload = /bin/kill -HUP $MAINPID # this user must have read access to Maildirs it watches User = pi KillMode = process [Install] WantedBy = multi-user.target public-inbox-1.6.1/examples/public-inbox.psgi000066400000000000000000000034411377346120300212170ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2014-2020 all contributors # License: GPL-3.0+ # Note: this is part of our test suite, update t/plack.t if this changes # Usage: plackup [OPTIONS] /path/to/this/file # # A startup command for development which monitors changes: # plackup -I lib -o 127.0.0.1 -R lib -r examples/public-inbox.psgi # # .psgi paths may also be passed to public-inbox-httpd(1) for # production deployments: # public-inbox-httpd [OPTIONS] /path/to/examples/public-inbox.psgi use strict; use warnings; use PublicInbox::WWW; use Plack::Builder; my $www = PublicInbox::WWW->new; $www->preload; # share the public-inbox code itself: my $src = $ENV{SRC_GIT_DIR}; # '/path/to/public-inbox.git' $src = PublicInbox::Git->new($src) if defined $src; builder { # Enable to ensure redirects and Atom feed URLs are generated # properly when running behind a reverse proxy server which # sets the X-Forwarded-Proto request header. # See Plack::Middleware::ReverseProxy documentation for details eval { enable 'ReverseProxy' }; $@ and warn "Plack::Middleware::ReverseProxy missing,\n", "URL generation for redirects may be wrong if behind a reverse proxy\n"; # Optional: Log timing information for requests to track performance. # Logging to STDOUT is recommended since public-inbox-httpd knows # how to reopen it via SIGUSR1 after log rotation. # enable 'AccessLog::Timed', # logger => sub { syswrite(STDOUT, $_[0]) }, # format => '%t "%r" %>s %b %D'; enable 'Head'; sub { my ($env) = @_; # share public-inbox.git code! if ($src && $env->{PATH_INFO} =~ m!\A/(?:public-inbox(?:\.git)?/)? ($PublicInbox::GitHTTPBackend::ANY)\z!xo) { PublicInbox::GitHTTPBackend::serve($env, $src, $1); } else { $www->call($env); } }; } public-inbox-1.6.1/examples/unsubscribe-milter.socket000066400000000000000000000003741377346120300227720ustar00rootroot00000000000000# ==> /etc/systemd/system/unsubscribe-milter.socket <== [Unit] Description = unsubscribe.milter socket [Socket] ListenStream = /var/spool/postfix/unsubscribe/unsubscribe.sock Service = unsubscribe-milter@1.service [Install] WantedBy = sockets.target public-inbox-1.6.1/examples/unsubscribe-milter@.service000066400000000000000000000020051377346120300232330ustar00rootroot00000000000000# ==> /etc/systemd/system/unsubscribe-milter@.service <== # The '@' is to allow multiple simultaneous services to start # and share the same socket so new code can be cycled in # without downtime [Unit] Description = unsubscribe milter %i Wants = unsubscribe-milter.socket After = unsubscribe-milter.socket [Service] # First 8 bytes is for the key, next 8 bytes is for the IV # using Blowfish. We want as short URLs as possible to avoid # copy+paste errors # umask 077 && dd if=/dev/urandom bs=16 count=1 of=.unsubscribe.key ExecStart = /usr/local/sbin/unsubscribe.milter /home/mlmmj/.unsubscribe.key # UNIQUE_MAILTO makes the List-Unsubscribe mailto: header unique # so unsubcribing becomes one-step (requires MDA/MTA configuration, # see the bottom of examples/unsubscribe.milter # Environment = UNIQUE_MAILTO=1 Sockets = unsubscribe-milter.socket # the corresponding PSGI app needs permissions to modify the # mlmmj spool, so we might as well use the same user since User = mlmmj [Install] WantedBy = multi-user.target public-inbox-1.6.1/examples/unsubscribe-psgi.socket000066400000000000000000000004161377346120300224350ustar00rootroot00000000000000# ==> /etc/systemd/system/unsubscribe-psgi.socket <== [Unit] Description = unsubscribe PSGI socket [Socket] # Forward to the PSGI using nginx or similar ListenStream = /run/unsubscribe-psgi.sock Service = unsubscribe-psgi@1.service [Install] WantedBy = sockets.target public-inbox-1.6.1/examples/unsubscribe-psgi@.service000066400000000000000000000013461377346120300227100ustar00rootroot00000000000000# ==> /etc/systemd/system/unsubscribe-psgi@.service <== # The '@' is to allow multiple simultaneous services to start # and share the same socket so new code can be cycled in # without downtime [Unit] Description = unsubscribe PSGI %i Wants = unsubscribe-psgi.socket After = unsubscribe-psgi.socket [Service] # any PSGI server ought to work, # but public-inbox-httpd supports socket activation like unsubscribe.milter ExecStart = /usr/local/bin/public-inbox-httpd -W0 /etc/unsubscribe.psgi # NonBlocking is REQUIRED to avoid a race condition if running # simultaneous services NonBlocking = true Sockets = unsubscribe-psgi.socket # we need to modify the mlmmj spool User = mlmmj KillMode = process [Install] WantedBy = multi-user.target public-inbox-1.6.1/examples/unsubscribe.milter000066400000000000000000000105631377346120300215050ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Sendmail::PMilter qw(:all); use IO::Socket; use Crypt::CBC; use MIME::Base64 qw(encode_base64url); my $key_file = shift @ARGV or die "Usage: $0 KEY_FILE\n"; open my $fh, '<', $key_file or die "failed to open $key_file\n"; my ($key, $iv); if (read($fh, $key, 8) != 8 || read($fh, $iv, 8) != 8 || read($fh, my $end, 8) != 0) { die "KEY_FILE must be 16 bytes\n"; } # optionally support unique mailto: subject in List-Unsubscribe, # requires a custom rule in front of mlmmj, see __END__ my $unique_mailto = $ENV{UNIQUE_MAILTO}; # these parameters were chosen to generate shorter parameters # to reduce the possibility of copy+paste errors my $crypt = Crypt::CBC->new(-key => $key, -iv => $iv, -header => 'none', -cipher => 'Blowfish'); $fh = $iv = $key = undef; my %cbs; $cbs{connect} = sub { my ($ctx) = @_; eval { $ctx->setpriv({ header => {}, envrcpt => {} }) }; warn $@ if $@; SMFIS_CONTINUE; }; $cbs{envrcpt} = sub { my ($ctx, $addr) = @_; eval { $addr =~ tr!<>!!d; $ctx->getpriv->{envrcpt}->{$addr} = 1; }; warn $@ if $@; SMFIS_CONTINUE; }; $cbs{header} = sub { my ($ctx, $k, $v) = @_; eval { my $k_ = lc $k; if ($k_ eq 'list-unsubscribe') { my $header = $ctx->getpriv->{header} ||= {}; my $ary = $header->{$k_} ||= []; # we create placeholders in case there are # multiple headers of the same name my $cur = []; push @$ary, $cur; # This relies on mlmmj convention: # $LIST+unsubscribe@$DOMAIN if ($v =~ /\A]+)>\z/) { @$cur = ($k, $v, $1, $2); # Mailman convention: # $LIST-request@$DOMAIN?subject=unsubscribe } elsif ($v =~ /\A\z/x) { # @$cur = ($k, $v, $1, $2); } } }; warn $@ if $@; SMFIS_CONTINUE; }; # We don't want people unsubscribing archivers: sub archive_addr { my ($addr) = @_; return 1 if ($addr =~ /\@m\.gmane(?:-mx)?\.org\z/); return 1 if ($addr eq 'archive@mail-archive.com'); 0 } $cbs{eom} = sub { my ($ctx) = @_; eval { my $priv = $ctx->getpriv; $ctx->setpriv({ header => {}, envrcpt => {} }); my @rcpt = keys %{$priv->{envrcpt}}; # one recipient, one unique HTTP(S) URL return SMFIS_CONTINUE if @rcpt != 1; return SMFIS_CONTINUE if archive_addr(lc($rcpt[0])); my $unsub = $priv->{header}->{'list-unsubscribe'} || []; my $n = 0; foreach my $u (@$unsub) { # Milter indices are 1-based, # not 0-based like Perl arrays my $index = ++$n; my ($k, $v, $list, $domain) = @$u; next unless $k && $v && $list && $domain; my $u = $crypt->encrypt($rcpt[0]); $u = encode_base64url($u); if ($unique_mailto) { # $u needs to be in the Subject: header since # +$EXTENSION is case-insensitive my $s = "subject=$u"; $v = ""; } $v .= ",\n "; $ctx->chgheader($k, $index, $v); } }; warn $@ if $@; SMFIS_CONTINUE; }; my $milter = Sendmail::PMilter->new; # Try to inherit a socket from systemd or similar: my $fds = $ENV{LISTEN_FDS}; if ($fds && (($ENV{LISTEN_PID} || 0) == $$)) { die "$0 can only listen on one FD\n" if $fds != 1; my $start_fd = 3; my $s = IO::Socket->new_from_fd($start_fd, 'r') or die "inherited bad FD from LISTEN_FDS: $!\n"; $milter->set_socket($s); } else { # fall back to binding a socket: my $sock = 'unix:/var/spool/postfix/unsubscribe/unsubscribe.sock'; $milter->set_listen(1024); my $umask = umask 0000; $milter->setconn($sock); umask $umask; } $milter->register('unsubscribe', \%cbs, SMFI_CURR_ACTS); $milter->main(); __END__ # TMPMSG comes from dc-dlvr, it's populated before the above runs: # TMPMSG=$(mktemp -t dc-dlvr.orig.$USER.XXXXXX || exit 1) # cat >$TMPMSG # I use something like this in front of mlmmj for UNIQUE_MAILTO # $EXTENSION and $ORIGINAL_RECIPIENT are set by postfix, $list # is a local mapping of addresses to mailing list names. case $ORIGINAL_RECIPIENT in foo+*) list=foo ;; # ... esac case $EXTENSION in unique-unsub) u="$(formail -z -c -x Subject <$TMPMSG)" d=$(expr "$ORIGINAL_RECIPIENT" : '^.*@\(.*\)') # forward this to the unsubscribe.psgi service curl -sSf https://$d/u/$u/$list >/dev/null exit ;; esac /usr/bin/mlmmj-receive -L /path/to/mlmmj-spool/$list <"$TMPMSG" exit public-inbox-1.6.1/examples/unsubscribe.psgi000066400000000000000000000043541377346120300211540ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2016-2020 all contributors # License: GPL-3.0+ # This should not require any other PublicInbox code, but may use # PublicInbox::Config if ~/.public-inbox/config exists or # PI_CONFIG is pointed to an appropriate location use strict; use Plack::Builder; use PublicInbox::Unsubscribe; my $app = PublicInbox::Unsubscribe->new( pi_config => eval { # optional, for pointing out archives require PublicInbox::Config; # uses ~/.public-inbox/config by default, # can override with PI_CONFIG or here since # I run this .psgi as the mlmmj user while the # public-inbox-mda code which actually writes to # the archives runs as a different user. PublicInbox::Config->new('/home/pi/.public-inbox/config') }, # change if you fork code_url => 'https://public-inbox.org/public-inbox.git', owner_email => 'BOFH@example.com', confirm => 0, # First 8 bytes is for the key, next 8 bytes is for the IV # using Blowfish. We want as short URLs as possible to avoid # copy+paste errors # umask 077 && dd if=/dev/urandom bs=16 count=1 of=.unsubscribe.key key_file => '/home/mlmmj/.unsubscribe.key', # this runs as whatever user has perms to run /usr/bin/mlmmj-unsub # users of other mailing lists. Returns '' on success. unsubscribe => sub { my ($user_addr, $list_addr) = @_; # map list_addr to mlmmj spool, I use: # /home/mlmmj/spool/$LIST here my ($list, $domain) = split('@', $list_addr, 2); my $spool = "/home/mlmmj/spool/$list"; return "Invalid list: $list" unless -d $spool; # -c to send a confirmation email, -s is important # in case a user is click-happy and clicks twice. my @cmd = (qw(/usr/bin/mlmmj-unsub -c -s), '-L', $spool, '-a', $user_addr); # we don't know which version they're subscribed to, # try both non-digest and digest my $normal = system(@cmd); my $digest = system(@cmd, '-d'); # success if either succeeds: return '' if ($normal == 0 || $digest == 0); # missing executable or FS error, # otherwise -s always succeeds, right? return 'Unknown error, contact admin'; }, ); builder { mount '/u' => builder { eval { enable 'ReverseProxy' }; # optional enable 'Head'; sub { $app->call(@_) }; }; }; public-inbox-1.6.1/examples/varnish-4.vcl000066400000000000000000000035341377346120300202640ustar00rootroot00000000000000# Example VCL for Varnish 4.0 with public-inbox WWW code # This is based on what shipped for 3.x a long time ago (I think) # and I'm hardly an expert in VCL (nor should we expect anybody # who maintains a public-inbox HTTP interface to be). # # It seems to work for providing some protection from traffic # bursts; but perhaps the public-inbox WWW interface can someday # provide enough out-of-the-box performance that configuration # of an extra component is pointless. vcl 4.0; backend default { # this is where public-inbox-http listens .host = "127.0.0.1"; .port = "280"; } sub vcl_recv { /* pipe POST and any other weird methods directly to backend */ if (req.method != "GET" && req.method != "HEAD") { return (pipe); } if (req.http.Authorization || req.http.Cookie) { /* Not cacheable by default */ return (pass); } return (hash); } sub vcl_pipe { # By default Connection: close is set on all piped requests by varnish, # but public-inbox-httpd supports persistent connections well :) unset bereq.http.connection; return (pipe); } sub vcl_hash { hash_data(req.url); if (req.http.host) { hash_data(req.http.host); } else { hash_data(server.ip); } /* we generate fully-qualified URLs for Atom feeds and redirects */ if (req.http.X-Forwarded-Proto) { hash_data(req.http.X-Forwarded-Proto); } return (lookup); } sub vcl_backend_response { set beresp.grace = 60s; set beresp.do_stream = true; if (beresp.ttl <= 0s || /* no point in caching stuff git already stores on disk */ beresp.http.Content-Type ~ "application/x-git" || beresp.http.Set-Cookie || beresp.http.Vary == "*") { /* Mark as "Hit-For-Pass" for the next 2 minutes */ set beresp.ttl = 120 s; set beresp.uncacheable = true; return (deliver); } else { /* short TTL for up-to-dateness, our PSGI is not that slow */ set beresp.ttl = 10s; } return (deliver); } public-inbox-1.6.1/lib/000077500000000000000000000000001377346120300146665ustar00rootroot00000000000000public-inbox-1.6.1/lib/PublicInbox/000077500000000000000000000000001377346120300171045ustar00rootroot00000000000000public-inbox-1.6.1/lib/PublicInbox/Address.pm000066400000000000000000000013351377346120300210310ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ package PublicInbox::Address; use strict; use warnings; sub xs_emails { grep { defined } map { $_->address() } parse_email_addresses($_[0]) } sub xs_names { grep { defined } map { my $n = $_->name; my $addr = $_->address; $n = $_->user if defined($addr) && $n eq $addr; $n; } parse_email_addresses($_[0]); } eval { require Email::Address::XS; Email::Address::XS->import(qw(parse_email_addresses)); *emails = \&xs_emails; *names = \&xs_names; }; if ($@) { require PublicInbox::AddressPP; *emails = \&PublicInbox::AddressPP::emails; *names = \&PublicInbox::AddressPP::names; } 1; public-inbox-1.6.1/lib/PublicInbox/AddressPP.pm000066400000000000000000000017261377346120300212750ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ package PublicInbox::AddressPP; use strict; # very loose regexes, here. We don't need RFC-compliance, # just enough to make thing sanely displayable and pass to git # We favor Email::Address::XS for conformance if available sub emails { ($_[0] =~ /([\w\.\+=\?"\(\)\-!#\$%&'\*\/\^\`\|\{\}~]+\@[\w\.\-\(\)]+) (?:\s[^>]*)?>?\s*(?:\(.*?\))?(?:,\s*|\z)/gx) } sub names { my @p = split(/]+)\@[\w\.\-]+>?\s*(\(.*?\))?(?:,\s*|\z)/, $_[0]); my @ret; for (my $i = 0; $i <= $#p;) { my $phrase = $p[$i++]; $phrase =~ tr/\r\n\t / /s; $phrase =~ s/\A['"\s]*//; $phrase =~ s/['"\s]*\z//; my $user = $p[$i++] // ''; my $comment = $p[$i++] // ''; if ($phrase =~ /\S/) { $phrase =~ s/\@\S+\z//; push @ret, $phrase; } elsif ($comment =~ /\A\((.*?)\)\z/) { push @ret, $1; } else { push @ret, $user; } } @ret; } 1; public-inbox-1.6.1/lib/PublicInbox/Admin.pm000066400000000000000000000174071377346120300205030ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ # common stuff for administrative command-line tools # Unstable internal API package PublicInbox::Admin; use strict; use parent qw(Exporter); use Cwd qw(abs_path); use POSIX (); our @EXPORT_OK = qw(resolve_repo_dir setup_signals); use PublicInbox::Config; use PublicInbox::Inbox; use PublicInbox::Spawn qw(popen_rd); sub setup_signals { my ($cb, $arg) = @_; # optional # we call exit() here instead of _exit() so DESTROY methods # get called (e.g. File::Temp::Dir and PublicInbox::Msgmap) $SIG{INT} = $SIG{HUP} = $SIG{PIPE} = $SIG{TERM} = sub { my ($sig) = @_; # https://www.tldp.org/LDP/abs/html/exitcodes.html eval { $cb->($sig, $arg) } if $cb; $sig = 'SIG'.$sig; exit(128 + POSIX->$sig); }; } sub resolve_repo_dir { my ($cd, $ver) = @_; my $prefix = defined $cd ? $cd : './'; if (-d $prefix && -f "$prefix/inbox.lock") { # v2 $$ver = 2 if $ver; return abs_path($prefix); } my $cmd = [ qw(git rev-parse --git-dir) ]; my $fh = popen_rd($cmd, undef, {-C => $cd}); my $dir = do { local $/; <$fh> }; close $fh or die "error in ".join(' ', @$cmd)." (cwd:$cd): $!\n"; chomp $dir; $$ver = 1 if $ver; return abs_path($cd) if ($dir eq '.' && defined $cd); abs_path($dir); } # for unconfigured inboxes sub detect_indexlevel ($) { my ($ibx) = @_; my $over = $ibx->over; my $srch = $ibx->search; delete @$ibx{qw(over search)}; # don't leave open FDs lying around # brand new or never before indexed inboxes default to full return 'full' unless $over; my $l = 'basic'; return $l unless $srch; if (my $xdb = $srch->xdb) { $l = 'full'; my $m = $xdb->get_metadata('indexlevel'); if ($m eq 'medium') { $l = $m; } elsif ($m ne '') { warn <<""; $ibx->{inboxdir} has unexpected indexlevel in Xapian: $m } $ibx->{-skip_docdata} = 1 if $xdb->get_metadata('skip_docdata'); } $l; } sub unconfigured_ibx ($$) { my ($dir, $i) = @_; my $name = "unconfigured-$i"; PublicInbox::Inbox->new({ name => $name, address => [ "$name\@example.com" ], inboxdir => $dir, # TODO: consumers may want to warn on this: #-unconfigured => 1, }); } sub resolve_inboxes ($;$$) { my ($argv, $opt, $cfg) = @_; $opt ||= {}; $cfg //= PublicInbox::Config->new; if ($opt->{all}) { my $cfgfile = PublicInbox::Config::default_file(); $cfg or die "--all specified, but $cfgfile not readable\n"; @$argv and die "--all specified, but directories specified\n"; } my $min_ver = $opt->{-min_inbox_version} || 0; my (@old, @ibxs); my %dir2ibx; if ($cfg) { $cfg->each_inbox(sub { my ($ibx) = @_; my $path = abs_path($ibx->{inboxdir}); if (defined($path)) { $dir2ibx{$path} = $ibx; } else { warn <{name} $ibx->{inboxdir}: $! EOF } }); } if ($opt->{all}) { my @all = values %dir2ibx; @all = grep { $_->version >= $min_ver } @all; push @ibxs, @all; } else { # directories specified on the command-line my $i = 0; my @dirs = @$argv; push @dirs, '.' unless @dirs; foreach (@dirs) { my $v; my $dir = resolve_repo_dir($_, \$v); if ($v < $min_ver) { push @old, $dir; next; } my $ibx = $dir2ibx{$dir} ||= unconfigured_ibx($dir, $i); $i++; push @ibxs, $ibx; } } if (@old) { die "-V$min_ver inboxes not supported by $0\n\t", join("\n\t", @old), "\n"; } @ibxs; } # TODO: make Devel::Peek optional, only used for daemon my @base_mod = qw(Devel::Peek); my @over_mod = qw(DBD::SQLite DBI); my %mod_groups = ( -index => [ @base_mod, @over_mod ], -base => \@base_mod, -search => [ @base_mod, @over_mod, 'Search::Xapian' ], ); sub scan_ibx_modules ($$) { my ($mods, $ibx) = @_; if (!$ibx->{indexlevel} || $ibx->{indexlevel} ne 'basic') { $mods->{'Search::Xapian'} = 1; } else { $mods->{$_} = 1 foreach @over_mod; } } sub check_require { my (@mods) = @_; my $err = {}; while (my $mod = shift @mods) { if (my $groups = $mod_groups{$mod}) { push @mods, @$groups; } elsif ($mod eq 'Search::Xapian') { require PublicInbox::Search; PublicInbox::Search::load_xapian() or $err->{'Search::Xapian || Xapian'} = $@; } else { eval "require $mod"; $err->{$mod} = $@ if $@; } } scalar keys %$err ? $err : undef; } sub missing_mod_msg { my ($err) = @_; my @mods = map { "`$_'" } sort keys %$err; my $last = pop @mods; @mods ? (join(', ', @mods)."' and $last") : $last } sub require_or_die { my $err = check_require(@_) or return; die missing_mod_msg($err)." required for $0\n"; } sub indexlevel_ok_or_die ($) { my ($indexlevel) = @_; my $req; if ($indexlevel eq 'basic') { $req = '-index'; } elsif ($indexlevel =~ /\A(?:medium|full)\z/) { $req = '-search'; } else { die <<""; invalid indexlevel=$indexlevel (must be `basic', `medium', or `full') } my $err = check_require($req) or return; die missing_mod_msg($err) ." required for indexlevel=$indexlevel\n"; } sub index_terminate { my (undef, $ibx) = @_; # $_[0] = signal name $ibx->git->cleanup; } sub index_inbox { my ($ibx, $im, $opt) = @_; my $jobs = delete $opt->{jobs} if $opt; if (my $pr = $opt->{-progress}) { $pr->("indexing $ibx->{inboxdir} ...\n"); } local %SIG = %SIG; setup_signals(\&index_terminate, $ibx); if (ref($ibx) && $ibx->version == 2) { eval { require PublicInbox::V2Writable }; die "v2 requirements not met: $@\n" if $@; $ibx->{-creat_opt}->{nproc} = $jobs; my $v2w = $im // $ibx->importer($opt->{reindex} // $jobs); if (defined $jobs) { if ($jobs == 0) { $v2w->{parallel} = 0; } else { my $n = $v2w->{shards}; if ($jobs < ($n + 1) && !$opt->{reshard}) { warn "Unable to respect --jobs=$jobs on index, inbox was created with $n shards\n"; } } } my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ }; local $SIG{__WARN__} = sub { $warn_cb->($v2w->{current_info}, ': ', @_); }; $v2w->index_sync($opt); } else { require PublicInbox::SearchIdx; my $s = PublicInbox::SearchIdx->new($ibx, 1); $s->index_sync($opt); } } sub progress_prepare ($) { my ($opt) = @_; # public-inbox-index defaults to quiet, -xcpdb and -compact do not if (defined($opt->{quiet}) && $opt->{quiet} < 0) { $opt->{quiet} = !$opt->{verbose}; } if ($opt->{quiet}) { open my $null, '>', '/dev/null' or die "failed to open /dev/null: $!\n"; $opt->{1} = $null; # suitable for spawn() redirect } else { $opt->{verbose} ||= 1; $opt->{-progress} = sub { print STDERR @_ }; } } # same unit factors as git: sub parse_unsigned ($) { my ($val) = @_; $$val =~ /\A([0-9]+)([kmg])?\z/i or return; my ($n, $unit_factor) = ($1, $2 // ''); my %u = ( k => 1024, m => 1024**2, g => 1024**3 ); $$val = $n * ($u{lc($unit_factor)} // 1); 1; } sub index_prepare ($$) { my ($opt, $cfg) = @_; my $env; if ($opt->{compact}) { require PublicInbox::Xapcmd; PublicInbox::Xapcmd::check_compact(); $opt->{compact_opt} = { -coarse_lock => 1, compact => 1 }; if (defined(my $jobs = $opt->{jobs})) { $opt->{compact_opt}->{jobs} = $jobs; } } for my $k (qw(max_size batch_size)) { my $git_key = "publicInbox.index".ucfirst($k); $git_key =~ s/_([a-z])/\U$1/g; defined(my $v = $opt->{$k} // $cfg->{lc($git_key)}) or next; parse_unsigned(\$v) or die "`$git_key=$v' not parsed\n"; $v > 0 or die "`$git_key=$v' must be positive\n"; $opt->{$k} = $v; } # out-of-the-box builds of Xapian 1.4.x are still limited to 32-bit # https://getting-started-with-xapian.readthedocs.io/en/latest/concepts/indexing/limitations.html $opt->{batch_size} and $env = { XAPIAN_FLUSH_THRESHOLD => '4294967295' }; for my $k (qw(sequential_shard)) { my $git_key = "publicInbox.index".ucfirst($k); $git_key =~ s/_([a-z])/\U$1/g; defined(my $s = $opt->{$k} // $cfg->{lc($git_key)}) or next; defined(my $v = $cfg->git_bool($s)) or die "`$git_key=$s' not boolean\n"; $opt->{$k} = $v; } $env; } 1; public-inbox-1.6.1/lib/PublicInbox/AdminEdit.pm000066400000000000000000000034041377346120300213010ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ # common stuff between -edit, -purge (and maybe -learn in the future) package PublicInbox::AdminEdit; use strict; use warnings; use PublicInbox::Admin; our @OPT = qw(all force|f verbose|v! help|h); sub check_editable ($) { my ($ibxs) = @_; foreach my $ibx (@$ibxs) { my $lvl = $ibx->{indexlevel}; if (defined $lvl) { PublicInbox::Admin::indexlevel_ok_or_die($lvl); next; } # Undefined indexlevel, so `full'... # Search::Xapian exists and the DB can be read, at least, fine $ibx->search and next; # it's possible for a Xapian directory to exist, # but Search::Xapian to go missing/broken. # Make sure it's purged in that case: $ibx->over or die "no over.sqlite3 in $ibx->{inboxdir}\n"; # $ibx->{search} is populated by $ibx->over call my $xdir_ro = $ibx->{search}->xdir(1); my $nshard = 0; foreach my $shard (<$xdir_ro/*>) { if (-d $shard && $shard =~ m!/[0-9]+\z!) { my $bytes = 0; $bytes += -s $_ foreach glob("$shard/*"); $nshard++ if $bytes; } } if ($nshard) { PublicInbox::Admin::require_or_die('-search'); } else { # somebody could "rm -r" all the Xapian directories; # let them purge the overview, at least $ibx->{indexlevel} ||= 'basic'; } } } # takes the output of V2Writable::purge and V2Writable::replace # $rewrites = [ array commits keyed by epoch ] sub show_rewrites ($$$) { my ($fh, $ibx, $rewrites) = @_; print $fh "$ibx->{inboxdir}:"; if (scalar @$rewrites) { my $epoch = -1; my @out = map {; ++$epoch; "$epoch.git: ".(defined($_) ? $_ : '(unchanged)') } @$rewrites; print $fh join("\n\t", '', @out), "\n"; } else { print $fh " NONE\n"; } } 1; public-inbox-1.6.1/lib/PublicInbox/AltId.pm000066400000000000000000000034021377346120300204360ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ # Used for giving serial numbers to messages. This can be tied to # the msgmap for live updates to living lists (see # PublicInbox::Filters::RubyLang), or kept separate for imports # of defunct NNTP groups (e.g. scripts/xhdr-num2mid) # # Introducing NEW uses of serial numbers is discouraged because of # it leads to reliance on centralization. However, being able # to use existing serial numbers is beneficial. package PublicInbox::AltId; use strict; use warnings; use URI::Escape qw(uri_unescape); use PublicInbox::Msgmap; # spec: TYPE:PREFIX:param1=value1¶m2=value2&... # The PREFIX will be a searchable boolean prefix in Xapian # Example: serial:gmane:file=/path/to/altmsgmap.sqlite3 sub new { my ($class, $ibx, $spec, $writable) = @_; my ($type, $prefix, $query) = split(/:/, $spec, 3); $type eq 'serial' or die "non-serial not supported, yet\n"; $prefix =~ /\A\w+\z/ or warn "non-word prefix not searchable\n"; my %params = map { my ($k, $v) = split(/=/, uri_unescape($_), 2); $v = '' unless defined $v; ($k, $v); } split(/[&;]/, $query); my $f = $params{file} or die "file: required for $type spec $spec\n"; unless (index($f, '/') == 0) { if ($ibx->version == 1) { $f = "$ibx->{inboxdir}/public-inbox/$f"; } else { $f = "$ibx->{inboxdir}/$f"; } } bless { filename => $f, writable => $writable, prefix => $prefix, xprefix => 'X'.uc($prefix), }, $class; } sub mm_alt { my ($self) = @_; $self->{mm_alt} ||= eval { my $f = $self->{filename}; my $writable = $self->{writable}; PublicInbox::Msgmap->new_file($f, $writable); }; } sub mid2alt { my ($self, $mid) = @_; $self->mm_alt->num_for($mid); } 1; public-inbox-1.6.1/lib/PublicInbox/Cgit.pm000066400000000000000000000071411377346120300203330ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ # wrapper for cgit(1) and git-http-backend(1) for browsing and # serving git code repositories. Requires 'publicinbox.cgitrc' # directive to be set in the public-inbox config file. package PublicInbox::Cgit; use strict; use PublicInbox::GitHTTPBackend; use PublicInbox::Git; # not bothering with Exporter for a one-off *input_prepare = \&PublicInbox::GitHTTPBackend::input_prepare; *serve = \&PublicInbox::GitHTTPBackend::serve; use PublicInbox::Qspawn; use PublicInbox::WwwStatic qw(r); sub locate_cgit ($) { my ($pi_config) = @_; my $cgit_bin = $pi_config->{'publicinbox.cgitbin'}; my $cgit_data = $pi_config->{'publicinbox.cgitdata'}; # /var/www/htdocs/cgit is the default install path from cgit.git # /usr/{lib,share}/cgit is where Debian puts cgit # TODO: check other distros for common paths unless (defined $cgit_bin) { foreach (qw(/var/www/htdocs/cgit /usr/lib/cgit)) { my $x = "$_/cgit.cgi"; next unless -x $x; $cgit_bin = $x; last; } } unless (defined $cgit_data) { my @dirs = qw(/var/www/htdocs/cgit /usr/share/cgit); # local installs of cgit from source have # CGIT_SCRIPT_PATH==CGIT_DATA_PATH by default, # so we can usually infer the cgit_data path from cgit_bin if (defined($cgit_bin) && $cgit_bin =~ m!\A(.+?)/[^/]+\z!) { unshift @dirs, $1 if -d $1; } foreach my $d (@dirs) { my $f = "$d/cgit.css"; next unless -f $f; $cgit_data = $d; last; } } ($cgit_bin, $cgit_data); } sub new { my ($class, $pi_config) = @_; my ($cgit_bin, $cgit_data) = locate_cgit($pi_config); my $self = bless { cmd => [ $cgit_bin ], cgit_data => $cgit_data, pi_config => $pi_config, }, $class; $pi_config->fill_all; # fill in -code_repos mapped to inboxes # some cgit repos may not be mapped to inboxes, so ensure those exist: my $code_repos = $pi_config->{-code_repos}; foreach my $k (keys %$pi_config) { $k =~ /\Acoderepo\.(.+)\.dir\z/ or next; my $dir = $pi_config->{$k}; $code_repos->{$1} ||= PublicInbox::Git->new($dir); } while (my ($nick, $repo) = each %$code_repos) { $self->{"\0$nick"} = $repo; } my $cgit_static = $pi_config->{-cgit_static}; my $static = join('|', map { quotemeta $_ } keys %$cgit_static); $self->{static} = qr/\A($static)\z/; $self; } # only what cgit cares about: my @PASS_ENV = qw( HTTP_HOST QUERY_STRING REQUEST_METHOD SCRIPT_NAME SERVER_NAME SERVER_PORT HTTP_COOKIE HTTP_REFERER CONTENT_LENGTH ); # XXX: cgit filters may care about more variables... my $parse_cgi_headers = \&PublicInbox::GitHTTPBackend::parse_cgi_headers; sub call { my ($self, $env) = @_; my $path_info = $env->{PATH_INFO}; my $cgit_data; # handle requests without spawning cgit iff possible: if ($path_info =~ m!\A/(.+?)/($PublicInbox::GitHTTPBackend::ANY)\z!ox) { my ($nick, $path) = ($1, $2); if (my PublicInbox::Git $git = $self->{"\0$nick"}) { return serve($env, $git, $path); } } elsif ($path_info =~ m!$self->{static}! && defined($cgit_data = $self->{cgit_data})) { my $f = $cgit_data.$1; # {static} only matches leading slash return PublicInbox::WwwStatic::response($env, [], $f); } my $cgi_env = { PATH_INFO => $path_info }; foreach (@PASS_ENV) { defined(my $v = $env->{$_}) or next; $cgi_env->{$_} = $v; } $cgi_env->{'HTTPS'} = 'on' if $env->{'psgi.url_scheme'} eq 'https'; my $rdr = input_prepare($env) or return r(500); my $qsp = PublicInbox::Qspawn->new($self->{cmd}, $cgi_env, $rdr); my $limiter = $self->{pi_config}->limiter('-cgit'); $qsp->psgi_return($env, $limiter, $parse_cgi_headers); } 1; public-inbox-1.6.1/lib/PublicInbox/CompressNoop.pm000066400000000000000000000007711377346120300220760ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # License: AGPL-3.0+ # Provide the same methods as Compress::Raw::Zlib::Deflate but # does no transformation of outgoing data package PublicInbox::CompressNoop; use strict; use Compress::Raw::Zlib qw(Z_OK); sub new { bless \(my $self), __PACKAGE__ } sub deflate { # ($self, $input, $output) $_[2] .= $_[1]; Z_OK; } sub flush { # ($self, $output, $flags = Z_FINISH) $_[1] //= ''; Z_OK; } 1; public-inbox-1.6.1/lib/PublicInbox/Config.pm000066400000000000000000000274211377346120300206550ustar00rootroot00000000000000# Copyright (C) 2014-2020 all contributors # License: AGPL-3.0+ # # Used throughout the project for reading configuration # # Note: I hate camelCase; but git-config(1) uses it, but it's better # than alllowercasewithoutunderscores, so use lc('configKey') where # applicable for readability package PublicInbox::Config; use strict; use v5.10.1; use PublicInbox::Inbox; use PublicInbox::Spawn qw(popen_rd); sub _array ($) { ref($_[0]) eq 'ARRAY' ? $_[0] : [ $_[0] ] } # returns key-value pairs of config directives in a hash # if keys may be multi-value, the value is an array ref containing all values sub new { my ($class, $file) = @_; $file //= default_file(); my $self; if (ref($file) eq 'SCALAR') { # used by some tests open my $fh, '<', $file or die; # PerlIO::scalar $self = config_fh_parse($fh, "\n", '='); } else { $self = git_config_dump($file); } bless $self, $class; # caches $self->{-by_addr} = {}; $self->{-by_list_id} = {}; $self->{-by_name} = {}; $self->{-by_newsgroup} = {}; $self->{-no_obfuscate} = {}; $self->{-limiters} = {}; $self->{-code_repos} = {}; # nick => PublicInbox::Git object $self->{-cgitrc_unparsed} = $self->{'publicinbox.cgitrc'}; if (my $no = delete $self->{'publicinbox.noobfuscate'}) { $no = _array($no); my @domains; foreach my $n (@$no) { my @n = split(/\s+/, $n); foreach (@n) { if (/\S+@\S+/) { # full address $self->{-no_obfuscate}->{lc $_} = 1; } else { # allow "example.com" or "@example.com" s/\A@//; push @domains, quotemeta($_); } } } my $nod = join('|', @domains); $self->{-no_obfuscate_re} = qr/(?:$nod)\z/i; } if (my $css = delete $self->{'publicinbox.css'}) { $self->{css} = _array($css); } $self; } sub noop {} sub fill_all ($) { each_inbox($_[0], \&noop) } sub _lookup_fill ($$$) { my ($self, $cache, $key) = @_; $self->{$cache}->{$key} // do { fill_all($self); $self->{$cache}->{$key}; } } sub lookup { my ($self, $recipient) = @_; _lookup_fill($self, '-by_addr', lc($recipient)); } sub lookup_list_id { my ($self, $list_id) = @_; _lookup_fill($self, '-by_list_id', lc($list_id)); } sub lookup_name ($$) { my ($self, $name) = @_; $self->{-by_name}->{$name} // _fill($self, "publicinbox.$name"); } sub each_inbox { my ($self, $cb, @arg) = @_; # may auto-vivify if config file is non-existent: foreach my $section (@{$self->{-section_order}}) { next if $section !~ m!\Apublicinbox\.([^/]+)\z!; my $ibx = lookup_name($self, $1) or next; $cb->($ibx, @arg); } } sub lookup_newsgroup { my ($self, $ng) = @_; _lookup_fill($self, '-by_newsgroup', lc($ng)); } sub limiter { my ($self, $name) = @_; $self->{-limiters}->{$name} //= do { require PublicInbox::Qspawn; my $max = $self->{"publicinboxlimiter.$name.max"} || 1; my $limiter = PublicInbox::Qspawn::Limiter->new($max); $limiter->setup_rlimit($name, $self); $limiter; }; } sub config_dir { $ENV{PI_DIR} // "$ENV{HOME}/.public-inbox" } sub default_file { $ENV{PI_CONFIG} // (config_dir() . '/config'); } sub config_fh_parse ($$$) { my ($fh, $rs, $fs) = @_; my %rv; my (%section_seen, @section_order); local $/ = $rs; while (defined(my $line = <$fh>)) { chomp $line; my ($k, $v) = split($fs, $line, 2); my ($section) = ($k =~ /\A(\S+)\.[^\.]+\z/); unless (defined $section_seen{$section}) { $section_seen{$section} = 1; push @section_order, $section; } my $cur = $rv{$k}; if (defined $cur) { if (ref($cur) eq "ARRAY") { push @$cur, $v; } else { $rv{$k} = [ $cur, $v ]; } } else { $rv{$k} = $v; } } $rv{-section_order} = \@section_order; \%rv; } sub git_config_dump { my ($file) = @_; return {} unless -e $file; my @cmd = (qw/git config -z -l --includes/, "--file=$file"); my $cmd = join(' ', @cmd); my $fh = popen_rd(\@cmd); my $rv = config_fh_parse($fh, "\0", "\n"); close $fh or die "failed to close ($cmd) pipe: $?"; $rv; } sub valid_inbox_name ($) { my ($name) = @_; # Similar rules found in git.git/remote.c::valid_remote_nick # and git.git/refs.c::check_refname_component # We don't reject /\.lock\z/, however, since we don't lock refs if ($name eq '' || $name =~ /\@\{/ || $name =~ /\.\./ || $name =~ m![/:\?\[\]\^~\s\f[:cntrl:]\*]! || $name =~ /\A\./ || $name =~ /\.\z/) { return 0; } # Note: we allow URL-unfriendly characters; users may configure # non-HTTP-accessible inboxes 1; } # XXX needs testing for cgit compatibility # cf. cgit/scan-tree.c::add_repo sub cgit_repo_merge ($$$) { my ($self, $base, $repo) = @_; my $path = $repo->{dir}; if (defined(my $se = $self->{-cgit_strict_export})) { return unless -e "$path/$se"; } return if -e "$path/noweb"; # this comes from the cgit config, and AFAIK cgit only allows # repos to have one URL, but that's just the PATH_INFO component, # not the Host: portion # $repo = { url => 'foo.git', dir => '/path/to/foo.git' } my $rel = $repo->{url}; unless (defined $rel) { my $off = index($path, $base, 0); if ($off != 0) { $rel = $path; } else { $rel = substr($path, length($base) + 1); } $rel =~ s!/\.git\z!! or $rel =~ s!/+\z!!; $self->{-cgit_remove_suffix} and $rel =~ s!/?\.git\z!!; } $self->{"coderepo.$rel.dir"} //= $path; $self->{"coderepo.$rel.cgiturl"} //= _array($rel); } sub is_git_dir ($) { my ($git_dir) = @_; -d "$git_dir/objects" && -f "$git_dir/HEAD"; } # XXX needs testing for cgit compatibility sub scan_path_coderepo { my ($self, $base, $path) = @_; opendir(my $dh, $path) or do { warn "error opening directory: $path\n"; return }; my $git_dir = $path; if (is_git_dir($git_dir) || is_git_dir($git_dir .= '/.git')) { my $repo = { dir => $git_dir }; cgit_repo_merge($self, $base, $repo); return; } while (defined(my $dn = readdir $dh)) { next if $dn eq '.' || $dn eq '..'; if (index($dn, '.') == 0 && !$self->{-cgit_scan_hidden_path}) { next; } my $dir = "$path/$dn"; scan_path_coderepo($self, $base, $dir) if -d $dir; } } sub scan_tree_coderepo ($$) { my ($self, $path) = @_; scan_path_coderepo($self, $path, $path); } sub scan_projects_coderepo ($$$) { my ($self, $list, $path) = @_; open my $fh, '<', $list or do { warn "failed to open cgit projectlist=$list: $!\n"; return; }; while (<$fh>) { chomp; scan_path_coderepo($self, $path, "$path/$_"); } } sub parse_cgitrc { my ($self, $cgitrc, $nesting) = @_; if ($nesting == 0) { # defaults: my %s = map { $_ => 1 } qw(/cgit.css /cgit.png /favicon.ico /robots.txt); $self->{-cgit_static} = \%s; } # same limit as cgit/configfile.c::parse_configfile return if $nesting > 8; open my $fh, '<', $cgitrc or do { warn "failed to open cgitrc=$cgitrc: $!\n"; return; }; # FIXME: this doesn't support macro expansion via $VARS, yet my $repo; while (<$fh>) { chomp; if (m!\Arepo\.url=(.+?)/*\z!) { my $nick = $1; cgit_repo_merge($self, $repo->{dir}, $repo) if $repo; $repo = { url => $nick }; } elsif (m!\Arepo\.path=(.+)\z!) { if (defined $repo) { $repo->{dir} = $1; } else { warn "$_ without repo.url\n"; } } elsif (m!\Ainclude=(.+)\z!) { parse_cgitrc($self, $1, $nesting + 1); } elsif (m!\A(scan-hidden-path|remove-suffix)=([0-9]+)\z!) { my ($k, $v) = ($1, $2); $k =~ tr/-/_/; $self->{"-cgit_$k"} = $v; } elsif (m!\A(project-list|strict-export)=(.+)\z!) { my ($k, $v) = ($1, $2); $k =~ tr/-/_/; $self->{"-cgit_$k"} = $v; } elsif (m!\Ascan-path=(.+)\z!) { if (defined(my $list = $self->{-cgit_project_list})) { scan_projects_coderepo($self, $list, $1); } else { scan_tree_coderepo($self, $1); } } elsif (m!\A(?:css|favicon|logo|repo\.logo)=(/.+)\z!) { # absolute paths for static files via PublicInbox::Cgit $self->{-cgit_static}->{$1} = 1; } } cgit_repo_merge($self, $repo->{dir}, $repo) if $repo; } # parse a code repo # Only git is supported at the moment, but SVN and Hg are possibilities sub _fill_code_repo { my ($self, $nick) = @_; my $pfx = "coderepo.$nick"; # TODO: support gitweb and other repository viewers? if (defined(my $cgitrc = delete $self->{-cgitrc_unparsed})) { parse_cgitrc($self, $cgitrc, 0); } my $dir = $self->{"$pfx.dir"}; # aka "GIT_DIR" unless (defined $dir) { warn "$pfx.dir unset\n"; return; } my $git = PublicInbox::Git->new($dir); foreach my $t (qw(blob commit tree tag)) { $git->{$t.'_url_format'} = _array($self->{lc("$pfx.${t}UrlFormat")}); } if (defined(my $cgits = $self->{"$pfx.cgiturl"})) { $git->{cgit_url} = $cgits = _array($cgits); $self->{"$pfx.cgiturl"} = $cgits; # cgit supports "/blob/?id=%s", but it's only a plain-text # display and requires an unabbreviated id= foreach my $t (qw(blob commit tag)) { $git->{$t.'_url_format'} //= map { "$_/$t/?id=%s" } @$cgits; } } $git; } sub git_bool { my ($val) = $_[-1]; # $_[0] may be $self, or $val if ($val =~ /\A(?:false|no|off|[\-\+]?(?:0x)?0+)\z/i) { 0; } elsif ($val =~ /\A(?:true|yes|on|[\-\+]?(?:0x)?[0-9]+)\z/i) { 1; } else { undef; } } sub _fill { my ($self, $pfx) = @_; my $ibx = {}; for my $k (qw(watch nntpserver)) { my $v = $self->{"$pfx.$k"}; $ibx->{$k} = $v if defined $v; } for my $k (qw(filter inboxdir newsgroup replyto httpbackendmax feedmax indexlevel indexsequentialshard)) { if (defined(my $v = $self->{"$pfx.$k"})) { if (ref($v) eq 'ARRAY') { warn <[-1]' EOF $ibx->{$k} = $v->[-1]; } else { $ibx->{$k} = $v; } } } # backwards compatibility: $ibx->{inboxdir} //= $self->{"$pfx.mainrepo"}; if (($ibx->{inboxdir} // '') =~ /\n/s) { warn "E: `$ibx->{inboxdir}' must not contain `\\n'\n"; return; } foreach my $k (qw(obfuscate)) { my $v = $self->{"$pfx.$k"}; defined $v or next; if (defined(my $bval = git_bool($v))) { $ibx->{$k} = $bval; } else { warn "Ignoring $pfx.$k=$v in config, not boolean\n"; } } # TODO: more arrays, we should support multi-value for # more things to encourage decentralization foreach my $k (qw(address altid nntpmirror coderepo hide listid url infourl watchheader)) { if (defined(my $v = $self->{"$pfx.$k"})) { $ibx->{$k} = _array($v); } } return unless defined($ibx->{inboxdir}); my $name = $pfx; $name =~ s/\Apublicinbox\.//; if (!valid_inbox_name($name)) { warn "invalid inbox name: '$name'\n"; return; } $ibx->{name} = $name; $ibx->{-pi_config} = $self; $ibx = PublicInbox::Inbox->new($ibx); foreach (@{$ibx->{address}}) { my $lc_addr = lc($_); $self->{-by_addr}->{$lc_addr} = $ibx; $self->{-no_obfuscate}->{$lc_addr} = 1; } if (my $listids = $ibx->{listid}) { # RFC2919 section 6 stipulates "case insensitive equality" foreach my $list_id (@$listids) { $self->{-by_list_id}->{lc($list_id)} = $ibx; } } if (my $ng = $ibx->{newsgroup}) { $self->{-by_newsgroup}->{$ng} = $ibx; } $self->{-by_name}->{$name} = $ibx; if ($ibx->{obfuscate}) { $ibx->{-no_obfuscate} = $self->{-no_obfuscate}; $ibx->{-no_obfuscate_re} = $self->{-no_obfuscate_re}; fill_all($self); # noop to populate -no_obfuscate } if (my $ibx_code_repos = $ibx->{coderepo}) { my $code_repos = $self->{-code_repos}; my $repo_objs = $ibx->{-repo_objs} = []; foreach my $nick (@$ibx_code_repos) { my @parts = split(m!/!, $nick); my $valid = 0; $valid += valid_inbox_name($_) foreach (@parts); $valid == scalar(@parts) or next; my $repo = $code_repos->{$nick} //= _fill_code_repo($self, $nick); push @$repo_objs, $repo if $repo; } } $ibx } sub urlmatch { my ($self, $key, $url) = @_; state $urlmatch_broken; # requires git 1.8.5 return if $urlmatch_broken; my $file = default_file(); my $cmd = [qw/git config -z --includes --get-urlmatch/, "--file=$file", $key, $url ]; my $fh = popen_rd($cmd); local $/ = "\0"; my $val = <$fh>; if (close($fh)) { chomp($val); $val; } else { $urlmatch_broken = 1 if (($? >> 8) != 1); undef; } } 1; public-inbox-1.6.1/lib/PublicInbox/ConfigIter.pm000066400000000000000000000022441377346120300214750ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # License: AGPL-3.0+ # Intended for PublicInbox::DS->EventLoop in read-only daemons # to avoid each_inbox() monopolizing the event loop when hundreds/thousands # of inboxes are in play. package PublicInbox::ConfigIter; use strict; use v5.10.1; sub new { my ($class, $pi_cfg, $cb, @args) = @_; my $i = 0; bless [ $pi_cfg, \$i, $cb, @args ], __PACKAGE__; } # for PublicInbox::DS::next_tick, we only call this is if # PublicInbox::DS is already loaded sub event_step { my $self = shift; my ($pi_cfg, $i, $cb, @arg) = @$self; my $section = $pi_cfg->{-section_order}->[$$i++]; eval { $cb->($pi_cfg, $section, @arg) }; warn "E: $@ in ${self}::event_step" if $@; PublicInbox::DS::requeue($self) if defined($section); } # for generic PSGI servers sub each_section { my $self = shift; my ($pi_cfg, $i, $cb, @arg) = @$self; while (defined(my $section = $pi_cfg->{-section_order}->[$$i++])) { eval { $cb->($pi_cfg, $section, @arg) }; warn "E: $@ in ${self}::each_section" if $@; } eval { $cb->($pi_cfg, undef, @arg) }; warn "E: $@ in ${self}::each_section" if $@; } 1; public-inbox-1.6.1/lib/PublicInbox/ContentHash.pm000066400000000000000000000050541377346120300216640ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ # Unstable internal API. # Used for on-the-fly duplicate detection in V2 inboxes. # This is not stored in any database anywhere and may change # as changes in duplicate detection are needed. # See L manpage for more details. package PublicInbox::ContentHash; use strict; use warnings; use base qw/Exporter/; our @EXPORT_OK = qw/content_hash content_digest/; use PublicInbox::MID qw(mids references); use PublicInbox::MsgIter; # not sure if less-widely supported hash families are worth bothering with use Digest::SHA; sub digest_addr ($$$) { my ($dig, $h, $v) = @_; $v =~ tr/"//d; $v =~ s/@([a-z0-9\_\.\-\(\)]*([A-Z])\S*)/'@'.lc($1)/ge; utf8::encode($v); $dig->add("$h\0$v\0"); } sub content_dig_i { my ($dig) = $_[1]; my ($part, $depth, @idx) = @{$_[0]}; $dig->add("\0$depth:".join('.', @idx)."\0"); my $fn = $part->filename; if (defined $fn) { utf8::encode($fn); $dig->add("fn\0$fn\0"); } my @d = $part->header('Content-Description'); foreach my $d (@d) { utf8::encode($d); $dig->add("d\0$d\0"); } $dig->add("b\0"); my $ct = $part->content_type || 'text/plain'; my ($s, undef) = msg_part_text($part, $ct); if (defined $s) { $s =~ s/\r\n/\n/gs; $s =~ s/\s*\z//s; utf8::encode($s); } else { $s = $part->body; } $dig->add($s); } sub content_digest ($) { my ($eml) = @_; my $dig = Digest::SHA->new(256); # References: and In-Reply-To: get used interchangeably # in some "duplicates" in LKML. We treat them the same # in SearchIdx, so treat them the same for this: # do NOT consider the Message-ID as part of the content_hash # if we got here, we've already got Message-ID reuse my %seen = map { $_ => 1 } @{mids($eml)}; foreach my $mid (@{references($eml)}) { $dig->add("ref\0$mid\0") unless $seen{$mid}++; } # Only use Sender: if From is not present foreach my $h (qw(From Sender)) { my @v = $eml->header($h); if (@v) { digest_addr($dig, $h, $_) foreach @v; } } foreach my $h (qw(Subject Date)) { my @v = $eml->header($h); foreach my $v (@v) { utf8::encode($v); $dig->add("$h\0$v\0"); } } # Some mail processors will add " to unquoted names that were # not in the original message. For the purposes of deduplication, # do not take it into account: foreach my $h (qw(To Cc)) { my @v = $eml->header($h); digest_addr($dig, $h, $_) foreach @v; } msg_iter($eml, \&content_dig_i, $dig); $dig; } sub content_hash ($) { content_digest($_[0])->digest; } 1; public-inbox-1.6.1/lib/PublicInbox/DS.pm000066400000000000000000000507061377346120300177600ustar00rootroot00000000000000# This library is free software; you can redistribute it and/or modify # it under the same terms as Perl itself. # # This license differs from the rest of public-inbox # # This is a fork of the unmaintained Danga::Socket (1.61) with # significant changes. See Documentation/technical/ds.txt in our # source for details. # # Do not expect this to be a stable API like Danga::Socket, # but it will evolve to suite our needs and to take advantage of # newer Linux and *BSD features. # Bugs encountered were reported to bug-Danga-Socket@rt.cpan.org, # fixed in Danga::Socket 1.62 and visible at: # https://rt.cpan.org/Public/Dist/Display.html?Name=Danga-Socket # # fields: # sock: underlying socket # rbuf: scalarref, usually undef # wbuf: arrayref of coderefs or tmpio (autovivified)) # (tmpio = [ GLOB, offset, [ length ] ]) package PublicInbox::DS; use strict; use bytes; use POSIX qw(WNOHANG); use IO::Handle qw(); use Fcntl qw(SEEK_SET :DEFAULT O_APPEND); use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC); use parent qw(Exporter); our @EXPORT_OK = qw(now msg_more); use 5.010_001; use Scalar::Util qw(blessed); use PublicInbox::Syscall qw(:epoll); use PublicInbox::Tmpfile; use Errno qw(EAGAIN EINVAL); use Carp qw(confess carp); my $nextq; # queue for next_tick my $wait_pids; # list of [ pid, callback, callback_arg ] my $later_queue; # list of callbacks to run at some later interval my $EXPMAP; # fd -> idle_time our $EXPTIME = 180; # 3 minutes my ($later_timer, $reap_armed, $exp_timer); my $ToClose; # sockets to close when event loop is done our ( %DescriptorMap, # fd (num) -> PublicInbox::DS object $Epoll, # Global epoll fd (or DSKQXS ref) $_io, # IO::Handle for Epoll $PostLoopCallback, # subref to call at the end of each loop, if defined (global) $LoopTimeout, # timeout of event loop in milliseconds $DoneInit, # if we've done the one-time module init yet @Timers, # timers $in_loop, ); Reset(); ##################################################################### ### C L A S S M E T H O D S ##################################################################### =head2 C<< CLASS->Reset() >> Reset all state =cut sub Reset { %DescriptorMap = (); $in_loop = $wait_pids = $later_queue = $reap_armed = undef; $EXPMAP = {}; $nextq = $ToClose = $later_timer = $exp_timer = undef; $LoopTimeout = -1; # no timeout by default @Timers = (); $PostLoopCallback = undef; $DoneInit = 0; $_io = undef; # closes real $Epoll FD $Epoll = undef; # may call DSKQXS::DESTROY *EventLoop = *FirstTimeEventLoop; } =head2 C<< CLASS->SetLoopTimeout( $timeout ) >> Set the loop timeout for the event loop to some value in milliseconds. A timeout of 0 (zero) means poll forever. A timeout of -1 means poll and return immediately. =cut sub SetLoopTimeout { return $LoopTimeout = $_[1] + 0; } =head2 C<< PublicInbox::DS::add_timer( $seconds, $coderef, $arg) >> Add a timer to occur $seconds from now. $seconds may be fractional, but timers are not guaranteed to fire at the exact time you ask for. =cut sub add_timer ($$;$) { my ($secs, $coderef, $arg) = @_; my $fire_time = now() + $secs; my $timer = [$fire_time, $coderef, $arg]; if (!@Timers || $fire_time >= $Timers[-1][0]) { push @Timers, $timer; return $timer; } # Now, where do we insert? (NOTE: this appears slow, algorithm-wise, # but it was compared against calendar queues, heaps, naive push/sort, # and a bunch of other versions, and found to be fastest with a large # variety of datasets.) for (my $i = 0; $i < @Timers; $i++) { if ($Timers[$i][0] > $fire_time) { splice(@Timers, $i, 0, $timer); return $timer; } } die "Shouldn't get here."; } # keeping this around in case we support other FD types for now, # epoll_create1(EPOLL_CLOEXEC) requires Linux 2.6.27+... sub set_cloexec ($) { my ($fd) = @_; $_io = IO::Handle->new_from_fd($fd, 'r+') or return; defined(my $fl = fcntl($_io, F_GETFD, 0)) or return; fcntl($_io, F_SETFD, $fl | FD_CLOEXEC); } sub _InitPoller { return if $DoneInit; $DoneInit = 1; if (PublicInbox::Syscall::epoll_defined()) { $Epoll = epoll_create(); set_cloexec($Epoll) if (defined($Epoll) && $Epoll >= 0); } else { my $cls; for (qw(DSKQXS DSPoll)) { $cls = "PublicInbox::$_"; last if eval "require $cls"; } $cls->import(qw(epoll_ctl epoll_wait)); $Epoll = $cls->new; } *EventLoop = *EpollEventLoop; } =head2 C<< CLASS->EventLoop() >> Start processing IO events. In most daemon programs this never exits. See C below for how to exit the loop. =cut sub FirstTimeEventLoop { my $class = shift; _InitPoller(); EventLoop($class); } sub now () { clock_gettime(CLOCK_MONOTONIC) } sub next_tick () { my $q = $nextq or return; $nextq = undef; for (@$q) { # we avoid "ref" on blessed refs to workaround a Perl 5.16.3 leak: # https://rt.perl.org/Public/Bug/Display.html?id=114340 if (blessed($_)) { $_->event_step; } else { $_->(); } } } # runs timers and returns milliseconds for next one, or next event loop sub RunTimers { next_tick(); return (($nextq || $ToClose) ? 0 : $LoopTimeout) unless @Timers; my $now = now(); # Run expired timers while (@Timers && $Timers[0][0] <= $now) { my $to_run = shift(@Timers); $to_run->[1]->($to_run->[2]); } # timers may enqueue into nextq: return 0 if ($nextq || $ToClose); return $LoopTimeout unless @Timers; # convert time to an even number of milliseconds, adding 1 # extra, otherwise floating point fun can occur and we'll # call RunTimers like 20-30 times, each returning a timeout # of 0.0000212 seconds my $timeout = int(($Timers[0][0] - $now) * 1000) + 1; # -1 is an infinite timeout, so prefer a real timeout return $timeout if $LoopTimeout == -1; # otherwise pick the lower of our regular timeout and time until # the next timer return $LoopTimeout if $LoopTimeout < $timeout; return $timeout; } # We can't use waitpid(-1) safely here since it can hit ``, system(), # and other things. So we scan the $wait_pids list, which is hopefully # not too big. We keep $wait_pids small by not calling dwaitpid() # until we've hit EOF when reading the stdout of the child. sub reap_pids { $reap_armed = undef; my $tmp = $wait_pids or return; $wait_pids = undef; foreach my $ary (@$tmp) { my ($pid, $cb, $arg) = @$ary; my $ret = waitpid($pid, WNOHANG); if ($ret == 0) { push @$wait_pids, $ary; # autovivifies @$wait_pids } elsif ($cb) { eval { $cb->($arg, $pid) }; } } # we may not be done, yet, and could've missed/masked a SIGCHLD: $reap_armed //= requeue(\&reap_pids) if $wait_pids; } # reentrant SIGCHLD handler (since reap_pids is not reentrant) sub enqueue_reap () { $reap_armed //= requeue(\&reap_pids) } sub in_loop () { $in_loop } # Internal function: run the post-event callback, send read events # for pushed-back data, and close pending connections. returns 1 # if event loop should continue, or 0 to shut it all down. sub PostEventLoop () { # now we can close sockets that wanted to close during our event # processing. (we didn't want to close them during the loop, as we # didn't want fd numbers being reused and confused during the event # loop) if (my $close_now = $ToClose) { $ToClose = undef; # will be autovivified on push @$close_now = map { fileno($_) } @$close_now; # order matters, destroy expiry times, first: delete @$EXPMAP{@$close_now}; # ->DESTROY methods may populate ToClose delete @DescriptorMap{@$close_now}; } # by default we keep running, unless a postloop callback cancels it $PostLoopCallback ? $PostLoopCallback->(\%DescriptorMap) : 1; } sub EpollEventLoop { local $in_loop = 1; do { my @events; my $i; my $timeout = RunTimers(); # get up to 1000 events my $evcount = epoll_wait($Epoll, 1000, $timeout, \@events); for ($i=0; $i<$evcount; $i++) { # it's possible epoll_wait returned many events, including some at the end # that ones in the front triggered unregister-interest actions. if we # can't find the %sock entry, it's because we're no longer interested # in that event. $DescriptorMap{$events[$i]->[0]}->event_step; } } while (PostEventLoop()); _run_later(); } =head2 C<< CLASS->SetPostLoopCallback( CODEREF ) >> Sets post loop callback function. Pass a subref and it will be called every time the event loop finishes. Return 1 (or any true value) from the sub to make the loop continue, 0 or false and it will exit. The callback function will be passed two parameters: \%DescriptorMap =cut sub SetPostLoopCallback { my ($class, $ref) = @_; # global callback $PostLoopCallback = (defined $ref && ref $ref eq 'CODE') ? $ref : undef; } ##################################################################### ### PublicInbox::DS-the-object code ##################################################################### =head2 OBJECT METHODS =head2 C<< CLASS->new( $socket ) >> Create a new PublicInbox::DS subclass object for the given I which will react to events on it during the C. This is normally (always?) called from your subclass via: $class->SUPER::new($socket); =cut sub new { my ($self, $sock, $ev) = @_; $self->{sock} = $sock; my $fd = fileno($sock); _InitPoller(); retry: if (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $ev)) { if ($! == EINVAL && ($ev & EPOLLEXCLUSIVE)) { $ev &= ~EPOLLEXCLUSIVE; goto retry; } die "couldn't add epoll watch for $fd: $!\n"; } confess("DescriptorMap{$fd} defined ($DescriptorMap{$fd})") if defined($DescriptorMap{$fd}); $DescriptorMap{$fd} = $self; } ##################################################################### ### I N S T A N C E M E T H O D S ##################################################################### sub requeue ($) { push @$nextq, $_[0] } # autovivifies =head2 C<< $obj->close >> Close the socket. =cut sub close { my ($self) = @_; my $sock = delete $self->{sock} or return; # we need to flush our write buffer, as there may # be self-referential closures (sub { $client->close }) # preventing the object from being destroyed delete $self->{wbuf}; # if we're using epoll, we have to remove this from our epoll fd so we stop getting # notifications about it my $fd = fileno($sock); epoll_ctl($Epoll, EPOLL_CTL_DEL, $fd, 0) and confess("EPOLL_CTL_DEL: $!"); # we explicitly don't delete from DescriptorMap here until we # actually close the socket, as we might be in the middle of # processing an epoll_wait/etc that returned hundreds of fds, one # of which is not yet processed and is what we're closing. if we # keep it in DescriptorMap, then the event harnesses can just # looked at $pob->{sock} == undef and ignore it. but if it's an # un-accounted for fd, then it (understandably) freak out a bit # and emit warnings, thinking their state got off. # defer closing the actual socket until the event loop is done # processing this round of events. (otherwise we might reuse fds) push @$ToClose, $sock; # autovivifies $ToClose return 0; } # portable, non-thread-safe sendfile emulation (no pread, yet) sub send_tmpio ($$) { my ($sock, $tmpio) = @_; sysseek($tmpio->[0], $tmpio->[1], SEEK_SET) or return; my $n = $tmpio->[2] // 65536; $n = 65536 if $n > 65536; defined(my $to_write = sysread($tmpio->[0], my $buf, $n)) or return; my $written = 0; while ($to_write > 0) { if (defined(my $w = syswrite($sock, $buf, $to_write, $written))) { $written += $w; $to_write -= $w; } else { return if $written == 0; last; } } $tmpio->[1] += $written; # offset $tmpio->[2] -= $written if defined($tmpio->[2]); # length $written; } sub epbit ($$) { # (sock, default) $_[0]->can('stop_SSL') ? PublicInbox::TLS::epollbit() : $_[1]; } # returns 1 if done, 0 if incomplete sub flush_write ($) { my ($self) = @_; my $sock = $self->{sock} or return; my $wbuf = $self->{wbuf} or return 1; next_buf: while (my $bref = $wbuf->[0]) { if (ref($bref) ne 'CODE') { while ($sock) { my $w = send_tmpio($sock, $bref); # bref is tmpio if (defined $w) { if ($w == 0) { shift @$wbuf; goto next_buf; } } elsif ($! == EAGAIN) { my $ev = epbit($sock, EPOLLOUT) or return $self->close; epwait($sock, $ev | EPOLLONESHOT); return 0; } else { return $self->close; } } } else { #(ref($bref) eq 'CODE') { shift @$wbuf; my $before = scalar(@$wbuf); $bref->($self); # bref may be enqueueing more CODE to call (see accept_tls_step) return 0 if (scalar(@$wbuf) > $before); } } # while @$wbuf delete $self->{wbuf}; 1; # all done } sub rbuf_idle ($$) { my ($self, $rbuf) = @_; if ($$rbuf eq '') { # who knows how long till we can read again delete $self->{rbuf}; } else { $self->{rbuf} = $rbuf; } } sub do_read ($$$;$) { my ($self, $rbuf, $len, $off) = @_; my $r = sysread(my $sock = $self->{sock}, $$rbuf, $len, $off // 0); return ($r == 0 ? $self->close : $r) if defined $r; # common for clients to break connections without warning, # would be too noisy to log here: if ($! == EAGAIN) { my $ev = epbit($sock, EPOLLIN) or return $self->close; epwait($sock, $ev | EPOLLONESHOT); rbuf_idle($self, $rbuf); 0; } else { $self->close; } } # drop the socket if we hit unrecoverable errors on our system which # require BOFH attention: ENOSPC, EFBIG, EIO, EMFILE, ENFILE... sub drop { my $self = shift; carp(@_); $self->close; } # n.b.: use ->write/->read for this buffer to allow compatibility with # PerlIO::mmap or PerlIO::scalar if needed sub tmpio ($$$) { my ($self, $bref, $off) = @_; my $fh = tmpfile('wbuf', $self->{sock}, O_APPEND) or return drop($self, "tmpfile $!"); $fh->autoflush(1); my $len = bytes::length($$bref) - $off; $fh->write($$bref, $len, $off) or return drop($self, "write ($len): $!"); [ $fh, 0 ] # [1] = offset, [2] = length, not set by us } =head2 C<< $obj->write( $data ) >> Write the specified data to the underlying handle. I may be scalar, scalar ref, code ref (to run when there). Returns 1 if writes all went through, or 0 if there are writes in queue. If it returns 1, caller should stop waiting for 'writable' events) =cut sub write { my ($self, $data) = @_; # nobody should be writing to closed sockets, but caller code can # do two writes within an event, have the first fail and # disconnect the other side (whose destructor then closes the # calling object, but it's still in a method), and then the # now-dead object does its second write. that is this case. we # just lie and say it worked. it'll be dead soon and won't be # hurt by this lie. my $sock = $self->{sock} or return 1; my $ref = ref $data; my $bref = $ref ? $data : \$data; my $wbuf = $self->{wbuf}; if ($wbuf && scalar(@$wbuf)) { # already buffering, can't write more... if ($ref eq 'CODE') { push @$wbuf, $bref; } else { my $tmpio = $wbuf->[-1]; if ($tmpio && !defined($tmpio->[2])) { # append to tmp file buffer $tmpio->[0]->print($$bref) or return drop($self, "print: $!"); } else { my $tmpio = tmpio($self, $bref, 0) or return 0; push @$wbuf, $tmpio; } } return 0; } elsif ($ref eq 'CODE') { $bref->($self); return 1; } else { my $to_write = bytes::length($$bref); my $written = syswrite($sock, $$bref, $to_write); if (defined $written) { return 1 if $written == $to_write; requeue($self); # runs: event_step -> flush_write } elsif ($! == EAGAIN) { my $ev = epbit($sock, EPOLLOUT) or return $self->close; epwait($sock, $ev | EPOLLONESHOT); $written = 0; } else { return $self->close; } # deal with EAGAIN or partial write: my $tmpio = tmpio($self, $bref, $written) or return 0; # wbuf may be an empty array if we're being called inside # ->flush_write via CODE bref: push @{$self->{wbuf}}, $tmpio; # autovivifies return 0; } } use constant MSG_MORE => ($^O eq 'linux') ? 0x8000 : 0; sub msg_more ($$) { my $self = $_[0]; my $sock = $self->{sock} or return 1; my $wbuf = $self->{wbuf}; if (MSG_MORE && (!defined($wbuf) || !scalar(@$wbuf)) && !$sock->can('stop_SSL')) { my $n = send($sock, $_[1], MSG_MORE); if (defined $n) { my $nlen = bytes::length($_[1]) - $n; return 1 if $nlen == 0; # all done! # queue up the unwritten substring: my $tmpio = tmpio($self, \($_[1]), $n) or return 0; push @{$self->{wbuf}}, $tmpio; # autovivifies epwait($sock, EPOLLOUT|EPOLLONESHOT); return 0; } } # don't redispatch into NNTPdeflate::write PublicInbox::DS::write($self, \($_[1])); } sub epwait ($$) { my ($sock, $ev) = @_; epoll_ctl($Epoll, EPOLL_CTL_MOD, fileno($sock), $ev) and confess("EPOLL_CTL_MOD $!"); } # return true if complete, false if incomplete (or failure) sub accept_tls_step ($) { my ($self) = @_; my $sock = $self->{sock} or return; return 1 if $sock->accept_SSL; return $self->close if $! != EAGAIN; my $ev = PublicInbox::TLS::epollbit() or return $self->close; epwait($sock, $ev | EPOLLONESHOT); unshift(@{$self->{wbuf}}, \&accept_tls_step); # autovivifies 0; } # return true if complete, false if incomplete (or failure) sub shutdn_tls_step ($) { my ($self) = @_; my $sock = $self->{sock} or return; return $self->close if $sock->stop_SSL(SSL_fast_shutdown => 1); return $self->close if $! != EAGAIN; my $ev = PublicInbox::TLS::epollbit() or return $self->close; epwait($sock, $ev | EPOLLONESHOT); unshift(@{$self->{wbuf}}, \&shutdn_tls_step); # autovivifies 0; } # don't bother with shutdown($sock, 2), we don't fork+exec w/o CLOEXEC # or fork w/o exec, so no inadvertent socket sharing sub shutdn ($) { my ($self) = @_; my $sock = $self->{sock} or return; if ($sock->can('stop_SSL')) { shutdn_tls_step($self); } else { $self->close; } } # must be called with eval, PublicInbox::DS may not be loaded (see t/qspawn.t) sub dwaitpid ($$$) { die "Not in EventLoop\n" unless $in_loop; push @$wait_pids, [ @_ ]; # [ $pid, $cb, $arg ] # We could've just missed our SIGCHLD, cover it, here: enqueue_reap(); } sub _run_later () { my $run = $later_queue or return; $later_timer = $later_queue = undef; $_->() for @$run; } sub later ($) { push @$later_queue, $_[0]; # autovivifies @$later_queue $later_timer //= add_timer(60, \&_run_later); } sub expire_old () { my $now = now(); my $exp = $EXPTIME; my $old = $now - $exp; my %new; while (my ($fd, $idle_at) = each %$EXPMAP) { if ($idle_at < $old) { my $ds_obj = $DescriptorMap{$fd}; $new{$fd} = $idle_at if !$ds_obj->shutdn; } else { $new{$fd} = $idle_at; } } $EXPMAP = \%new; $exp_timer = scalar(keys %new) ? later(\&expire_old) : undef; } sub update_idle_time { my ($self) = @_; my $sock = $self->{sock} or return; $EXPMAP->{fileno($sock)} = now(); $exp_timer //= later(\&expire_old); } sub not_idle_long { my ($self, $now) = @_; my $sock = $self->{sock} or return; my $idle_at = $EXPMAP->{fileno($sock)} or return; ($idle_at + $EXPTIME) > $now; } 1; =head1 AUTHORS (Danga::Socket) Brad Fitzpatrick - author Michael Granger - docs, testing Mark Smith - contributor, heavy user, testing Matt Sergeant - kqueue support, docs, timers, other bits public-inbox-1.6.1/lib/PublicInbox/DSKQXS.pm000066400000000000000000000101511377346120300204550ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # Licensed the same as Danga::Socket (and Perl5) # License: GPL-1.0+ or Artistic-1.0-Perl # # # # kqueue support via IO::KQueue XS module. This makes kqueue look # like epoll to simplify the code in DS.pm. This is NOT meant to be # an all encompassing emulation of epoll via IO::KQueue, but just to # support cases public-inbox-nntpd/httpd care about. # # It also implements signalfd(2) emulation via "tie". package PublicInbox::DSKQXS; use strict; use warnings; use parent qw(Exporter); use Symbol qw(gensym); use IO::KQueue; use Errno qw(EAGAIN); use PublicInbox::Syscall qw(EPOLLONESHOT EPOLLIN EPOLLOUT EPOLLET EPOLL_CTL_ADD EPOLL_CTL_MOD EPOLL_CTL_DEL $SFD_NONBLOCK); our @EXPORT_OK = qw(epoll_ctl epoll_wait); sub EV_DISPATCH () { 0x0080 } # map EPOLL* bits to kqueue EV_* flags for EV_SET sub kq_flag ($$) { my ($bit, $ev) = @_; if ($ev & $bit) { my $fl = EV_ENABLE; $fl |= EV_CLEAR if $fl & EPOLLET; # EV_DISPATCH matches EPOLLONESHOT semantics more closely # than EV_ONESHOT, in that EV_ADD is not required to # re-enable a disabled watch. ($ev & EPOLLONESHOT) ? ($fl | EV_DISPATCH) : $fl; } else { EV_DISABLE; } } sub new { my ($class) = @_; bless { kq => IO::KQueue->new, owner_pid => $$ }, $class; } # returns a new instance which behaves like signalfd on Linux. # It's wasteful in that it uses another FD, but it simplifies # our epoll-oriented code. sub signalfd { my ($class, $signo, $flags) = @_; my $sym = gensym; tie *$sym, $class, $signo, $flags; # calls TIEHANDLE $sym } sub TIEHANDLE { # similar to signalfd() my ($class, $signo, $flags) = @_; my $self = $class->new; $self->{timeout} = ($flags & $SFD_NONBLOCK) ? 0 : -1; my $kq = $self->{kq}; $kq->EV_SET($_, EVFILT_SIGNAL, EV_ADD) for @$signo; $self; } sub READ { # called by sysread() for signalfd compatibility my ($self, undef, $len, $off) = @_; # $_[1] = buf die "bad args for signalfd read" if ($len % 128) // defined($off); my $timeout = $self->{timeout}; my $sigbuf = $self->{sigbuf} //= []; my $nr = $len / 128; my $r = 0; $_[1] = ''; do { while ($nr--) { my $signo = shift(@$sigbuf) or last; # caller only cares about signalfd_siginfo.ssi_signo: $_[1] .= pack('L', $signo) . ("\0" x 124); $r += 128; } return $r if $r; my @events = eval { $self->{kq}->kevent($timeout) }; # workaround https://rt.cpan.org/Ticket/Display.html?id=116615 if ($@) { next if $@ =~ /Interrupted system call/; die; } if (!scalar(@events) && $timeout == 0) { $! = EAGAIN; return; } # Grab the kevent.ident (signal number). The kevent.data # field shows coalesced signals, and maybe we'll use it # in the future... @$sigbuf = map { $_->[0] } @events; } while (1); } # for fileno() calls in PublicInbox::DS sub FILENO { ${$_[0]->{kq}} } sub epoll_ctl { my ($self, $op, $fd, $ev) = @_; my $kq = $self->{kq}; if ($op == EPOLL_CTL_MOD) { $kq->EV_SET($fd, EVFILT_READ, kq_flag(EPOLLIN, $ev)); eval { $kq->EV_SET($fd, EVFILT_WRITE, kq_flag(EPOLLOUT, $ev)) }; } elsif ($op == EPOLL_CTL_DEL) { $kq->EV_SET($fd, EVFILT_READ, EV_DISABLE); eval { $kq->EV_SET($fd, EVFILT_WRITE, EV_DISABLE) }; } else { # EPOLL_CTL_ADD $kq->EV_SET($fd, EVFILT_READ, EV_ADD|kq_flag(EPOLLIN, $ev)); # we call this blindly for read-only FDs such as tied # DSKQXS (signalfd emulation) and Listeners eval { $kq->EV_SET($fd, EVFILT_WRITE, EV_ADD | kq_flag(EPOLLOUT, $ev)); }; } 0; } sub epoll_wait { my ($self, $maxevents, $timeout_msec, $events) = @_; @$events = eval { $self->{kq}->kevent($timeout_msec) }; if (my $err = $@) { # workaround https://rt.cpan.org/Ticket/Display.html?id=116615 if ($err =~ /Interrupted system call/) { @$events = (); } else { die $err; } } # caller only cares for $events[$i]->[0] scalar(@$events); } # kqueue is close-on-fork (not exec), so we must not close it # in forked processes: sub DESTROY { my ($self) = @_; my $kq = delete $self->{kq} or return; if (delete($self->{owner_pid}) == $$) { POSIX::close($$kq); } } 1; public-inbox-1.6.1/lib/PublicInbox/DSPoll.pm000066400000000000000000000030611377346120300205770ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # Licensed the same as Danga::Socket (and Perl5) # License: GPL-1.0+ or Artistic-1.0-Perl # # # # poll(2) via IO::Poll core module. This makes poll look # like epoll to simplify the code in DS.pm. This is NOT meant to be # an all encompassing emulation of epoll via IO::Poll, but just to # support cases public-inbox-nntpd/httpd care about. package PublicInbox::DSPoll; use strict; use warnings; use parent qw(Exporter); use IO::Poll; use PublicInbox::Syscall qw(EPOLLONESHOT EPOLLIN EPOLLOUT EPOLL_CTL_DEL); our @EXPORT_OK = qw(epoll_ctl epoll_wait); sub new { bless {}, $_[0] } # fd => events sub epoll_ctl { my ($self, $op, $fd, $ev) = @_; # not wasting time on error checking if ($op != EPOLL_CTL_DEL) { $self->{$fd} = $ev; } else { delete $self->{$fd}; } 0; } sub epoll_wait { my ($self, $maxevents, $timeout_msec, $events) = @_; my @pset; while (my ($fd, $events) = each %$self) { my $pevents = $events & EPOLLIN ? POLLIN : 0; $pevents |= $events & EPOLLOUT ? POLLOUT : 0; push(@pset, $fd, $pevents); } @$events = (); my $n = IO::Poll::_poll($timeout_msec, @pset); if ($n >= 0) { for (my $i = 0; $i < @pset; ) { my $fd = $pset[$i++]; my $revents = $pset[$i++] or next; delete($self->{$fd}) if $self->{$fd} & EPOLLONESHOT; push @$events, [ $fd ]; } my $nevents = scalar @$events; if ($n != $nevents) { warn "BUG? poll() returned $n, but got $nevents"; } } $n; } 1; public-inbox-1.6.1/lib/PublicInbox/Daemon.pm000066400000000000000000000433621377346120300206550ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ # contains common daemon code for the httpd, imapd, and nntpd servers. # This may be used for read-only IMAP server if we decide to implement it. package PublicInbox::Daemon; use strict; use warnings; use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; use IO::Handle; # ->autoflush use IO::Socket; use POSIX qw(WNOHANG :signal_h); use Socket qw(IPPROTO_TCP SOL_SOCKET); sub SO_ACCEPTFILTER () { 0x1000 } use Cwd qw/abs_path/; STDOUT->autoflush(1); STDERR->autoflush(1); use PublicInbox::DS qw(now); use PublicInbox::Syscall qw($SFD_NONBLOCK); require PublicInbox::Listener; use PublicInbox::EOFpipe; use PublicInbox::Sigfd; my @CMD; my ($set_user, $oldset); my (@cfg_listen, $stdout, $stderr, $group, $user, $pid_file, $daemonize); my $worker_processes = 1; my @listeners; my %pids; my %tls_opt; # scheme://sockname => args for IO::Socket::SSL->start_SSL my $reexec_pid; my ($uid, $gid); my ($default_cert, $default_key); my %KNOWN_TLS = ( 443 => 'https', 563 => 'nntps', 993 => 'imaps' ); my %KNOWN_STARTTLS = ( 119 => 'nntp', 143 => 'imap' ); sub accept_tls_opt ($) { my ($opt_str) = @_; # opt_str: opt1=val1,opt2=val2 (opt may repeat for multi-value) require PublicInbox::TLS; my $o = {}; # allow ',' as delimiter since '&' is shell-unfriendly foreach (split(/[,&]/, $opt_str)) { my ($k, $v) = split(/=/, $_, 2); push @{$o->{$k} ||= []}, $v; } # key may be a part of cert. At least # p5-io-socket-ssl/example/ssl_server.pl has this fallback: $o->{cert} //= [ $default_cert ]; $o->{key} //= defined($default_key) ? [ $default_key ] : $o->{cert}; my %ctx_opt = (SSL_server => 1); # parse out hostname:/path/to/ mappings: foreach my $k (qw(cert key)) { my $x = $ctx_opt{'SSL_'.$k.'_file'} = {}; foreach my $path (@{$o->{$k}}) { my $host = ''; $path =~ s/\A([^:]+):// and $host = $1; $x->{$host} = $path; } } my $ctx = IO::Socket::SSL::SSL_Context->new(%ctx_opt) or die 'SSL_Context->new: '.PublicInbox::TLS::err(); # save ~34K per idle connection (cf. SSL_CTX_set_mode(3ssl)) # RSS goes from 346MB to 171MB with 10K idle NNTPS clients on amd64 # cf. https://rt.cpan.org/Ticket/Display.html?id=129463 my $mode = eval { Net::SSLeay::MODE_RELEASE_BUFFERS() }; if ($mode && $ctx->{context}) { eval { Net::SSLeay::CTX_set_mode($ctx->{context}, $mode) }; warn "W: $@ (setting SSL_MODE_RELEASE_BUFFERS)\n" if $@; } { SSL_server => 1, SSL_startHandshake => 0, SSL_reuse_ctx => $ctx }; } sub daemon_prepare ($) { my ($default_listen) = @_; my $listener_names = {}; # sockname => IO::Handle $oldset = PublicInbox::Sigfd::block_signals(); @CMD = ($0, @ARGV); my ($prog) = ($CMD[0] =~ m!([^/]+)\z!g); my $help = < \@cfg_listen, '1|stdout=s' => \$stdout, '2|stderr=s' => \$stderr, 'W|worker-processes=i' => \$worker_processes, 'P|pid-file=s' => \$pid_file, 'u|user=s' => \$user, 'g|group=s' => \$group, 'D|daemonize' => \$daemonize, 'cert=s' => \$default_cert, 'key=s' => \$default_key, 'help|h' => \(my $show_help), ); GetOptions(%opt) or die $help; if ($show_help) { print $help; exit 0 }; if (defined $pid_file && $pid_file =~ /\.oldbin\z/) { die "--pid-file cannot end with '.oldbin'\n"; } @listeners = inherit($listener_names); # allow socket-activation users to set certs once and not # have to configure each socket: my @inherited_names = keys(%$listener_names) if defined($default_cert); # ignore daemonize when inheriting $daemonize = undef if scalar @listeners; push @cfg_listen, $default_listen unless (@listeners || @cfg_listen); foreach my $l (@cfg_listen) { my $orig = $l; my $scheme = ''; if ($l =~ s!\A([^:]+)://!!) { $scheme = $1; } elsif ($l =~ /\A(?:\[[^\]]+\]|[^:]+):([0-9])+/) { my $s = $KNOWN_TLS{$1} // $KNOWN_STARTTLS{$1}; $scheme = $s if defined $s; } if ($l =~ s!/?\?(.+)\z!!) { $tls_opt{"$scheme://$l"} = accept_tls_opt($1); } elsif (defined($default_cert)) { $tls_opt{"$scheme://$l"} = accept_tls_opt(''); } elsif ($scheme =~ /\A(?:https|imaps|imaps)\z/) { die "$orig specified w/o cert=\n"; } # TODO: use scheme to load either NNTP.pm or HTTP.pm next if $listener_names->{$l}; # already inherited my (%o, $sock_pkg); if (index($l, '/') == 0) { $sock_pkg = 'IO::Socket::UNIX'; eval "use $sock_pkg"; die $@ if $@; %o = (Type => SOCK_STREAM, Peer => $l); if (-S $l) { my $c = $sock_pkg->new(%o); if (!defined($c) && $!{ECONNREFUSED}) { unlink $l or die "failed to unlink stale socket=$l: $!\n"; } # else: let the bind fail } $o{Local} = delete $o{Peer}; } else { # both work for IPv4, too for (qw(IO::Socket::IP IO::Socket::INET6)) { $sock_pkg = $_; eval "use $sock_pkg"; $@ or last; } die $@ if $@; %o = (LocalAddr => $l, ReuseAddr => 1, Proto => 'tcp'); } $o{Listen} = 1024; my $prev = umask 0000; my $s = eval { $sock_pkg->new(%o) }; warn "error binding $l: $! ($@)\n" unless $s; umask $prev; if ($s) { $listener_names->{sockname($s)} = $s; $s->blocking(0); push @listeners, $s; } } # cert/key options in @cfg_listen takes precedence when inheriting, # but map well-known inherited ports if --listen isn't specified # at all for my $sockname (@inherited_names) { $sockname =~ /:([0-9]+)\z/ or next; if (my $scheme = $KNOWN_TLS{$1}) { $tls_opt{"$scheme://$sockname"} ||= accept_tls_opt(''); } elsif (($scheme = $KNOWN_STARTTLS{$1})) { next if $tls_opt{"$scheme://$sockname"}; $tls_opt{''} ||= accept_tls_opt(''); } } die "No listeners bound\n" unless @listeners; } sub check_absolute ($$) { my ($var, $val) = @_; if (defined $val && index($val, '/') != 0) { die "--$var must be an absolute path when using --daemonize: $val\n"; } } sub daemonize () { if ($daemonize) { foreach my $i (0..$#ARGV) { my $arg = $ARGV[$i]; next unless -e $arg; $ARGV[$i] = abs_path($arg); } check_absolute('stdout', $stdout); check_absolute('stderr', $stderr); check_absolute('pid-file', $pid_file); chdir '/' or die "chdir failed: $!"; } if (defined($pid_file) || defined($group) || defined($user)) { eval { require Net::Server::Daemonize; 1 } // die <&STDIN' or die "redirect stdout failed: $!\n"; open STDERR, '>&STDIN' or die "redirect stderr failed: $!\n"; POSIX::setsid(); $pid = fork; die "could not fork: $!\n" unless defined $pid; exit if $pid; } return unless defined $pid_file; write_pid($pid_file); # for ->DESTROY: bless { pid => $$, pid_file => \$pid_file }, __PACKAGE__; } sub worker_quit { # $_[0] = signal name or number (unused) # killing again terminates immediately: exit unless @listeners; $_->close foreach @listeners; # call PublicInbox::DS::close @listeners = (); my $proc_name; my $warn = 0; # drop idle connections and try to quit gracefully PublicInbox::DS->SetPostLoopCallback(sub { my ($dmap, undef) = @_; my $n = 0; my $now = now(); foreach my $s (values %$dmap) { $s->can('busy') or next; if ($s->busy($now)) { ++$n; } else { # close as much as possible, early as possible $s->close; } } if ($n) { if (($warn + 5) < now()) { warn "$$ quitting, $n client(s) left\n"; $warn = now(); } unless (defined $proc_name) { $proc_name = (split(/\s+/, $0))[0]; $proc_name =~ s!\A.*?([^/]+)\z!$1!; } $0 = "$proc_name quitting, $n client(s) left"; } $n; # true: loop continues, false: loop breaks }); } sub reopen_logs { if ($stdout) { open STDOUT, '>>', $stdout or warn "failed to redirect stdout to $stdout: $!\n"; STDOUT->autoflush(1); do_chown($stdout); } if ($stderr) { open STDERR, '>>', $stderr or warn "failed to redirect stderr to $stderr: $!\n"; STDERR->autoflush(1); do_chown($stderr); } } sub sockname ($) { my ($s) = @_; my $addr = getsockname($s) or return; my ($host, $port) = host_with_port($addr); if ($port == 0 && $host eq '127.0.0.1') { my ($path) = Socket::sockaddr_un($addr); return $path; } "$host:$port"; } sub unpack_ipv6 ($) { my ($addr) = @_; my ($port, $host); # Socket.pm in Perl 5.14+ supports IPv6: eval { ($port, $host) = Socket::unpack_sockaddr_in6($addr); $host = Socket::inet_ntop(Socket::AF_INET6(), $host); }; if ($@) { # Perl 5.12 or earlier? SpamAssassin and Net::Server use # Socket6, so it may be installed on our system, already # (otherwise die here): require Socket6; ($port, $host) = Socket6::unpack_sockaddr_in6($addr); $host = Socket6::inet_ntop(Socket6::AF_INET6(), $host); } ($host, $port); } sub host_with_port ($) { my ($addr) = @_; my ($port, $host); # this eval will die on Unix sockets: eval { if (length($addr) >= 28) { ($host, $port) = unpack_ipv6($addr); $host = "[$host]"; } else { ($port, $host) = Socket::sockaddr_in($addr); $host = Socket::inet_ntoa($host); } }; $@ ? ('127.0.0.1', 0) : ($host, $port); } sub inherit ($) { my ($listener_names) = @_; return () if ($ENV{LISTEN_PID} || 0) != $$; my $fds = $ENV{LISTEN_FDS} or return (); my $end = $fds + 2; # LISTEN_FDS_START - 1 my @rv = (); foreach my $fd (3..$end) { my $s = IO::Handle->new_from_fd($fd, 'r'); if (my $k = sockname($s)) { if ($s->blocking) { $s->blocking(0); warn <<""; Inherited socket (fd=$fd) is blocking, making it non-blocking. Set 'NonBlocking = true' in the systemd.service unit to avoid stalled processes when multiple service instances start. } $listener_names->{$k} = $s; push @rv, $s; } else { warn "failed to inherit fd=$fd (LISTEN_FDS=$fds)"; } } @rv } sub upgrade { # $_[0] = signal name or number (unused) if ($reexec_pid) { warn "upgrade in-progress: $reexec_pid\n"; return; } if (defined $pid_file) { if ($pid_file =~ /\.oldbin\z/) { warn "BUG: .oldbin suffix exists: $pid_file\n"; return; } unlink_pid_file_safe_ish($$, $pid_file); $pid_file .= '.oldbin'; write_pid($pid_file); } my $pid = fork; unless (defined $pid) { warn "fork failed: $!\n"; return; } if ($pid == 0) { use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD); $ENV{LISTEN_FDS} = scalar @listeners; $ENV{LISTEN_PID} = $$; foreach my $s (@listeners) { # @listeners are globs with workers, PI::L w/o workers $s = $s->{sock} if ref($s) eq 'PublicInbox::Listener'; my $fl = fcntl($s, F_GETFD, 0); fcntl($s, F_SETFD, $fl &= ~FD_CLOEXEC); } exec @CMD; die "Failed to exec: $!\n"; } $reexec_pid = $pid; } sub kill_workers ($) { my ($s) = @_; while (my ($pid, $id) = each %pids) { kill $s, $pid; } } sub upgrade_aborted ($) { my ($p) = @_; warn "reexec PID($p) died with: $?\n"; $reexec_pid = undef; return unless $pid_file; my $file = $pid_file; $file =~ s/\.oldbin\z// or die "BUG: no '.oldbin' suffix in $file"; unlink_pid_file_safe_ish($$, $pid_file); $pid_file = $file; eval { write_pid($pid_file) }; warn $@, "\n" if $@; } sub reap_children { # $_[0] = 'CHLD' or POSIX::SIGCHLD() while (1) { my $p = waitpid(-1, WNOHANG) or return; if (defined $reexec_pid && $p == $reexec_pid) { upgrade_aborted($p); } elsif (defined(my $id = delete $pids{$p})) { warn "worker[$id] PID($p) died with: $?\n"; } elsif ($p > 0) { warn "unknown PID($p) reaped: $?\n"; } else { return; } } } sub unlink_pid_file_safe_ish ($$) { my ($unlink_pid, $file) = @_; return unless defined $unlink_pid && $unlink_pid == $$; open my $fh, '<', $file or return; local $/ = "\n"; defined(my $read_pid = <$fh>) or return; chomp $read_pid; if ($read_pid == $unlink_pid) { Net::Server::Daemonize::unlink_pid_file($file); } } sub master_quit ($) { exit unless @listeners; @listeners = (); kill_workers($_[0]); } sub master_loop { pipe(my ($p0, $p1)) or die "failed to create parent-pipe: $!"; my $set_workers = $worker_processes; reopen_logs(); my $ignore_winch; my $sig = { USR1 => sub { reopen_logs(); kill_workers($_[0]); }, USR2 => \&upgrade, QUIT => \&master_quit, INT => \&master_quit, TERM => \&master_quit, WINCH => sub { return if $ignore_winch || !@listeners; if (-t STDIN || -t STDOUT || -t STDERR) { $ignore_winch = 1; warn < sub { return unless @listeners; $worker_processes = $set_workers; kill_workers($_[0]); }, TTIN => sub { return unless @listeners; if ($set_workers > $worker_processes) { ++$worker_processes; } else { $worker_processes = ++$set_workers; } }, TTOU => sub { $worker_processes = --$set_workers if $set_workers > 0; }, CHLD => \&reap_children, }; my $sigfd = PublicInbox::Sigfd->new($sig, 0); local %SIG = (%SIG, %$sig) if !$sigfd; PublicInbox::Sigfd::sig_setmask($oldset) if !$sigfd; while (1) { # main loop my $n = scalar keys %pids; unless (@listeners) { exit if $n == 0; $set_workers = $worker_processes = $n = 0; } if ($n > $worker_processes) { while (my ($k, $v) = each %pids) { kill('TERM', $k) if $v >= $worker_processes; } $n = $worker_processes; } my $want = $worker_processes - 1; if ($n <= $want) { PublicInbox::Sigfd::block_signals() if !$sigfd; for my $i ($n..$want) { my $pid = fork; if (!defined $pid) { warn "failed to fork worker[$i]: $!\n"; } elsif ($pid == 0) { $set_user->() if $set_user; return $p0; # run normal work code } else { warn "PID=$pid is worker[$i]\n"; $pids{$pid} = $i; } } PublicInbox::Sigfd::sig_setmask($oldset) if !$sigfd; } if ($sigfd) { # Linux and IO::KQueue users: $sigfd->wait_once; } else { # wake up every second sleep(1); } } exit # never gets here, just for documentation } sub tls_start_cb ($$) { my ($opt, $orig_post_accept) = @_; sub { my ($io, $addr, $srv) = @_; my $ssl = IO::Socket::SSL->start_SSL($io, %$opt); $orig_post_accept->($ssl, $addr, $srv); } } sub defer_accept ($$) { my ($s, $af_name) = @_; return unless defined $af_name; if ($^O eq 'linux') { my $TCP_DEFER_ACCEPT = 9; # Socket::TCP_DEFER_ACCEPT is in 5.14+ my $x = getsockopt($s, IPPROTO_TCP, $TCP_DEFER_ACCEPT); return unless defined $x; # may be Unix socket my $sec = unpack('i', $x); return if $sec > 0; # systemd users may set a higher value setsockopt($s, IPPROTO_TCP, $TCP_DEFER_ACCEPT, 1); } elsif ($^O eq 'freebsd') { my $x = getsockopt($s, SOL_SOCKET, SO_ACCEPTFILTER); return if defined $x; # don't change if set my $accf_arg = pack('a16a240', $af_name, ''); setsockopt($s, SOL_SOCKET, SO_ACCEPTFILTER, $accf_arg); } } sub daemon_loop ($$$$) { my ($refresh, $post_accept, $tlsd, $af_default) = @_; my %post_accept; while (my ($k, $v) = each %tls_opt) { if ($k =~ s!\A(?:https|imaps|nntps)://!!) { $post_accept{$k} = tls_start_cb($v, $post_accept); } elsif ($tlsd) { # STARTTLS, $k eq '' is OK $tlsd->{accept_tls} = $v; } } my $sig = { HUP => $refresh, INT => \&worker_quit, QUIT => \&worker_quit, TERM => \&worker_quit, TTIN => 'IGNORE', TTOU => 'IGNORE', USR1 => \&reopen_logs, USR2 => 'IGNORE', WINCH => 'IGNORE', CHLD => \&PublicInbox::DS::enqueue_reap, }; if ($worker_processes > 0) { $refresh->(); # preload by default my $fh = master_loop(); # returns if in child process PublicInbox::EOFpipe->new($fh, \&worker_quit, undef); } else { reopen_logs(); $set_user->() if $set_user; $sig->{USR2} = sub { worker_quit() if upgrade() }; $refresh->(); } $uid = $gid = undef; reopen_logs(); @listeners = map {; my $tls_cb = $post_accept{sockname($_)}; # NNTPS, HTTPS, HTTP, IMAPS and POP3S are client-first traffic # IMAP, NNTP and POP3 are server-first defer_accept($_, $tls_cb ? 'dataready' : $af_default); # this calls epoll_create: PublicInbox::Listener->new($_, $tls_cb || $post_accept) } @listeners; my $sigfd = PublicInbox::Sigfd->new($sig, $SFD_NONBLOCK); local %SIG = (%SIG, %$sig) if !$sigfd; if (!$sigfd) { # wake up every second to accept signals if we don't # have signalfd or IO::KQueue: PublicInbox::Sigfd::sig_setmask($oldset); PublicInbox::DS->SetLoopTimeout(1000); } PublicInbox::DS->EventLoop; } sub run ($$$;$) { my ($default, $refresh, $post_accept, $tlsd) = @_; local $SIG{PIPE} = 'IGNORE'; daemon_prepare($default); my $af_default = $default =~ /:8080\z/ ? 'httpready' : undef; my $for_destroy = daemonize(); daemon_loop($refresh, $post_accept, $tlsd, $af_default); PublicInbox::DS->Reset; # ->DESTROY runs when $for_destroy goes out-of-scope } sub do_chown ($) { my ($path) = @_; if (defined $uid and !chown($uid, $gid, $path)) { warn "could not chown $path: $!\n"; } } sub write_pid ($) { my ($path) = @_; Net::Server::Daemonize::create_pid_file($path); do_chown($path); } sub DESTROY { unlink_pid_file_safe_ish($_[0]->{pid}, ${$_[0]->{pid_file}}); } 1; public-inbox-1.6.1/lib/PublicInbox/DirIdle.pm000066400000000000000000000031741377346120300207630ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # License: AGPL-3.0+ # Used by public-inbox-watch for Maildir (and possibly MH in the future) package PublicInbox::DirIdle; use strict; use parent 'PublicInbox::DS'; use PublicInbox::Syscall qw(EPOLLIN EPOLLET); use PublicInbox::In2Tie; my ($MAIL_IN, $ino_cls); if ($^O eq 'linux' && eval { require Linux::Inotify2; 1 }) { $MAIL_IN = Linux::Inotify2::IN_MOVED_TO() | Linux::Inotify2::IN_CREATE(); $ino_cls = 'Linux::Inotify2'; # Perl 5.22+ is needed for fileno(DIRHANDLE) support: } elsif ($^V ge v5.22 && eval { require PublicInbox::KQNotify }) { $MAIL_IN = PublicInbox::KQNotify::MOVED_TO_OR_CREATE(); $ino_cls = 'PublicInbox::KQNotify'; } else { require PublicInbox::FakeInotify; $MAIL_IN = PublicInbox::FakeInotify::MOVED_TO_OR_CREATE(); } sub new { my ($class, $dirs, $cb) = @_; my $self = bless { cb => $cb }, $class; my $inot; if ($ino_cls) { $inot = $ino_cls->new or die "E: $ino_cls->new: $!"; my $io = PublicInbox::In2Tie::io($inot); $self->SUPER::new($io, EPOLLIN | EPOLLET); } else { require PublicInbox::FakeInotify; $inot = PublicInbox::FakeInotify->new; # starts timer } # Linux::Inotify2->watch or similar $inot->watch($_, $MAIL_IN) for @$dirs; $self->{inot} = $inot; PublicInbox::FakeInotify::poll_once($self) if !$ino_cls; $self; } sub event_step { my ($self) = @_; my $cb = $self->{cb}; local $PublicInbox::DS::in_loop = 0; # waitpid() synchronously eval { my @events = $self->{inot}->read; # Linux::Inotify2->read $cb->($_) for @events; }; warn "$self->{inot}->read err: $@\n" if $@; } 1; public-inbox-1.6.1/lib/PublicInbox/DummyInbox.pm000066400000000000000000000011731377346120300215370ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # License: AGPL-3.0+ # # An EXAMINE-able, PublicInbox::Inbox-like object for IMAP. Some # IMAP clients don't like having unselectable parent mailboxes, # so we have a dummy package PublicInbox::DummyInbox; use strict; sub created_at { 0 } # Msgmap::created_at sub mm { shift } sub uid_range { [] } # Over::uid_range sub subscribe_unlock { undef }; no warnings 'once'; *max = \&created_at; *query_xover = \&uid_range; *over = \&mm; *search = *unsubscribe_unlock = *get_art = *description = *base_url = \&subscribe_unlock; 1; public-inbox-1.6.1/lib/PublicInbox/EOFpipe.pm000066400000000000000000000011531377346120300207310ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # License: AGPL-3.0+ package PublicInbox::EOFpipe; use strict; use parent qw(PublicInbox::DS); use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT); sub new { my (undef, $rd, $cb, $arg) = @_; my $self = bless { cb => $cb, arg => $arg }, __PACKAGE__; # 1031: F_SETPIPE_SZ, 4096: page size fcntl($rd, 1031, 4096) if $^O eq 'linux'; $self->SUPER::new($rd, EPOLLIN|EPOLLONESHOT); } sub event_step { my ($self) = @_; if ($self->do_read(my $buf, 1) == 0) { # auto-closed $self->{cb}->($self->{arg}); } } 1; public-inbox-1.6.1/lib/PublicInbox/Emergency.pm000066400000000000000000000042521377346120300213630ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ # # Emergency Maildir delivery for MDA package PublicInbox::Emergency; use strict; use warnings; use Fcntl qw(:DEFAULT SEEK_SET); use Sys::Hostname qw(hostname); use IO::Handle; # ->flush, ->autoflush sub new { my ($class, $dir) = @_; foreach (qw(new tmp cur)) { my $d = "$dir/$_"; next if -d $d; require File::Path; if (!File::Path::mkpath($d) && !-d $d) { die "failed to mkpath($d): $!\n"; } } bless { dir => $dir, files => {}, t => 0, cnt => 0, pid => $$ }, $class; } sub _fn_in { my ($self, $dir) = @_; my @host = split(/\./, hostname); my $now = time; if ($self->{t} != $now) { $self->{t} = $now; $self->{cnt} = 0; } else { $self->{cnt}++; } my $f; do { $f = "$self->{dir}/$dir/$self->{t}.$$"."_$self->{cnt}.$host[0]"; $self->{cnt}++; } while (-e $f); $f; } sub prepare { my ($self, $strref) = @_; die "already in transaction: $self->{tmp}" if $self->{tmp}; my ($tmp, $fh); do { $tmp = _fn_in($self, 'tmp'); $! = undef; } while (!sysopen($fh, $tmp, O_CREAT|O_EXCL|O_RDWR) && $!{EEXIST}); print $fh $$strref or die "write failed: $!"; $fh->flush or die "flush failed: $!"; $fh->autoflush(1); $self->{fh} = $fh; $self->{tmp} = $tmp; } sub abort { my ($self) = @_; delete $self->{fh}; my $tmp = delete $self->{tmp} or return; unlink($tmp) or warn "Failed to unlink $tmp: $!"; undef; } sub fh { my ($self) = @_; my $fh = $self->{fh} or die "{fh} not open!\n"; seek($fh, 0, SEEK_SET) or die "seek(fh) failed: $!"; sysseek($fh, 0, SEEK_SET) or die "sysseek(fh) failed: $!"; $fh; } sub commit { my ($self) = @_; $$ == $self->{pid} or return; # no-op in forked child delete $self->{fh}; my $tmp = delete $self->{tmp} or return; my $new; do { $new = _fn_in($self, 'new'); } while (!link($tmp, $new) && $!{EEXIST}); my @sn = stat($new) or die "stat $new failed: $!"; my @st = stat($tmp) or die "stat $tmp failed: $!"; if ($st[0] == $sn[0] && $st[1] == $sn[1]) { unlink($tmp) or warn "Failed to unlink $tmp: $!"; } else { warn "stat($new) and stat($tmp) differ"; } } sub DESTROY { commit($_[0]) } 1; public-inbox-1.6.1/lib/PublicInbox/Eml.pm000066400000000000000000000351711377346120300201660ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # License: AGPL-3.0+ # # Lazy MIME parser, it still slurps the full message but keeps short # lifetimes. Unlike Email::MIME, it doesn't pre-split multipart # messages or do any up-front parsing of headers besides splitting # the header string from the body. # # Contains ideas and code from Email::Simple and Email::MIME # (Perl Artistic License, GPL-1+) # # This aims to replace Email::MIME for our purposes, similar API # but internal field names are differ if they're not 100%-compatible. # # Includes some proposed fixes for Email::MIME: # - header-less sub parts - https://github.com/rjbs/Email-MIME/issues/14 # - "0" as boundary - https://github.com/rjbs/Email-MIME/issues/63 # # $self = { # bdy => scalar ref for body (may be undef), # hdr => scalar ref for header, # crlf => "\n" or "\r\n" (scalar, not a ref), # # # filled in during ->each_part # ct => hash ref returned by parse_content_type # } package PublicInbox::Eml; use strict; use v5.10.1; use Carp qw(croak); use Encode qw(find_encoding decode encode); # stdlib use Text::Wrap qw(wrap); # stdlib, we need Perl 5.6+ for $huge use MIME::Base64 3.05; # Perl 5.10.0 / 5.9.2 use MIME::QuotedPrint 3.05; # ditto my $MIME_Header = find_encoding('MIME-Header'); use PublicInbox::EmlContentFoo qw(parse_content_type parse_content_disposition); $PublicInbox::EmlContentFoo::STRICT_PARAMS = 0; our $mime_parts_limit = 1000; # same as SpamAssassin (not in postfix AFAIK) # the rest of the limit names are taken from postfix: our $mime_nesting_limit = 20; # seems enough, Perl sucks, here our $mime_boundary_length_limit = 2048; # same as postfix our $header_size_limit = 102400; # same as postfix my %MIME_ENC = (qp => \&enc_qp, base64 => \&encode_base64); my %MIME_DEC = (qp => \&dec_qp, base64 => \&decode_base64); $MIME_ENC{quotedprint} = $MIME_ENC{'quoted-printable'} = $MIME_ENC{qp}; $MIME_DEC{quotedprint} = $MIME_DEC{'quoted-printable'} = $MIME_DEC{qp}; $MIME_ENC{$_} = \&identity_codec for qw(7bit 8bit binary); my %DECODE_ADDRESS = map { $_ => 1 } qw(From To Cc Sender Reply-To); my %DECODE_FULL = ( Subject => 1, 'Content-Description' => 1, 'Content-Type' => 1, # not correct, but needed, oh well ); our %STR_TYPE = (text => 1); our %STR_SUBTYPE = (plain => 1, html => 1); # message/* subtypes we descend into our %MESSAGE_DESCEND = ( news => 1, # RFC 1849 (obsolete, but archives are forever) rfc822 => 1, # RFC 2046 rfc2822 => 1, # gmime handles this (but not rfc5322) global => 1, # RFC 6532 ); my %re_memo; sub re_memo ($) { my ($k) = @_; # Do not normalize $k with lc/uc; instead strive to keep # capitalization in our codebase consistent. $re_memo{$k} ||= qr/^\Q$k\E:[ \t]*([^\n]*\r?\n # 1st line # continuation lines: (?:[^:\n]*?[ \t]+[^\n]*\r?\n)*) /ismx } sub hdr_truncate ($) { my $len = length($_[0]); substr($_[0], $header_size_limit, $len) = ''; my $end = rindex($_[0], "\n"); if ($end >= 0) { ++$end; substr($_[0], $end, $len) = ''; warn "header of $len bytes truncated to $end bytes\n"; } else { $_[0] = ''; warn <= 0) { # likely on *nix my $hdr = substr($$ref, 0, $pos + 2, ''); # sv_chop on $$ref chop($hdr); # lower SvCUR hdr_truncate($hdr) if length($hdr) > $header_size_limit; bless { hdr => \$hdr, crlf => "\n", bdy => $ref }, __PACKAGE__; } elsif ($$ref =~ /\r?\n(\r?\n)/s) { my $hdr = substr($$ref, 0, $+[0], ''); # sv_chop on $$ref substr($hdr, -(length($1))) = ''; # lower SvCUR hdr_truncate($hdr) if length($hdr) > $header_size_limit; bless { hdr => \$hdr, crlf => $1, bdy => $ref }, __PACKAGE__; } elsif ($$ref =~ /^[a-z0-9-]+[ \t]*:/ims && $$ref =~ /(\r?\n)\z/s) { # body is optional :P my $hdr = substr($$ref, 0, $header_size_limit + 1); hdr_truncate($hdr) if length($hdr) > $header_size_limit; bless { hdr => \$hdr, crlf => $1 }, __PACKAGE__; } else { # nothing useful my $hdr = $$ref = ''; bless { hdr => \$hdr, crlf => "\n" }, __PACKAGE__; } } sub new_sub { my (undef, $ref) = @_; # special case for messages like <85k5su9k59.fsf_-_@lola.goethe.zz> $$ref =~ /\A(\r?\n)/s or return new(undef, $ref); my $hdr = substr($$ref, 0, $+[0], ''); # sv_chop on $$ref bless { hdr => \$hdr, crlf => $1, bdy => $ref }, __PACKAGE__; } # same output as Email::Simple::Header::header_raw, but we extract # headers on-demand instead of parsing them into a list which # requires O(n) lookups anyways sub header_raw { my $re = re_memo($_[1]); my @v = (${ $_[0]->{hdr} } =~ /$re/g); for (@v) { # for compatibility w/ Email::Simple::Header, s/\s+\z//s; s/\A\s+//s; s/\r?\n[ \t]*/ /gs; } wantarray ? @v : $v[0]; } # pick the first Content-Type header to match Email::MIME behavior. # It's usually the right one based on historical archives. sub ct ($) { # PublicInbox::EmlContentFoo::content_type: $_[0]->{ct} //= parse_content_type(header($_[0], 'Content-Type')); } # returns a queue of sub-parts iff it's worth descending into sub mp_descend ($$) { my ($self, $nr) = @_; # or $once for top-level my $ct = ct($self); my $type = lc($ct->{type}); if ($type eq 'message' && $MESSAGE_DESCEND{lc($ct->{subtype})}) { my $nxt = new(undef, body_raw($self)); $self->{-call_cb} = $nxt->{is_submsg} = 1; return [ $nxt ]; } return if $type ne 'multipart'; my $bnd = $ct->{attributes}->{boundary} // return; # single-part return if $bnd eq '' || length($bnd) >= $mime_boundary_length_limit; $bnd = quotemeta($bnd); # this is a multipart message that didn't get descended into in # public-inbox <= 1.5.0, so ensure we call the user callback for # this part to not break PSGI downloads. $self->{-call_cb} = $self->{is_submsg}; # "multipart" messages can exist w/o a body my $bdy = ($nr ? delete($self->{bdy}) : \(body_raw($self))) or return; # Cut at the the first epilogue, not subsequent ones. # *sigh* just the regexp match alone seems to bump RSS by # length($$bdy) on a ~30M string: my $epilogue_missing; if ($$bdy =~ /(?:\r?\n)?^--$bnd--[ \t]*\r?$/sm) { substr($$bdy, $-[0]) = ''; } else { $epilogue_missing = 1; } # *Sigh* split() doesn't work in-place and return CoW strings # because Perl wants to "\0"-terminate strings. So split() # again bumps RSS by length($$bdy) # Quiet warning for "Complex regular subexpression recursion limit" # in case we get many empty parts, it's harmless in this case no warnings 'regexp'; my ($pre, @parts) = split(/(?:\r?\n)?(?:^--$bnd[ \t]*\r?\n)+/ms, $$bdy, # + 3 since we don't want the last part # processed to include any other excluded # parts ($nr starts at 1, and I suck at math) $mime_parts_limit + 3 - $nr); if (@parts) { # the usual path if we got this far: undef $bdy; # release memory ASAP if $nr > 0 # compatibility with Email::MIME $parts[-1] =~ s/\n\r?\n\z/\n/s if $epilogue_missing; # ignore empty parts @parts = map { new_sub(undef, \$_) } grep /[^ \t\r\n]/s, @parts; # Keep "From: someone..." from preamble in old, # buggy versions of git-send-email, otherwise drop it # There's also a case where quoted text showed up in the # preamble # <20060515162817.65F0F1BBAE@citi.umich.edu> unshift(@parts, new_sub(undef, \$pre)) if $pre =~ /:/s; return \@parts; } # "multipart", but no boundary found, treat as single part $self->{bdy} //= $bdy; undef; } # $p = [ \@parts, $depth, $idx ] # $idx[0] grows as $depth grows, $idx[1] == $p->[-1] == current part # (callers need to be updated) # \@parts is a queue which empties when we're done with a parent part # same usage as PublicInbox::MsgIter::msg_iter # $cb - user-supplied callback sub # $arg - user-supplied arg (think pthread_create) # $once - unref body scalar during iteration sub each_part { my ($self, $cb, $arg, $once, $all) = @_; my $p = mp_descend($self, $once // 0) or return $cb->([$self, 0, 1], $arg); $cb->([$self, 0, 0], $arg) if ($all || $self->{-call_cb}); # rare $p = [ $p, 0 ]; my @s; # our virtual stack my $nr = 0; while ((scalar(@{$p->[0]}) || ($p = pop @s)) && ++$nr <= $mime_parts_limit) { ++$p->[-1]; # bump index my (undef, @idx) = @$p; @idx = (join('.', @idx)); my $depth = ($idx[0] =~ tr/././) + 1; my $sub = shift @{$p->[0]}; if ($depth < $mime_nesting_limit && (my $nxt = mp_descend($sub, $nr))) { push(@s, $p) if scalar @{$p->[0]}; $p = [ $nxt, @idx, 0 ]; ($all || $sub->{-call_cb}) and $cb->([$sub, $depth, @idx], $arg); } else { # a leaf node $cb->([$sub, $depth, @idx], $arg); } } } sub enc_qp { # prevent MIME::QuotedPrint from encoding CR as =0D since it's # against RFCs and breaks MUAs $_[0] =~ s/\r\n/\n/sg; encode_qp($_[0], "\r\n"); } sub dec_qp { # RFC 2822 requires all lines to end in CRLF, though... :< $_[0] = decode_qp($_[0]); $_[0] =~ s/\n/\r\n/sg; $_[0] } sub identity_codec { $_[0] } ########### compatibility section for existing Email::MIME uses ######### sub header_obj { bless { hdr => $_[0]->{hdr}, crlf => $_[0]->{crlf} }, __PACKAGE__; } sub subparts { my ($self) = @_; my $parts = mp_descend($self, 0) or return (); my $bnd = ct($self)->{attributes}->{boundary} // die 'BUG: no boundary'; my $bdy = $self->{bdy}; if ($$bdy =~ /\A(.*?)(?:\r?\n)?^--\Q$bnd\E[ \t]*\r?$/sm) { $self->{preamble} = $1; } if ($$bdy =~ /^--\Q$bnd\E--[ \t]*\r?\n(.+)\z/sm) { $self->{epilogue} = $1; } @$parts; } sub parts_set { my ($self, $parts) = @_; # we can't fully support what Email::MIME does, # just what our filter code needs: my $bnd = ct($self)->{attributes}->{boundary} // die <parts_set not supported for single-part messages EOF my $crlf = $self->{crlf}; my $fin_bnd = "$crlf--$bnd--$crlf"; $bnd = "$crlf--$bnd$crlf"; ${$self->{bdy}} = join($bnd, delete($self->{preamble}) // '', map { $_->as_string } @$parts ) . $fin_bnd . (delete($self->{epilogue}) // ''); undef; } sub body_set { my ($self, $body) = @_; my $bdy = $self->{bdy} = ref($body) ? $body : \$body; if (my $cte = header_raw($self, 'Content-Transfer-Encoding')) { my $enc = $MIME_ENC{lc($cte)} or croak("can't encode `$cte'"); $$bdy = $enc->($$bdy); # in-place } undef; } sub body_str_set { my ($self, $body_str) = @_; my $charset = ct($self)->{attributes}->{charset} or Carp::confess('body_str was given, but no charset is defined'); body_set($self, \(encode($charset, $body_str, Encode::FB_CROAK))); } sub content_type { scalar header($_[0], 'Content-Type') } # we only support raw header_set sub header_set { my ($self, $pfx, @vals) = @_; my $re = re_memo($pfx); my $hdr = $self->{hdr}; return $$hdr =~ s!$re!!g if !@vals; $pfx .= ': '; my $len = 78 - length($pfx); @vals = map {; # folding differs from Email::Simple::Header, # we favor tabs for visibility (and space savings :P) if (length($_) >= $len && (/\n[^ \t]/s || !/\n/s)) { local $Text::Wrap::columns = $len; local $Text::Wrap::huge = 'overflow'; $pfx . wrap('', "\t", $_) . $self->{crlf}; } else { $pfx . $_ . $self->{crlf}; } } @vals; $$hdr =~ s!$re!shift(@vals) // ''!ge; # replace current headers, first $$hdr .= join('', @vals); # append any leftovers not replaced # wantarray ? @_[2..$#_] : $_[2]; # Email::Simple::Header compat undef; # we don't care for the return value } # note: we only call this method on Subject sub header_str_set { my ($self, $name, @vals) = @_; for (@vals) { next unless /[^\x20-\x7e]/; # 39: int((75 - length("Subject: =?UTF-8?B?".'?=') ) / 4) * 3; s/(.{1,39})/ my $x = $1; utf8::encode($x); # to octets '=?UTF-8?B?'.encode_base64($x, '').'?=' /xges; } header_set($self, $name, @vals); } sub mhdr_decode ($) { eval { $MIME_Header->decode($_[0], Encode::FB_DEFAULT) } // $_[0]; } sub filename { my $dis = header_raw($_[0], 'Content-Disposition'); my $attrs = parse_content_disposition($dis)->{attributes}; my $fn = $attrs->{filename}; $fn = ct($_[0])->{attributes}->{name} if !defined($fn) || $fn eq ''; (defined($fn) && $fn =~ /=\?/) ? mhdr_decode($fn) : $fn; } sub xs_addr_str { # helper for ->header / ->header_str for (@_) { # array from header_raw() next unless /=\?/; my @g = parse_email_groups($_); # [ foo => [ E::A::X, ... ] for (my $i = 0; $i < @g; $i += 2) { if (defined($g[$i]) && $g[$i] =~ /=\?/) { $g[$i] = mhdr_decode($g[$i]); } my $addrs = $g[$i + 1]; for my $eax (@$addrs) { for my $m (qw(phrase comment)) { my $v = $eax->$m; $eax->$m(mhdr_decode($v)) if $v && $v =~ /=\?/; } } } $_ = format_email_groups(@g); } } eval { require Email::Address::XS; Email::Address::XS->import(qw(parse_email_groups format_email_groups)); 1; } or do { # fallback to just decoding everything, because parsing # email addresses correctly w/o C/XS is slow %DECODE_FULL = (%DECODE_FULL, %DECODE_ADDRESS); %DECODE_ADDRESS = (); }; *header = \&header_str; sub header_str { my ($self, $name) = @_; my @v = header_raw($self, $name); if ($DECODE_ADDRESS{$name}) { xs_addr_str(@v); } elsif ($DECODE_FULL{$name}) { for (@v) { $_ = mhdr_decode($_) if /=\?/; } } wantarray ? @v : $v[0]; } sub body_raw { ${$_[0]->{bdy} // \''}; } sub body { my $raw = body_raw($_[0]); my $cte = header_raw($_[0], 'Content-Transfer-Encoding') or return $raw; ($cte) = ($cte =~ /([a-zA-Z0-9\-]+)/) or return $raw; # For S/MIME, etc my $dec = $MIME_DEC{lc($cte)} or return $raw; $dec->($raw); } sub body_str { my ($self) = @_; my $ct = ct($self); my $charset = $ct->{attributes}->{charset}; if (!$charset) { if ($STR_TYPE{$ct->{type}} && $STR_SUBTYPE{$ct->{subtype}}) { return body($self); } Carp::confess("can't get body as a string for ", join("\n\t", header_raw($self, 'Content-Type'))); } decode($charset, body($self), Encode::FB_CROAK); } sub as_string { my ($self) = @_; my $ret = ${ $self->{hdr} }; return $ret unless defined($self->{bdy}); $ret .= $self->{crlf}; $ret .= ${$self->{bdy}}; } # Unlike Email::MIME::charset_set, this only changes the parsed # representation of charset used for search indexing and HTML display. # This does NOT affect what ->as_string returns. sub charset_set { ct($_[0])->{attributes}->{charset} = $_[1]; } sub crlf { $_[0]->{crlf} // "\n" } sub willneed { re_memo($_) for @_ } willneed(qw(From To Cc Date Subject Content-Type In-Reply-To References Message-ID X-Alt-Message-ID)); 1; public-inbox-1.6.1/lib/PublicInbox/EmlContentFoo.pm000066400000000000000000000177041377346120300221670ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # Copyright (C) 2004- Simon Cozens, Casey West, Ricardo SIGNES # This library is free software; you can redistribute it and/or modify # it under the same terms as Perl itself. # # License: GPL-1.0+ or Artistic-1.0-Perl # # # # This license differs from the rest of public-inbox # # ABSTRACT: Parse a MIME Content-Type or Content-Disposition Header # # This is a fork of the Email::MIME::ContentType 1.022 with # minor improvements and incompatibilities; namely changes to # quiet warnings with legacy data. package PublicInbox::EmlContentFoo; use strict; use parent qw(Exporter); use v5.10.1; # find_mime_encoding() only appeared in Encode 2.87+ (Perl 5.26+), # while we support 2.35 shipped with Perl 5.10.1 use Encode 2.35 qw(find_encoding); my %mime_name_map; # $enc->mime_name => $enc object BEGIN { eval { Encode->import('find_mime_encoding') }; if ($@) { *find_mime_encoding = sub { $mime_name_map{lc($_[0])} }; %mime_name_map = map {; my $enc = find_encoding($_); my $m = lc($enc->mime_name // ''); $m => $enc; } Encode->encodings(':all'); # delete fallback for encodings w/o ->mime_name: delete $mime_name_map{''}; # an extra alias see Encode::MIME::NAME $mime_name_map{'utf8'} = find_encoding('UTF-8'); } } our @EXPORT_OK = qw(parse_content_type parse_content_disposition); our $STRICT_PARAMS = 1; my $ct_default = 'text/plain; charset=us-ascii'; my $re_token = # US-ASCII except SPACE, CTLs and tspecials ()<>@,;:\\"/[]?= qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/; my $re_token_non_strict = # allow CTLs and above ASCII qr/([\x00-\x08\x0B\x0C\x0E-\x1F\x7E-\xFF]+|$re_token)/; my $re_qtext = # US-ASCII except CR, LF, white space, backslash and quote qr/[\x01-\x08\x0B\x0C\x0E-\x1F\x21\x23-\x5B\x5D-\x7E\x7F]/; my $re_quoted_pair = qr/\\[\x00-\x7F]/; my $re_quoted_string = qr/"((?:[ \t]*(?:$re_qtext|$re_quoted_pair))*[ \t]*)"/; my $re_qtext_non_strict = qr/[\x80-\xFF]|$re_qtext/; my $re_quoted_pair_non_strict = qr/\\[\x00-\xFF]/; my $re_quoted_string_non_strict = qr/"((?:[ \t]*(?:$re_qtext_non_strict|$re_quoted_pair_non_strict))*[ \t]*)"/; my $re_charset = qr/[!"#\$%&'+\-0-9A-Z\\\^_`a-z\{\|\}~]+/; my $re_language = qr/[A-Za-z]{1,8}(?:-[0-9A-Za-z]{1,8})*/; my $re_exvalue = qr/($re_charset)?'(?:$re_language)?'(.*)/; sub parse_content_type { my ($ct) = @_; # If the header isn't there or is empty, give default answer. $ct = $ct_default unless defined($ct) && length($ct); _unfold_lines($ct); _clean_comments($ct); # It is also recommend (sic.) that this default be assumed when a # syntactically invalid Content-Type header field is encountered. unless ($ct =~ s/^($re_token)\/($re_token)//) { unless ($STRICT_PARAMS && $ct =~ s/^($re_token_non_strict)\/ ($re_token_non_strict)//x) { #carp "Invalid Content-Type '$ct'"; return parse_content_type($ct_default); } } my ($type, $subtype) = (lc $1, lc $2); _clean_comments($ct); $ct =~ s/\s+$//; my $attributes = {}; if ($STRICT_PARAMS && length($ct) && $ct !~ /^;/) { # carp "Missing ';' before first Content-Type parameter '$ct'"; } else { $attributes = _process_rfc2231(_parse_attributes($ct)); } { type => $type, subtype => $subtype, attributes => $attributes, }; } my $cd_default = 'attachment'; sub parse_content_disposition { my ($cd) = @_; $cd = $cd_default unless defined($cd) && length($cd); _unfold_lines($cd); _clean_comments($cd); unless ($cd =~ s/^($re_token)//) { unless ($STRICT_PARAMS and $cd =~ s/^($re_token_non_strict)//) { #carp "Invalid Content-Disposition '$cd'"; return parse_content_disposition($cd_default); } } my $type = lc $1; _clean_comments($cd); $cd =~ s/\s+$//; my $attributes = {}; if ($STRICT_PARAMS && length($cd) && $cd !~ /^;/) { # carp "Missing ';' before first Content-Disposition parameter '$cd'"; } else { $attributes = _process_rfc2231(_parse_attributes($cd)); } { type => $type, attributes => $attributes, }; } sub _unfold_lines { $_[0] =~ s/(?:\r\n|[\r\n])(?=[ \t])//g; } sub _clean_comments { my $ret = ($_[0] =~ s/^\s+//); while (length $_[0]) { last unless $_[0] =~ s/^\(//; my $level = 1; while (length $_[0]) { my $ch = substr $_[0], 0, 1, ''; if ($ch eq '(') { $level++; } elsif ($ch eq ')') { $level--; last if $level == 0; } elsif ($ch eq '\\') { substr $_[0], 0, 1, ''; } } # carp "Unbalanced comment" if $level != 0 and $STRICT_PARAMS; $ret |= ($_[0] =~ s/^\s+//); } $ret; } sub _process_rfc2231 { my ($attribs) = @_; my %cont; my %encoded; foreach (keys %{$attribs}) { next unless $_ =~ m/^(.*)\*([0-9])\*?$/; my ($attr, $sec) = ($1, $2); $cont{$attr}->[$sec] = $attribs->{$_}; $encoded{$attr}->[$sec] = 1 if $_ =~ m/\*$/; delete $attribs->{$_}; } foreach (keys %cont) { my $key = $_; $key .= '*' if $encoded{$_}; $attribs->{$key} = join '', @{$cont{$_}}; } foreach (keys %{$attribs}) { next unless $_ =~ m/^(.*)\*$/; my $key = $1; next unless ($attribs->{$_} // '') =~ m/^$re_exvalue$/; my ($charset, $value) = ($1, $2); $value =~ s/%([0-9A-Fa-f]{2})/pack('C', hex($1))/eg; if (length $charset) { my $enc = find_mime_encoding($charset); if (defined $enc) { $value = $enc->decode($value); # } else { #carp "Unknown charset '$charset' in #attribute '$key' value"; } } $attribs->{$key} = $value; delete $attribs->{$_}; } $attribs; } sub _parse_attributes { local $_ = shift; substr($_, 0, 0, '; ') if length $_ and $_ !~ /^;/; my $attribs = {}; while (length $_) { s/^;// or $STRICT_PARAMS and do { #carp "Missing semicolon before parameter '$_'"; return $attribs; }; _clean_comments($_); unless (length $_) { # Some mail software generates a Content-Type like this: # "Content-Type: text/plain;" # RFC 1521 section 3 says a parameter must exist if # there is a semicolon. #carp "Extra semicolon after last parameter" if #$STRICT_PARAMS; return $attribs; } my $attribute; if (s/^($re_token)=//) { $attribute = lc $1; } else { if ($STRICT_PARAMS) { # carp "Illegal parameter '$_'"; return $attribs; } if (s/^($re_token_non_strict)=//) { $attribute = lc $1; } else { unless (s/^([^;=\s]+)\s*=//) { #carp "Cannot parse parameter '$_'"; return $attribs; } $attribute = lc $1; } } _clean_comments($_); my $value = _extract_attribute_value(); $attribs->{$attribute} = $value; _clean_comments($_); } $attribs; } sub _extract_attribute_value { # EXPECTS AND MODIFIES $_ my $value; while (length $_) { if (s/^($re_token)//) { $value .= $1; } elsif (s/^$re_quoted_string//) { my $sub = $1; $sub =~ s/\\(.)/$1/g; $value .= $sub; } elsif ($STRICT_PARAMS) { #my $char = substr $_, 0, 1; #carp "Unquoted '$char' not allowed"; return; } elsif (s/^($re_token_non_strict)//) { $value .= $1; } elsif (s/^$re_quoted_string_non_strict//) { my $sub = $1; $sub =~ s/\\(.)/$1/g; $value .= $sub; } my $erased = _clean_comments($_); last if !length $_ or /^;/; if ($STRICT_PARAMS) { #my $char = substr $_, 0, 1; #carp "Extra '$char' found after parameter"; return; } if ($erased) { # Sometimes semicolon is missing, so check for = char last if m/^$re_token_non_strict=/; $value .= ' '; } $value .= substr $_, 0, 1, ''; } $value; } 1; __END__ =func parse_content_type This routine is exported by default. This routine parses email content type headers according to section 5.1 of RFC 2045 and also RFC 2231 (Character Set and Parameter Continuations). It returns a hash as above, with entries for the C, the C, and a hash of C. =func parse_content_disposition This routine is exported by default. This routine parses email Content-Disposition headers according to RFC 2183 and RFC 2231. It returns a hash as above, with entries for the C, and a hash of C. =cut public-inbox-1.6.1/lib/PublicInbox/ExtMsg.pm000066400000000000000000000147651377346120300206660ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ # # Used by the web interface to link to messages outside of the our # public-inboxes. Mail threads may cross projects/threads; so # we should ensure users can find more easily find them on other # sites. package PublicInbox::ExtMsg; use strict; use warnings; use PublicInbox::Hval qw(ascii_html prurl mid_href); use PublicInbox::WwwStream qw(html_oneshot); use PublicInbox::Smsg; our $MIN_PARTIAL_LEN = 16; # TODO: user-configurable our @EXT_URL = map { ascii_html($_) } ( # leading "//" denotes protocol-relative (http:// or https://) '//marc.info/?i=%s', '//www.mail-archive.com/search?l=mid&q=%s', 'nntp://news.gmane.io/%s', 'https://lists.debian.org/msgid-search/%s', '//docs.FreeBSD.org/cgi/mid.cgi?db=mid&id=%s', 'https://www.w3.org/mid/%s', 'http://www.postgresql.org/message-id/%s', 'https://lists.debconf.org/cgi-lurker/keyword.cgi?'. 'doc-url=/lurker&format=en.html&query=id:%s' ); sub PARTIAL_MAX () { 100 } sub search_partial ($$) { my ($ibx, $mid) = @_; return if length($mid) < $MIN_PARTIAL_LEN; my $srch = $ibx->search or return; my $opt = { limit => PARTIAL_MAX, mset => 2 }; my @try = ("m:$mid*"); my $chop = $mid; if ($chop =~ s/(\W+)(\w*)\z//) { my ($delim, $word) = ($1, $2); if (length($word)) { push @try, "m:$chop$delim"; push @try, "m:$chop$delim*"; } push @try, "m:$chop"; push @try, "m:$chop*"; } # break out long words individually to search for, because # too many messages begin with "Pine.LNX." (or "alpine" or "nycvar") if ($mid =~ /\w{9,}/) { my @long = ($mid =~ m!(\w{3,})!g); push(@try, join(' ', map { "m:$_" } @long)); # is the last element long enough to not trigger excessive # wildcard matches? if (length($long[-1]) > 8) { $long[-1] .= '*'; push(@try, join(' ', map { "m:$_" } @long)); } } foreach my $m (@try) { # If Xapian can't handle the wildcard since it # has too many results. $@ can be # Search::Xapian::QueryParserError or even: # "something terrible happened at ../Search/Xapian/Enquire.pm" my $mset = eval { $srch->mset($m, $opt) } or next; my @mids = map { $_->{mid} } @{$srch->mset_to_smsg($ibx, $mset)}; return \@mids if scalar(@mids); } } sub ext_msg_i { my ($other, $ctx) = @_; return if $other->{name} eq $ctx->{-inbox}->{name} || !$other->base_url; my $mm = $other->mm or return; # try to find the URL with Msgmap to avoid forking my $num = $mm->num_for($ctx->{mid}); if (defined $num) { push @{$ctx->{found}}, $other; } else { # no point in trying the fork fallback if we # know Xapian is up-to-date but missing the # message in the current repo push @{$ctx->{again}}, $other; } } sub ext_msg_step { my ($pi_cfg, $section, $ctx) = @_; if (defined($section)) { return if $section !~ m!\Apublicinbox\.([^/]+)\z!; my $ibx = $pi_cfg->lookup_name($1) or return; ext_msg_i($ibx, $ctx); } else { # undef == "EOF" finalize_exact($ctx); } } sub ext_msg { my ($ctx) = @_; sub { $ctx->{-wcb} = $_[0]; # HTTP server write callback if ($ctx->{env}->{'pi-httpd.async'}) { require PublicInbox::ConfigIter; my $iter = PublicInbox::ConfigIter->new( $ctx->{www}->{pi_config}, \&ext_msg_step, $ctx); $iter->event_step; } else { $ctx->{www}->{pi_config}->each_inbox(\&ext_msg_i, $ctx); finalize_exact($ctx); } }; } # called via PublicInbox::DS->EventLoop sub event_step { my ($ctx, $sync) = @_; # can't find a partial match in current inbox, try the others: my $ibx = shift @{$ctx->{again}} or return finalize_partial($ctx); my $mids = search_partial($ibx, $ctx->{mid}) or return ($sync ? undef : PublicInbox::DS::requeue($ctx)); $ctx->{n_partial} += scalar(@$mids); push @{$ctx->{partial}}, [ $ibx, $mids ]; $ctx->{n_partial} >= PARTIAL_MAX ? finalize_partial($ctx) : ($sync ? undef : PublicInbox::DS::requeue($ctx)); } sub finalize_exact { my ($ctx) = @_; return $ctx->{-wcb}->(exact($ctx)) if $ctx->{found}; # fall back to partial MID matching my $mid = $ctx->{mid}; my $cur = $ctx->{-inbox}; my $mids = search_partial($cur, $mid); if ($mids) { $ctx->{n_partial} = scalar(@$mids); push @{$ctx->{partial}}, [ $cur, $mids ]; } elsif ($ctx->{again} && length($mid) >= $MIN_PARTIAL_LEN) { bless $ctx, __PACKAGE__; if ($ctx->{env}->{'pi-httpd.async'}) { $ctx->event_step; return; } # synchronous fall-through $ctx->event_step while @{$ctx->{again}}; } finalize_partial($ctx); } sub finalize_partial { my ($ctx) = @_; my $mid = $ctx->{mid}; my $code = 404; my $href = mid_href($mid); my $html = ascii_html($mid); my $title = "<$html> not found"; my $s = "
Message-ID <$html>\nnot found\n";
	if (my $n_partial = $ctx->{n_partial}) {
		$code = 300;
		my $es = $n_partial == 1 ? '' : 'es';
		$n_partial .= '+' if ($n_partial == PARTIAL_MAX);
		$s .= "\n$n_partial partial match$es found:\n\n";
		my $cur_name = $ctx->{-inbox}->{name};
		foreach my $pair (@{$ctx->{partial}}) {
			my ($ibx, $res) = @$pair;
			my $env = $ctx->{env} if $ibx->{name} eq $cur_name;
			my $u = $ibx->base_url($env) or next;
			foreach my $m (@$res) {
				my $href = mid_href($m);
				my $html = ascii_html($m);
				$s .= qq{$u$html/\n};
			}
		}
	}
	my $ext = ext_urls($ctx, $mid, $href, $html);
	if ($ext ne '') {
		$s .= $ext;
		$code = 300;
	}
	$ctx->{-html_tip} = $s .= '
'; $ctx->{-title_html} = $title; $ctx->{-upfx} = '../'; $ctx->{-wcb}->(html_oneshot($ctx, $code)); } sub ext_urls { my ($ctx, $mid, $href, $html) = @_; # Fall back to external repos if configured if (@EXT_URL && index($mid, '@') >= 0) { my $env = $ctx->{env}; my $e = "\nPerhaps try an external site:\n\n"; foreach my $url (@EXT_URL) { my $u = prurl($env, $url); my $r = sprintf($u, $href); my $t = sprintf($u, $html); $e .= qq{$t\n}; } return $e; } '' } sub exact { my ($ctx) = @_; my $mid = $ctx->{mid}; my $found = $ctx->{found}; my $href = mid_href($mid); my $html = ascii_html($mid); my $title = "<$html> found in "; my $end = @$found == 1 ? 'another inbox' : 'other inboxes'; $ctx->{-title_html} = $title . $end; $ctx->{-upfx} = '../'; my $ext_urls = ext_urls($ctx, $mid, $href, $html); my $code = (@$found == 1 && $ext_urls eq '') ? 200 : 300; $ctx->{-html_tip} = join('', "
Message-ID: <$html>\nfound in $end:\n\n",
				(map {
					my $u = $_->base_url;
					qq($u$html/\n)
				} @$found),
			$ext_urls, '
'); html_oneshot($ctx, $code); } 1; public-inbox-1.6.1/lib/PublicInbox/FakeInotify.pm000066400000000000000000000042131377346120300216520ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # License: AGPL-3.0+ # for systems lacking Linux::Inotify2 or IO::KQueue, just emulates # enough of Linux::Inotify2 package PublicInbox::FakeInotify; use strict; use Time::HiRes qw(stat); use PublicInbox::DS; sub IN_MODIFY () { 0x02 } # match Linux inotify # my $IN_MOVED_TO = 0x80; # my $IN_CREATE = 0x100; sub MOVED_TO_OR_CREATE () { 0x80 | 0x100 } my $poll_intvl = 2; # same as Filesys::Notify::Simple sub new { bless { watch => {} }, __PACKAGE__ } # behaves like Linux::Inotify2->watch sub watch { my ($self, $path, $mask) = @_; my @st = stat($path) or return; my $k = "$path\0$mask"; $self->{watch}->{$k} = $st[10]; # 10 - ctime bless [ $self->{watch}, $k ], 'PublicInbox::FakeInotify::Watch'; } sub on_new_files ($$$$) { my ($events, $dh, $path, $old_ctime) = @_; while (defined(my $base = readdir($dh))) { next if $base =~ /\A\.\.?\z/; my $full = "$path/$base"; my @st = stat($full); if (@st && $st[10] > $old_ctime) { push @$events, bless(\$full, 'PublicInbox::FakeInotify::Event') } } } # behaves like non-blocking Linux::Inotify2->read sub read { my ($self) = @_; my $watch = $self->{watch} or return (); my $events = []; for my $x (keys %$watch) { my ($path, $mask) = split(/\0/, $x, 2); my @now = stat($path) or next; my $old_ctime = $watch->{$x}; $watch->{$x} = $now[10]; next if $old_ctime == $now[10]; if ($mask & IN_MODIFY) { push @$events, bless(\$path, 'PublicInbox::FakeInotify::Event') } elsif ($mask & MOVED_TO_OR_CREATE) { opendir(my $dh, $path) or do { warn "W: opendir $path: $!\n"; next; }; on_new_files($events, $dh, $path, $old_ctime); } } @$events; } sub poll_once { my ($obj) = @_; $obj->event_step; # PublicInbox::InboxIdle::event_step PublicInbox::DS::add_timer($poll_intvl, \&poll_once, $obj); } package PublicInbox::FakeInotify::Watch; use strict; sub cancel { my ($self) = @_; delete $self->[0]->{$self->[1]}; } sub name { my ($self) = @_; (split(/\0/, $self->[1], 2))[0]; } package PublicInbox::FakeInotify::Event; use strict; sub fullname { ${$_[0]} } 1; public-inbox-1.6.1/lib/PublicInbox/Feed.pm000066400000000000000000000071051377346120300203100ustar00rootroot00000000000000# Copyright (C) 2013-2020 all contributors # License: AGPL-3.0+ # # Used for generating Atom feeds for web-accessible mailing list archives. package PublicInbox::Feed; use strict; use warnings; use PublicInbox::View; use PublicInbox::WwwAtomStream; use PublicInbox::Smsg; # this loads w/o Search::Xapian sub generate_i { my ($ctx) = @_; shift @{$ctx->{msgs}}; } # main function sub generate { my ($ctx) = @_; my $msgs = $ctx->{msgs} = recent_msgs($ctx); return _no_thread() unless @$msgs; PublicInbox::WwwAtomStream->response($ctx, 200, \&generate_i); } sub generate_thread_atom { my ($ctx) = @_; my $msgs = $ctx->{msgs} = $ctx->{-inbox}->over->get_thread($ctx->{mid}); return _no_thread() unless @$msgs; PublicInbox::WwwAtomStream->response($ctx, 200, \&generate_i); } sub generate_html_index { my ($ctx) = @_; # if the 'r' query parameter is given, it is a legacy permalink # which we must continue supporting: my $qp = $ctx->{qp}; my $ibx = $ctx->{-inbox}; if ($qp && !$qp->{r} && $ibx->over) { return PublicInbox::View::index_topics($ctx); } my $env = $ctx->{env}; my $url = $ibx->base_url($env) . 'new.html'; my $qs = $env->{QUERY_STRING}; $url .= "?$qs" if $qs ne ''; [302, [ 'Location', $url, 'Content-Type', 'text/plain'], [ "Redirecting to $url\n" ] ]; } sub new_html_i { my ($ctx, $eml) = @_; $ctx->zmore($ctx->html_top) if exists $ctx->{-html_tip}; $eml and return PublicInbox::View::eml_entry($ctx, $eml); my $smsg = shift @{$ctx->{msgs}} or $ctx->zmore(PublicInbox::View::pagination_footer( $ctx, './new.html')); $smsg; } sub new_html { my ($ctx) = @_; my $msgs = $ctx->{msgs} = recent_msgs($ctx); if (!@$msgs) { return [404, ['Content-Type', 'text/plain'], ["No messages, yet\n"] ]; } $ctx->{-html_tip} = '
';
	$ctx->{-upfx} = '';
	$ctx->{-hr} = 1;
	PublicInbox::WwwStream::aresponse($ctx, 200, \&new_html_i);
}

# private subs

sub _no_thread () {
	[404, ['Content-Type', 'text/plain'], ["No feed found for thread\n"]];
}

sub recent_msgs {
	my ($ctx) = @_;
	my $ibx = $ctx->{-inbox};
	my $max = $ibx->{feedmax};
	return PublicInbox::View::paginate_recent($ctx, $max) if $ibx->over;

	# only for rare v1 inboxes which aren't indexed at all
	my $qp = $ctx->{qp};
	my $hex = '[a-f0-9]';
	my $addmsg = qr!^:000000 100644 \S+ (\S+) A\t${hex}{2}/${hex}{38}$!;
	my $delmsg = qr!^:100644 000000 (\S+) \S+ D\t(${hex}{2}/${hex}{38})$!;
	my $refhex = qr/(?:HEAD|${hex}{4,})(?:~[0-9]+)?/;

	# revision ranges may be specified
	my $range = 'HEAD';
	my $r = $qp->{r} if $qp;
	if ($r && ($r =~ /\A(?:$refhex\.\.)?$refhex\z/o)) {
		$range = $r;
	}

	# get recent messages
	# we could use git log -z, but, we already know ssoma will not
	# leave us with filenames with spaces in them..
	my $log = $ibx->git->popen(qw/log
				--no-notes --no-color --raw -r
				--no-abbrev --abbrev-commit/,
				"--format=%H", $range);
	my %deleted; # only an optimization at this point
	my $last;
	my $last_commit;
	local $/ = "\n";
	my @oids;
	while (defined(my $line = <$log>)) {
		if ($line =~ /$addmsg/o) {
			my $add = $1;
			next if $deleted{$add}; # optimization-only
			push @oids, $add;
			if (scalar(@oids) >= $max) {
				$last = 1;
				last;
			}
		} elsif ($line =~ /$delmsg/o) {
			$deleted{$1} = 1;
		}
	}

	if ($last) {
		local $/ = "\n";
		while (my $line = <$log>) {
			if ($line =~ /^(${hex}{7,})/) {
				$last_commit = $1;
				last;
			}
		}
	}

	$last_commit and
		$ctx->{next_page} = qq[] .
					'next (older)';

	[ map { bless {blob => $_ }, 'PublicInbox::Smsg' } @oids ];
}

1;
public-inbox-1.6.1/lib/PublicInbox/Filter/000077500000000000000000000000001377346120300203315ustar00rootroot00000000000000public-inbox-1.6.1/lib/PublicInbox/Filter/Base.pm000066400000000000000000000052711377346120300215460ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 
#
# base class for creating per-list or per-project filters
package PublicInbox::Filter::Base;
use strict;
use warnings;
use PublicInbox::MsgIter;
use parent qw(Exporter);
our @EXPORT_OK = qw(REJECT); # we may export IGNORE if/when needed

sub No ($) { "*** We only accept plain-text mail, No $_[0] ***" }

our %DEFAULTS = (
	reject_suffix => [ qw(exe bat cmd com pif scr vbs cpl zip swf swfl) ],
	reject_type => [ 'text/html:'.No('HTML'), 'text/xhtml:'.No('HTML'),
		'application/vnd.*:'.No('vendor-specific formats'),
		'image/*:'.No('images'), 'video/*:'.No('video'),
		'audio/*:'.No('audio') ],
);
our $INVALID_FN = qr/\0/;

sub REJECT () { 100 }
sub ACCEPT { scalar @_ > 1 ? $_[1] : 1 }
sub IGNORE () { 0 }

my %patmap = ('*' => '.*', '?' => '.', '[' => '[', ']' => ']');
sub glob2pat {
	my ($glob) = @_;
        $glob =~ s!(.)!$patmap{$1} || "\Q$1"!ge;
        $glob;
}

sub new {
	my ($class, %opts) = @_;
	my $self = bless { err => '', %opts }, $class;
	foreach my $f (qw(reject_suffix reject_type)) {
		# allow undef:
		$self->{$f} = $DEFAULTS{$f} unless exists $self->{$f};
	}
	if (defined $self->{reject_suffix}) {
		my $tmp = $self->{reject_suffix};
		$tmp = join('|', map { glob2pat($_) } @$tmp);
		$self->{reject_suffix} = qr/\.($tmp)\s*\z/i;
	}
	my $rt = [];
	if (defined $self->{reject_type}) {
		my $tmp = $self->{reject_type};
		@$rt = map {
			my ($type, $msg) = split(':', $_, 2);
			$type = lc $type;
			$msg ||= "Unacceptable Content-Type: $type";
			my $re = glob2pat($type);
			[ qr/\b$re\b/i, $msg ];
		} @$tmp;
	}
	$self->{reject_type} = $rt;
	$self;
}

sub reject ($$) {
	my ($self, $reason) = @_;
	$self->{err} = $reason;
	REJECT;
}

sub err ($) { $_[0]->{err} }

# by default, scrub is a no-op, see PublicInbox::Filter::Vger::scrub
# for an example of the override.  The $for_remove arg is set to
# disable altid setting for spam removal.
sub scrub {
	my ($self, $mime, $for_remove) = @_;
	$self->ACCEPT($mime);
}

# for MDA
sub delivery {
	my ($self, $mime) = @_;

	my $rt = $self->{reject_type};
	my $reject_suffix = $self->{reject_suffix} || $INVALID_FN;
	my (%sfx, %type);

	msg_iter($mime, sub {
		my ($part, $depth, @idx) = @{$_[0]};

		my $ct = $part->content_type || 'text/plain';
		foreach my $p (@$rt) {
			if ($ct =~ $p->[0]) {
				$type{$p->[1]} = 1;
			}
		}

		my $fn = $part->filename;
		if (defined($fn) && $fn =~ $reject_suffix) {
			$sfx{$1} = 1;
		}
	});

	my @r;
	if (keys %type) {
		push @r, sort keys %type;
	}
	if (keys %sfx) {
		push @r, 'Rejected suffixes(s): '.join(', ', sort keys %sfx);
	}

	@r ? $self->reject(join("\n", @r)) : $self->scrub($mime);
}

1;
public-inbox-1.6.1/lib/PublicInbox/Filter/Gmane.pm000066400000000000000000000016421377346120300217210ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors 
# License: AGPL-3.0+ 

# Filter for importing some archives from gmane
package PublicInbox::Filter::Gmane;
use base qw(PublicInbox::Filter::Base);
use strict;
use warnings;

sub scrub {
	my ($self, $mime) = @_;
	my $hdr = $mime->header_obj;

	# gmane rewrites Received headers, which increases spamminess
	# Some older archives set Original-To
	foreach my $x (qw(Received To)) {
		my @h = $hdr->header_raw("Original-$x");
		if (@h) {
			$hdr->header_set($x, @h);
			$hdr->header_set("Original-$x");
		}
	}

	# Approved triggers for the SA HEADER_SPAM rule,
	# X-From is gmane specific
	foreach my $drop (qw(Approved X-From)) {
		$hdr->header_set($drop);
	}

	# appears to be an old gmane bug:
	$hdr->header_set('connect()');

	$self->ACCEPT($mime);
}

sub delivery {
	my ($self, $mime) = @_;
	$self->scrub($mime);
}

1;
public-inbox-1.6.1/lib/PublicInbox/Filter/Mirror.pm000066400000000000000000000005001377346120300221340ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 

# Dumb filter for blindly accepting everything
package PublicInbox::Filter::Mirror;
use base qw(PublicInbox::Filter::Base);
use strict;
use warnings;

sub delivery { $_[0]->ACCEPT };

1;
public-inbox-1.6.1/lib/PublicInbox/Filter/RubyLang.pm000066400000000000000000000037061377346120300224200ustar00rootroot00000000000000# Copyright (C) 2017-2020 all contributors 
# License: AGPL-3.0+ 

# Filter for lists.ruby-lang.org trailers
package PublicInbox::Filter::RubyLang;
use base qw(PublicInbox::Filter::Base);
use strict;
use warnings;
use PublicInbox::MID qw(mids);

my $l1 = qr/Unsubscribe:\s
	/x;
my $l2 = qr{};

sub new {
	my ($class, %opts) = @_;
	my $altid = delete $opts{-altid};
	my $self = $class->SUPER::new(%opts);
	my $ibx = $self->{-inbox};
	# altid = serial:ruby-core:file=msgmap.sqlite3
	if (!$altid && $ibx && $ibx->{altid}) {
		$altid ||= $ibx->{altid}->[0];
	}
	if ($altid) {
		require PublicInbox::AltId;
		$self->{-altid} = PublicInbox::AltId->new($ibx, $altid, 1);
	}
	$self;
}

sub scrub_part ($) {
	my ($part) = @_;
	my $ct = $part->content_type;
	if (!$ct || $ct =~ m{\btext/plain\b}i) {
		my $s = eval { $part->body_str };
		if (defined $s && $s =~ s/\n?$l1\n$l2\n\z//os) {
			$part->body_str_set($s);
			return 1;
		}
	}
	0;
}

sub scrub {
	my ($self, $mime, $for_remove) = @_;
	# no msg_iter here, msg_iter is only for read-only access
	if (my @sub = $mime->subparts) {
		my $changed = 0;
		$changed |= scrub_part($_) for @sub;
		$mime->parts_set(\@sub) if $changed;
	} else {
		scrub_part($mime);
	}
	my $altid = $self->{-altid};
	if ($altid && !$for_remove) {
		my $hdr = $mime->header_obj;
		my $mids = mids($hdr);
		return $self->REJECT('Message-ID missing') unless (@$mids);
		my @v = $hdr->header_raw('X-Mail-Count');
		my $n;
		foreach (@v) {
			/\A\s*([0-9]+)\s*\z/ or next;
			$n = $1;
			last;
		}
		unless (defined $n) {
			return $self->REJECT('X-Mail-Count not numeric');
		}
		foreach my $mid (@$mids) {
			my $r = $altid->mm_alt->mid_set($n, $mid);
			next if $r == 0;
			last;
		}
	}
	$self->ACCEPT($mime);
}

sub delivery {
	my ($self, $mime) = @_;
	$self->scrub($mime);
}

1;
public-inbox-1.6.1/lib/PublicInbox/Filter/SubjectTag.pm000066400000000000000000000015061377346120300227240ustar00rootroot00000000000000# Copyright (C) 2017-2020 all contributors 
# License: AGPL-3.0+ 

# Filter for various [tags] in subjects
package PublicInbox::Filter::SubjectTag;
use strict;
use warnings;
use base qw(PublicInbox::Filter::Base);

sub new {
	my ($class, %opts) = @_;
	my $tag = delete $opts{-tag};
	die "tag not defined!\n" unless defined $tag && $tag ne '';
	my $self = $class->SUPER::new(%opts);
	$self->{tag_re} = qr/\A\s*(re:\s+|)\Q$tag\E\s*/i;
	$self;
}

sub scrub {
	my ($self, $mime) = @_;
	my $subj = $mime->header('Subject');
	if (defined $subj) {
		$subj =~ s/$self->{tag_re}/$1/; # $1 is "Re: "
		$mime->header_str_set('Subject', $subj);
	}
	$self->ACCEPT($mime);
}

# no suffix/article rejection for mirrors
sub delivery {
	my ($self, $mime) = @_;
	$self->scrub($mime);
}

1;
public-inbox-1.6.1/lib/PublicInbox/Filter/Vger.pm000066400000000000000000000021641377346120300215750ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 

# Filter for vger.kernel.org list trailer
package PublicInbox::Filter::Vger;
use base qw(PublicInbox::Filter::Base);
use strict;
use PublicInbox::Eml;

my $l0 = qr/-+/; # older messages only had one '-'
my $l1 =
 qr/To unsubscribe from this list: send the line "unsubscribe [\w-]+" in/;
my $l2 = qr/the body of a message to majordomo\@vger\.kernel\.org/;
my $l3 =
  qr!More majordomo info at +http://vger\.kernel\.org/majordomo-info\.html!;

# only LKML had this, and LKML nowadays has no list trailer since Jan 2016
my $l4 = qr!Please read the FAQ at +http://www\.tux\.org/lkml/!;

sub scrub {
	my ($self, $mime) = @_;
	my $s = $mime->as_string;

	# the vger appender seems to only work on the raw string,
	# so in multipart (e.g. GPG-signed) messages, the list trailer
	# becomes invisible to MIME-aware email clients.
	if ($s =~ s/$l0\n$l1\n$l2\n$l3\n($l4\n)?\z//os) {
		$mime = PublicInbox::Eml->new(\$s);
	}
	$self->ACCEPT($mime);
}

sub delivery {
	my ($self, $mime) = @_;
	$self->scrub($mime);
}

1;
public-inbox-1.6.1/lib/PublicInbox/GetlineBody.pm000066400000000000000000000024101377346120300216440ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 

# Wrap a pipe or file for PSGI streaming response bodies and calls the
# end callback when the object goes out-of-scope.
# This depends on rpipe being _blocking_ on getline.
#
# This is only used by generic PSGI servers and not public-inbox-httpd
package PublicInbox::GetlineBody;
use strict;
use warnings;

sub new {
	my ($class, $rpipe, $end, $end_arg, $buf, $filter) = @_;
	bless {
		rpipe => $rpipe,
		end => $end,
		end_arg => $end_arg,
		initial_buf => $buf,
		filter => $filter,
	}, $class;
}

# close should always be called after getline returns undef,
# but a client aborting a connection can ruin our day; so lets
# hope our underlying PSGI server does not leak references, here.
sub DESTROY { $_[0]->close }

sub getline {
	my ($self) = @_;
	my $rpipe = $self->{rpipe} or return; # EOF was set on previous call
	my $buf = delete($self->{initial_buf}) // $rpipe->getline;
	delete($self->{rpipe}) unless defined $buf; # set EOF for next call
	if (my $filter = $self->{filter}) {
		$buf = $filter->translate($buf);
	}
	$buf;
}

sub close {
	my ($self) = @_;
	my ($end, $end_arg) = delete @$self{qw(end end_arg)};
	$end->($end_arg) if $end;
}

1;
public-inbox-1.6.1/lib/PublicInbox/Git.pm000066400000000000000000000322641377346120300201740ustar00rootroot00000000000000# Copyright (C) 2014-2020 all contributors 
# License: GPLv2 or later 
#
# Used to read files from a git repository without excessive forking.
# Used in our web interfaces as well as our -nntpd server.
# This is based on code in Git.pm which is GPLv2+, but modified to avoid
# dependence on environment variables for compatibility with mod_perl.
# There are also API changes to simplify our usage and data set.
package PublicInbox::Git;
use strict;
use v5.10.1;
use parent qw(Exporter);
use POSIX ();
use IO::Handle; # ->autoflush
use Errno qw(EINTR);
use File::Glob qw(bsd_glob GLOB_NOSORT);
use Time::HiRes qw(stat);
use PublicInbox::Spawn qw(popen_rd);
use PublicInbox::Tmpfile;
use Carp qw(croak);
our @EXPORT_OK = qw(git_unquote git_quote);
our $PIPE_BUFSIZ = 65536; # Linux default
our $in_cleanup;

use constant MAX_INFLIGHT =>
	(($^O eq 'linux' ? 4096 : POSIX::_POSIX_PIPE_BUF()) * 3)
	/
	65; # SHA-256 hex size + "\n" in preparation for git using non-SHA1

my %GIT_ESC = (
	a => "\a",
	b => "\b",
	f => "\f",
	n => "\n",
	r => "\r",
	t => "\t",
	v => "\013",
	'"' => '"',
	'\\' => '\\',
);
my %ESC_GIT = map { $GIT_ESC{$_} => $_ } keys %GIT_ESC;


# unquote pathnames used by git, see quote.c::unquote_c_style.c in git.git
sub git_unquote ($) {
	return $_[0] unless ($_[0] =~ /\A"(.*)"\z/);
	$_[0] = $1;
	$_[0] =~ s/\\([\\"abfnrtv])/$GIT_ESC{$1}/g;
	$_[0] =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
	$_[0];
}

sub git_quote ($) {
	if ($_[0] =~ s/([\\"\a\b\f\n\r\t\013]|[^[:print:]])/
		      '\\'.($ESC_GIT{$1}||sprintf("%0o",ord($1)))/egs) {
		return qq{"$_[0]"};
	}
	$_[0];
}

sub new {
	my ($class, $git_dir) = @_;
	# may contain {-tmp} field for File::Temp::Dir
	bless { git_dir => $git_dir, alt_st => '', -git_path => {} }, $class
}

sub git_path ($$) {
	my ($self, $path) = @_;
	$self->{-git_path}->{$path} ||= do {
		local $/ = "\n";
		chomp(my $str = $self->qx(qw(rev-parse --git-path), $path));

		# git prior to 2.5.0 did not understand --git-path
		if ($str eq "--git-path\n$path") {
			$str = "$self->{git_dir}/$path";
		}
		$str;
	};
}

sub alternates_changed {
	my ($self) = @_;
	my $alt = git_path($self, 'objects/info/alternates');
	my @st = stat($alt) or return 0;

	# can't rely on 'q' on some 32-bit builds, but `d' works
	my $st = pack('dd', $st[10], $st[7]); # 10: ctime, 7: size
	return 0 if $self->{alt_st} eq $st;
	$self->{alt_st} = $st; # always a true value
}

sub last_check_err {
	my ($self) = @_;
	my $fh = $self->{err_c} or return;
	sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
	defined(sysread($fh, my $buf, -s $fh)) or
			fail($self, "sysread failed: $!");
	$buf;
}

sub _bidi_pipe {
	my ($self, $batch, $in, $out, $pid, $err) = @_;
	if ($self->{$pid}) {
		if (defined $err) { # "err_c"
			my $fh = $self->{$err};
			sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
			truncate($fh, 0) or fail($self, "truncate failed: $!");
		}
		return;
	}
	my ($out_r, $out_w);
	pipe($out_r, $out_w) or fail($self, "pipe failed: $!");
	my @cmd = (qw(git), "--git-dir=$self->{git_dir}",
			qw(-c core.abbrev=40 cat-file), $batch);
	my $redir = { 0 => $out_r };
	if ($err) {
		my $id = "git.$self->{git_dir}$batch.err";
		my $fh = tmpfile($id) or fail($self, "tmpfile($id): $!");
		$self->{$err} = $fh;
		$redir->{2} = $fh;
	}
	my ($in_r, $p) = popen_rd(\@cmd, undef, $redir);
	$self->{$pid} = $p;
	$out_w->autoflush(1);
	if ($^O eq 'linux') { # 1031: F_SETPIPE_SZ
		fcntl($out_w, 1031, 4096);
		fcntl($in_r, 1031, 4096) if $batch eq '--batch-check';
	}
	$self->{$out} = $out_w;
	$self->{$in} = $in_r;
}

sub my_read ($$$) {
	my ($fh, $rbuf, $len) = @_;
	my $left = $len - length($$rbuf);
	my $r;
	while ($left > 0) {
		$r = sysread($fh, $$rbuf, $PIPE_BUFSIZ, length($$rbuf));
		if ($r) {
			$left -= $r;
		} else {
			next if (!defined($r) && $! == EINTR);
			return $r;
		}
	}
	\substr($$rbuf, 0, $len, '');
}

sub my_readline ($$) {
	my ($fh, $rbuf) = @_;
	while (1) {
		if ((my $n = index($$rbuf, "\n")) >= 0) {
			return substr($$rbuf, 0, $n + 1, '');
		}
		my $r = sysread($fh, $$rbuf, $PIPE_BUFSIZ, length($$rbuf));
		next if $r || (!defined($r) && $! == EINTR);
		return defined($r) ? '' : undef; # EOF or error
	}
}

sub cat_async_retry ($$$$$) {
	my ($self, $inflight, $req, $cb, $arg) = @_;

	# {inflight} may be non-existent, but if it isn't we delete it
	# here to prevent cleanup() from waiting:
	delete $self->{inflight};
	cleanup($self);

	$self->{inflight} = $inflight;
	batch_prepare($self);
	my $buf = "$req\n";
	for (my $i = 0; $i < @$inflight; $i += 3) {
		$buf .= "$inflight->[$i]\n";
	}
	print { $self->{out} } $buf or fail($self, "write error: $!");
	unshift(@$inflight, \$req, $cb, $arg); # \$ref to indicate retried

	cat_async_step($self, $inflight); # take one step
}

sub cat_async_step ($$) {
	my ($self, $inflight) = @_;
	die 'BUG: inflight empty or odd' if scalar(@$inflight) < 3;
	my ($req, $cb, $arg) = splice(@$inflight, 0, 3);
	my $rbuf = delete($self->{cat_rbuf}) // \(my $new = '');
	my ($bref, $oid, $type, $size);
	my $head = my_readline($self->{in}, $rbuf);
	if ($head =~ /^([0-9a-f]{40,}) (\S+) ([0-9]+)$/) {
		($oid, $type, $size) = ($1, $2, $3 + 0);
		$bref = my_read($self->{in}, $rbuf, $size + 1) or
			fail($self, defined($bref) ? 'read EOF' : "read: $!");
		chop($$bref) eq "\n" or fail($self, 'LF missing after blob');
	} elsif ($head =~ / missing$/) {
		# ref($req) indicates it's already been retried
		if (!ref($req) && !$in_cleanup && alternates_changed($self)) {
			return cat_async_retry($self, $inflight,
						$req, $cb, $arg);
		}
		$type = 'missing';
		$oid = ref($req) ? $$req : $req;
	} else {
		fail($self, "Unexpected result from async git cat-file: $head");
	}
	eval { $cb->($bref, $oid, $type, $size, $arg) };
	$self->{cat_rbuf} = $rbuf if $$rbuf ne '';
	warn "E: $oid: $@\n" if $@;
}

sub cat_async_wait ($) {
	my ($self) = @_;
	my $inflight = delete $self->{inflight} or return;
	while (scalar(@$inflight)) {
		cat_async_step($self, $inflight);
	}
}

sub batch_prepare ($) {
	_bidi_pipe($_[0], qw(--batch in out pid));
}

sub _cat_file_cb {
	my ($bref, undef, undef, $size, $result) = @_;
	@$result = ($bref, $size);
}

sub cat_file {
	my ($self, $oid, $sizeref) = @_;
	my $result = [];
	cat_async($self, $oid, \&_cat_file_cb, $result);
	cat_async_wait($self);
	$$sizeref = $result->[1] if $sizeref;
	$result->[0];
}

sub check_async_step ($$) {
	my ($self, $inflight_c) = @_;
	die 'BUG: inflight empty or odd' if scalar(@$inflight_c) < 3;
	my ($req, $cb, $arg) = splice(@$inflight_c, 0, 3);
	my $rbuf = delete($self->{rbuf_c}) // \(my $new = '');
	chomp(my $line = my_readline($self->{in_c}, $rbuf));
	my ($hex, $type, $size) = split(/ /, $line);

	# Future versions of git.git may have type=ambiguous, but for now,
	# we must handle 'dangling' below (and maybe some other oddball
	# stuff):
	# https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/
	if ($hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop') {
		my $ret = my_read($self->{in_c}, $rbuf, $type + 1);
		fail($self, defined($ret) ? 'read EOF' : "read: $!") if !$ret;
	}
	eval { $cb->($hex, $type, $size, $arg, $self) };
	warn "E: check($req) $@\n" if $@;
	$self->{rbuf_c} = $rbuf if $$rbuf ne '';
}

sub check_async_wait ($) {
	my ($self) = @_;
	my $inflight_c = delete $self->{inflight_c} or return;
	while (scalar(@$inflight_c)) {
		check_async_step($self, $inflight_c);
	}
}

sub check_async_begin ($) {
	my ($self) = @_;
	cleanup($self) if alternates_changed($self);
	_bidi_pipe($self, qw(--batch-check in_c out_c pid_c err_c));
	die 'BUG: already in async check' if $self->{inflight_c};
	$self->{inflight_c} = [];
}

sub check_async ($$$$) {
	my ($self, $oid, $cb, $arg) = @_;
	my $inflight_c = $self->{inflight_c} // check_async_begin($self);
	if (scalar(@$inflight_c) >= MAX_INFLIGHT) {
		check_async_step($self, $inflight_c);
	}
	print { $self->{out_c} } $oid, "\n" or fail($self, "write error: $!");
	push(@$inflight_c, $oid, $cb, $arg);
}

sub _check_cb { # check_async callback
	my ($hex, $type, $size, $result) = @_;
	@$result = ($hex, $type, $size);
}

sub check {
	my ($self, $oid) = @_;
	my $result = [];
	check_async($self, $oid, \&_check_cb, $result);
	check_async_wait($self);
	my ($hex, $type, $size) = @$result;

	# Future versions of git.git may show 'ambiguous', but for now,
	# we must handle 'dangling' below (and maybe some other oddball
	# stuff):
	# https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/
	return if $type eq 'missing' || $type eq 'ambiguous';
	return if $hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop';
	($hex, $type, $size);
}

sub _destroy {
	my ($self, $rbuf, $in, $out, $pid, $err) = @_;
	my $p = delete $self->{$pid} or return;
	delete @$self{($rbuf, $in, $out)};
	delete $self->{$err} if $err; # `err_c'

	# PublicInbox::DS may not be loaded
	eval { PublicInbox::DS::dwaitpid($p, undef, undef) };
	waitpid($p, 0) if $@; # wait synchronously if not in event loop
}

sub cat_async_abort ($) {
	my ($self) = @_;
	my $inflight = delete $self->{inflight} or die 'BUG: not in async';
	cleanup($self);
}

sub fail {
	my ($self, $msg) = @_;
	$self->{inflight} ? cat_async_abort($self) : cleanup($self);
	croak("git $self->{git_dir}: $msg");
}

sub popen {
	my ($self, @cmd) = @_;
	@cmd = ('git', "--git-dir=$self->{git_dir}", @cmd);
	popen_rd(\@cmd);
}

sub qx {
	my ($self, @cmd) = @_;
	my $fh = $self->popen(@cmd);
	local $/ = "\n";
	return <$fh> if wantarray;
	local $/;
	<$fh>
}

# returns true if there are pending "git cat-file" processes
sub cleanup {
	my ($self) = @_;
	local $in_cleanup = 1;
	delete $self->{async_cat};
	check_async_wait($self);
	cat_async_wait($self);
	_destroy($self, qw(cat_rbuf in out pid));
	_destroy($self, qw(chk_rbuf in_c out_c pid_c err_c));
	!!($self->{pid} || $self->{pid_c});
}

# assuming a well-maintained repo, this should be a somewhat
# accurate estimation of its size
# TODO: show this in the WWW UI as a hint to potential cloners
sub packed_bytes {
	my ($self) = @_;
	my $n = 0;
	my $pack_dir = git_path($self, 'objects/pack');
	foreach my $p (bsd_glob("$pack_dir/*.pack", GLOB_NOSORT)) {
		$n += -s $p;
	}
	$n
}

sub DESTROY { cleanup(@_) }

sub local_nick ($) {
	my ($self) = @_;
	my $ret = '???';
	# don't show full FS path, basename should be OK:
	if ($self->{git_dir} =~ m!/([^/]+)(?:/\.git)?\z!) {
		$ret = "/path/to/$1";
	}
	wantarray ? ($ret) : $ret;
}

sub host_prefix_url ($$) {
	my ($env, $url) = @_;
	return $url if index($url, '//') >= 0;
	my $scheme = $env->{'psgi.url_scheme'};
	my $host_port = $env->{HTTP_HOST} //
		"$env->{SERVER_NAME}:$env->{SERVER_PORT}";
	"$scheme://$host_port". ($env->{SCRIPT_NAME} || '/') . $url;
}

sub pub_urls {
	my ($self, $env) = @_;
	if (my $urls = $self->{cgit_url}) {
		return map { host_prefix_url($env, $_) } @$urls;
	}
	local_nick($self);
}

sub cat_async_begin {
	my ($self) = @_;
	cleanup($self) if alternates_changed($self);
	batch_prepare($self);
	die 'BUG: already in async' if $self->{inflight};
	$self->{inflight} = [];
}

sub cat_async ($$$;$) {
	my ($self, $oid, $cb, $arg) = @_;
	my $inflight = $self->{inflight} // cat_async_begin($self);
	if (scalar(@$inflight) >= MAX_INFLIGHT) {
		cat_async_step($self, $inflight);
	}

	print { $self->{out} } $oid, "\n" or fail($self, "write error: $!");
	push(@$inflight, $oid, $cb, $arg);
}

# this is safe to call inside $cb, but not guaranteed to enqueue
# returns true if successful, undef if not.
sub async_prefetch {
	my ($self, $oid, $cb, $arg) = @_;
	if (defined($self->{async_cat}) && (my $inflight = $self->{inflight})) {
		# we could use MAX_INFLIGHT here w/o the halving,
		# but lets not allow one client to monopolize a git process
		if (scalar(@$inflight) < int(MAX_INFLIGHT/2)) {
			print { $self->{out} } $oid, "\n" or
						fail($self, "write error: $!");
			return push(@$inflight, $oid, $cb, $arg);
		}
	}
	undef;
}

sub extract_cmt_time {
	my ($bref, undef, undef, undef, $modified) = @_;

	if ($$bref =~ /^committer .*?> ([0-9]+) [\+\-]?[0-9]+/sm) {
		my $cmt_time = $1 + 0;
		$$modified = $cmt_time if $cmt_time > $$modified;
	}
}

# returns the modified time of a git repo, same as the "modified" field
# of a grokmirror manifest
sub modified ($) {
	my ($self) = @_;
	my $modified = 0;
	my $fh = popen($self, qw(rev-parse --branches));
	local $/ = "\n";
	while (my $oid = <$fh>) {
		chomp $oid;
		cat_async($self, $oid, \&extract_cmt_time, \$modified);
	}
	cat_async_wait($self);
	$modified || time;
}

1;
__END__
=pod

=head1 NAME

PublicInbox::Git - git wrapper

=head1 VERSION

version 1.0

=head1 SYNOPSIS

	use PublicInbox::Git;
	chomp(my $git_dir = `git rev-parse --git-dir`);
	$git_dir or die "GIT_DIR= must be specified\n";
	my $git = PublicInbox::Git->new($git_dir);

=head1 DESCRIPTION

Unstable API outside of the L method.
It requires L to be installed.

=head1 METHODS

=cut

=head2 new

	my $git = PublicInbox::Git->new($git_dir);

Initialize a new PublicInbox::Git object for use with L
This is the only public API method we support.  Everything else
in this module is subject to change.

=head1 SEE ALSO

L, L

=head1 CONTACT

All feedback welcome via plain-text mail to L

The mail archives are hosted at L

=head1 COPYRIGHT

Copyright (C) 2016 all contributors L

License: AGPL-3.0+ L

=cut
public-inbox-1.6.1/lib/PublicInbox/GitAsyncCat.pm000066400000000000000000000024661377346120300216230ustar00rootroot00000000000000# Copyright (C) 2020 all contributors 
# License: AGPL-3.0+ 
#
# internal class used by PublicInbox::Git + PublicInbox::DS
# This parses the output pipe of "git cat-file --batch"
#
# Note: this does NOT set the non-blocking flag, we expect `git cat-file'
# to be a local process, and git won't start writing a blob until it's
# fully read.  So minimize context switching and read as much as possible
# and avoid holding a buffer in our heap any longer than it has to live.
package PublicInbox::GitAsyncCat;
use strict;
use parent qw(PublicInbox::DS Exporter);
use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
our @EXPORT = qw(git_async_cat);

sub _add {
	my ($class, $git) = @_;
	$git->batch_prepare;
	my $self = bless { git => $git }, $class;
	$self->SUPER::new($git->{in}, EPOLLIN|EPOLLET);
	\undef; # this is a true ref()
}

sub event_step {
	my ($self) = @_;
	my $git = $self->{git};
	return $self->close if ($git->{in} // 0) != ($self->{sock} // 1);
	my $inflight = $git->{inflight};
	if ($inflight && @$inflight) {
		$git->cat_async_step($inflight);
		$self->requeue if @$inflight || exists $git->{cat_rbuf};
	}
}

sub git_async_cat ($$$$) {
	my ($git, $oid, $cb, $arg) = @_;
	$git->cat_async($oid, $cb, $arg);
	$git->{async_cat} //= _add(__PACKAGE__, $git);
}

1;
public-inbox-1.6.1/lib/PublicInbox/GitCredential.pm000066400000000000000000000027551377346120300221710ustar00rootroot00000000000000# Copyright (C) 2020 all contributors 
# License: AGPL-3.0+ 
package PublicInbox::GitCredential;
use strict;
use PublicInbox::Spawn qw(popen_rd);

sub run ($$) {
	my ($self, $op) = @_;
	my ($in_r, $in_w);
	pipe($in_r, $in_w) or die "pipe: $!";
	my $out_r = popen_rd([qw(git credential), $op], undef, { 0 => $in_r });
	close $in_r or die "close in_r: $!";

	my $out = '';
	for my $k (qw(url protocol host username password)) {
		defined(my $v = $self->{$k}) or next;
		die "`$k' contains `\\n' or `\\0'\n" if $v =~ /[\n\0]/;
		$out .= "$k=$v\n";
	}
	$out .= "\n";
	print $in_w $out or die "print (git credential $op): $!";
	close $in_w or die "close (git credential $op): $!";
	return $out_r if $op eq 'fill';
	<$out_r> and die "unexpected output from `git credential $op'\n";
	close $out_r or die "`git credential $op' failed: \$!=$! \$?=$?\n";
}

sub check_netrc ($) {
	my ($self) = @_;

	# part of the standard library, but distributions may split it out
	eval { require Net::Netrc };
	if ($@) {
		warn "W: Net::Netrc missing: $@\n";
		return;
	}
	if (my $x = Net::Netrc->lookup($self->{host}, $self->{username})) {
		$self->{username} //= $x->login;
		$self->{password} = $x->password;
	}
}

sub fill {
	my ($self) = @_;
	my $out_r = run($self, 'fill');
	while (<$out_r>) {
		chomp;
		return if $_ eq '';
		/\A([^=]+)=(.*)\z/ or die "bad line: $_\n";
		$self->{$1} = $2;
	}
	close $out_r or die "git credential fill failed: \$!=$! \$?=$?\n";
}

1;
public-inbox-1.6.1/lib/PublicInbox/GitHTTPBackend.pm000066400000000000000000000110501377346120300221320ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 

# when no endpoints match, fallback to this and serve a static file
# or smart HTTP.  This is our wrapper for git-http-backend(1)
package PublicInbox::GitHTTPBackend;
use strict;
use warnings;
use Fcntl qw(:seek);
use IO::Handle; # ->flush
use HTTP::Date qw(time2str);
use PublicInbox::Qspawn;
use PublicInbox::Tmpfile;
use PublicInbox::WwwStatic qw(r @NO_CACHE);

# 32 is same as the git-daemon connection limit
my $default_limiter = PublicInbox::Qspawn::Limiter->new(32);

# n.b. serving "description" and "cloneurl" should be innocuous enough to
# not cause problems.  serving "config" might...
my @text = qw[HEAD info/refs info/attributes
	objects/info/(?:http-alternates|alternates|packs)
	cloneurl description];

my @binary = qw!
	objects/[a-f0-9]{2}/[a-f0-9]{38}
	objects/pack/pack-[a-f0-9]{40}\.(?:pack|idx)
	!;

our $ANY = join('|', @binary, @text, 'git-upload-pack');
my $BIN = join('|', @binary);
my $TEXT = join('|', @text);

sub serve {
	my ($env, $git, $path) = @_;

	# Documentation/technical/http-protocol.txt in git.git
	# requires one and exactly one query parameter:
	if ($env->{QUERY_STRING} =~ /\Aservice=git-[A-Za-z0-9_]+-pack\z/ ||
				$path =~ /\Agit-[A-Za-z0-9_]+-pack\z/) {
		my $ok = serve_smart($env, $git, $path);
		return $ok if $ok;
	}

	serve_dumb($env, $git, $path);
}

sub err ($@) {
	my ($env, @msg) = @_;
	$env->{'psgi.errors'}->print(@msg, "\n");
}

my $prev = 0;
my $exp;
sub cache_one_year {
	my ($h) = @_;
	my $t = time + 31536000;
	push @$h, 'Expires', $t == $prev ? $exp : ($exp = time2str($prev = $t)),
		'Cache-Control', 'public, max-age=31536000';
}

sub serve_dumb {
	my ($env, $git, $path) = @_;

	my $h = [];
	my $type;
	if ($path =~ m!\Aobjects/[a-f0-9]{2}/[a-f0-9]{38}\z!) {
		$type = 'application/x-git-loose-object';
		cache_one_year($h);
	} elsif ($path =~ m!\Aobjects/pack/pack-[a-f0-9]{40}\.pack\z!) {
		$type = 'application/x-git-packed-objects';
		cache_one_year($h);
	} elsif ($path =~ m!\Aobjects/pack/pack-[a-f0-9]{40}\.idx\z!) {
		$type = 'application/x-git-packed-objects-toc';
		cache_one_year($h);
	} elsif ($path =~ /\A(?:$TEXT)\z/o) {
		$type = 'text/plain';
		push @$h, @NO_CACHE;
	} else {
		return r(404);
	}
	$path = "$git->{git_dir}/$path";
	PublicInbox::WwwStatic::response($env, $h, $path, $type);
}

sub git_parse_hdr { # {parse_hdr} for Qspawn
	my ($r, $bref, $dumb_args) = @_;
	my $res = parse_cgi_headers($r, $bref) or return; # incomplete
	$res->[0] == 403 ? serve_dumb(@$dumb_args) : $res;
}

# returns undef if 403 so it falls back to dumb HTTP
sub serve_smart {
	my ($env, $git, $path) = @_;
	my %env = %ENV;
	# GIT_COMMITTER_NAME, GIT_COMMITTER_EMAIL
	# may be set in the server-process and are passed as-is
	foreach my $name (qw(QUERY_STRING
				REMOTE_USER REMOTE_ADDR
				HTTP_CONTENT_ENCODING
				CONTENT_TYPE
				SERVER_PROTOCOL
				REQUEST_METHOD)) {
		my $val = $env->{$name};
		$env{$name} = $val if defined $val;
	}
	my $limiter = $git->{-httpbackend_limiter} || $default_limiter;
	$env{GIT_HTTP_EXPORT_ALL} = '1';
	$env{PATH_TRANSLATED} = "$git->{git_dir}/$path";
	my $rdr = input_prepare($env) or return r(500);
	my $qsp = PublicInbox::Qspawn->new([qw(git http-backend)], \%env, $rdr);
	$qsp->psgi_return($env, $limiter, \&git_parse_hdr, [$env, $git, $path]);
}

sub input_prepare {
	my ($env) = @_;

	my $input = $env->{'psgi.input'};
	my $fd = eval { fileno($input) };
	if (defined $fd && $fd >= 0) {
		return { 0 => $fd };
	}
	my $id = "git-http.input.$env->{REMOTE_ADDR}:$env->{REMOTE_PORT}";
	my $in = tmpfile($id);
	unless (defined $in) {
		err($env, "could not open temporary file: $!");
		return;
	}
	my $buf;
	while (1) {
		my $r = $input->read($buf, 8192);
		unless (defined $r) {
			err($env, "error reading input: $!");
			return;
		}
		last if $r == 0;
		unless (print $in $buf) {
			err($env, "error writing temporary file: $!");
			return;
		}
	}
	# ensure it's visible to git-http-backend(1):
	unless ($in->flush) {
		err($env, "error writing temporary file: $!");
		return;
	}
	unless (defined(sysseek($in, 0, SEEK_SET))) {
		err($env, "error seeking temporary file: $!");
		return;
	}
	{ 0 => $in };
}

sub parse_cgi_headers {
	my ($r, $bref) = @_;
	return r(500) unless defined $r && $r >= 0;
	$$bref =~ s/\A(.*?)\r?\n\r?\n//s or return $r == 0 ? r(500) : undef;
	my $h = $1;
	my $code = 200;
	my @h;
	foreach my $l (split(/\r?\n/, $h)) {
		my ($k, $v) = split(/:\s*/, $l, 2);
		if ($k =~ /\AStatus\z/i) {
			($code) = ($v =~ /\b([0-9]+)\b/);
		} else {
			push @h, $k, $v;
		}
	}
	[ $code, \@h ]
}

1;
public-inbox-1.6.1/lib/PublicInbox/GzipFilter.pm000066400000000000000000000127771377346120300215370ustar00rootroot00000000000000# Copyright (C) 2020 all contributors 
# License: AGPL-3.0+ 
#
# In public-inbox <=1.5.0, public-inbox-httpd favored "getline"
# response bodies to take a "pull"-based approach to feeding
# slow clients (as opposed to a more common "push" model).
#
# In newer versions, public-inbox-httpd supports a backpressure-aware
# pull/push model which also accounts for slow git blob storage.
# async_next callbacks only run when the DS {wbuf} is drained
# async_eml callbacks only run when a blob arrives from git.
#
# We continue to support getline+close for generic PSGI servers.
package PublicInbox::GzipFilter;
use strict;
use parent qw(Exporter);
use Compress::Raw::Zlib qw(Z_OK);
use PublicInbox::CompressNoop;
use PublicInbox::Eml;
use PublicInbox::GitAsyncCat;

our @EXPORT_OK = qw(gzf_maybe);
my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1);
my @GZIP_HDRS = qw(Vary Accept-Encoding Content-Encoding gzip);

sub new { bless {}, shift } # qspawn filter

# for Qspawn if using $env->{'pi-httpd.async'}
sub attach {
	my ($self, $http_out) = @_;
	$self->{http_out} = $http_out; # PublicInbox::HTTP::{Chunked,Identity}
	$self
}

sub gz_or_noop {
	my ($res_hdr, $env) = @_;
	if (($env->{HTTP_ACCEPT_ENCODING} // '') =~ /\bgzip\b/) {
		$env->{'plack.skip-deflater'} = 1;
		push @$res_hdr, @GZIP_HDRS;
		gzip_or_die();
	} else {
		PublicInbox::CompressNoop::new();
	}
}

sub gzf_maybe ($$) { bless { gz => gz_or_noop(@_) }, __PACKAGE__ }

sub psgi_response {
	my ($self, $code, $res_hdr) = @_;
	my $env = $self->{env};
	$self->{gz} //= gz_or_noop($res_hdr, $env);
	if ($env->{'pi-httpd.async'}) {
		my $http = $env->{'psgix.io'}; # PublicInbox::HTTP
		$http->{forward} = $self;
		sub {
			my ($wcb) = @_; # -httpd provided write callback
			$self->{http_out} = $wcb->([$code, $res_hdr]);
			$self->can('async_next')->($http); # start stepping
		};
	} else { # generic PSGI code path
		[ $code, $res_hdr, $self ];
	}
}

sub qsp_maybe ($$) {
	my ($res_hdr, $env) = @_;
	return if ($env->{HTTP_ACCEPT_ENCODING} // '') !~ /\bgzip\b/;
	my $hdr = join("\n", @$res_hdr);
	return if $hdr !~ m!^Content-Type\n
				(?:(?:text/(?:html|plain))|
				application/atom\+xml)\b!ixsm;
	return if $hdr =~ m!^Content-Encoding\ngzip\n!smi;
	return if $hdr =~ m!^Content-Length\n[0-9]+\n!smi;
	return if $hdr =~ m!^Transfer-Encoding\n!smi;
	# in case Plack::Middleware::Deflater is loaded:
	return if $env->{'plack.skip-deflater'}++;
	push @$res_hdr, @GZIP_HDRS;
	bless {}, __PACKAGE__;
}

sub gzip_or_die () {
	my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
	$err == Z_OK or die "Deflate->new failed: $err";
	$gz;
}

# for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'}
# Also used for ->getline callbacks
sub translate ($$) {
	my $self = $_[0]; # $_[1] => input

	# allocate the zlib context lazily here, instead of in ->new.
	# Deflate contexts are memory-intensive and this object may
	# be sitting in the Qspawn limiter queue for a while.
	my $gz = $self->{gz} //= gzip_or_die();
	my $zbuf = delete($self->{zbuf});
	if (defined $_[1]) { # my $buf = $_[1];
		my $err = $gz->deflate($_[1], $zbuf);
		die "gzip->deflate: $err" if $err != Z_OK;
		return $zbuf if length($zbuf) >= 8192;

		$self->{zbuf} = $zbuf;
		'';
	} else { # undef == EOF
		my $err = $gz->flush($zbuf);
		die "gzip->flush: $err" if $err != Z_OK;
		$zbuf;
	}
}

sub write {
	# my $ret = bytes::length($_[1]); # XXX does anybody care?
	$_[0]->{http_out}->write(translate($_[0], $_[1]));
}

# similar to ->translate; use this when we're sure we know we have
# more data to buffer after this
sub zmore {
	my $self = $_[0]; # $_[1] => input
	my $err = $self->{gz}->deflate($_[1], $self->{zbuf});
	die "gzip->deflate: $err" if $err != Z_OK;
	undef;
}

# flushes and returns the final bit of gzipped data
sub zflush ($;$) {
	my $self = $_[0]; # $_[1] => final input (optional)
	my $zbuf = delete $self->{zbuf};
	my $gz = delete $self->{gz};
	my $err;
	if (defined $_[1]) {
		$err = $gz->deflate($_[1], $zbuf);
		die "gzip->deflate: $err" if $err != Z_OK;
	}
	$err = $gz->flush($zbuf);
	die "gzip->flush: $err" if $err != Z_OK;
	$zbuf;
}

sub close {
	my ($self) = @_;
	if (my $http_out = delete $self->{http_out}) {
		$http_out->write(zflush($self));
		$http_out->close;
	}
}

sub bail  {
	my $self = shift;
	if (my $env = $self->{env}) {
		eval { $env->{'psgi.errors'}->print(@_, "\n") };
		warn("E: error printing to psgi.errors: $@", @_) if $@;
		my $http = $env->{'psgix.io'} or return; # client abort
		eval { $http->close }; # should hit our close
		warn "E: error in http->close: $@" if $@;
		eval { $self->close }; # just in case...
		warn "E: error in self->close: $@" if $@;
	} else {
		warn @_, "\n";
	}
}

# this is public-inbox-httpd-specific
sub async_blob_cb { # git->cat_async callback
	my ($bref, $oid, $type, $size, $self) = @_;
	my $http = $self->{env}->{'psgix.io'};
	$http->{forward} or return; # client aborted
	my $smsg = $self->{smsg} or bail($self, 'BUG: no smsg');
	if (!defined($oid)) {
		# it's possible to have TOCTOU if an admin runs
		# public-inbox-(edit|purge), just move onto the next message
		warn "E: $smsg->{blob} missing in $self->{-inbox}->{inboxdir}\n";
		return $http->next_step($self->can('async_next'));
	}
	$smsg->{blob} eq $oid or bail($self, "BUG: $smsg->{blob} != $oid");
	eval { $self->async_eml(PublicInbox::Eml->new($bref)) };
	bail($self, "E: async_eml: $@") if $@;
	$http->next_step($self->can('async_next'));
}

sub smsg_blob {
	my ($self, $smsg) = @_;
	git_async_cat($self->{-inbox}->git, $smsg->{blob},
			\&async_blob_cb, $self);
}

1;
public-inbox-1.6.1/lib/PublicInbox/HTTP.pm000066400000000000000000000334361377346120300202320ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 
#
# Generic PSGI server for convenience.  It aims to provide
# a consistent experience for public-inbox admins so they don't have
# to learn different ways to admin both NNTP and HTTP components.
# There's nothing which depends on public-inbox, here.
# Each instance of this class represents a HTTP client socket
#
# fields:
# httpd: PublicInbox::HTTPD ref
# env: PSGI env hashref
# input_left: bytes left to read in request body (e.g. POST/PUT)
# remote_addr: remote IP address as a string (e.g. "127.0.0.1")
# remote_port: peer port
# forward: response body object, response to ->getline + ->close
# alive: HTTP keepalive state:
#	0: drop connection when done
#	1: keep connection when done
#	2: keep connection, chunk responses
package PublicInbox::HTTP;
use strict;
use parent qw(PublicInbox::DS);
use bytes (); # only for bytes::length
use Fcntl qw(:seek);
use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl
use Plack::Util;
use HTTP::Status qw(status_message);
use HTTP::Date qw(time2str);
use IO::Handle; # ->write
use PublicInbox::DS qw(msg_more);
use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
use PublicInbox::Tmpfile;
use constant {
	CHUNK_START => -1,   # [a-f0-9]+\r\n
	CHUNK_END => -2,     # \r\n
	CHUNK_ZEND => -3,    # \r\n
	CHUNK_MAX_HDR => 256,
};
use Errno qw(EAGAIN);

my $pipelineq = [];
sub process_pipelineq () {
	my $q = $pipelineq;
	$pipelineq = [];
	foreach (@$q) {
		next unless $_->{sock};
		rbuf_process($_);
	}
}

# Use the same configuration parameter as git since this is primarily
# a slow-client sponge for git-http-backend
# TODO: support per-respository http.maxRequestBuffer somehow...
our $MAX_REQUEST_BUFFER = $ENV{GIT_HTTP_MAX_REQUEST_BUFFER} ||
			(10 * 1024 * 1024);

open(my $null_io, '<', '/dev/null') or die "failed to open /dev/null: $!";
my $http_date;
my $prev = 0;
sub http_date () {
	my $now = time;
	$now == $prev ? $http_date : ($http_date = time2str($prev = $now));
}

sub new ($$$) {
	my ($class, $sock, $addr, $httpd) = @_;
	my $self = bless { httpd => $httpd }, $class;
	my $ev = EPOLLIN;
	my $wbuf;
	if ($sock->can('accept_SSL') && !$sock->accept_SSL) {
		return CORE::close($sock) if $! != EAGAIN;
		$ev = PublicInbox::TLS::epollbit() or return CORE::close($sock);
		$wbuf = [ \&PublicInbox::DS::accept_tls_step ];
	}
	$self->{wbuf} = $wbuf if $wbuf;
	($self->{remote_addr}, $self->{remote_port}) =
		PublicInbox::Daemon::host_with_port($addr);
	$self->SUPER::new($sock, $ev | EPOLLONESHOT);
}

sub event_step { # called by PublicInbox::DS
	my ($self) = @_;

	return unless $self->flush_write && $self->{sock};

	# only read more requests if we've drained the write buffer,
	# otherwise we can be buffering infinitely w/o backpressure

	return read_input($self) if ref($self->{env});
	my $rbuf = $self->{rbuf} // (\(my $x = ''));
	$self->do_read($rbuf, 8192, bytes::length($$rbuf)) or return;
	rbuf_process($self, $rbuf);
}

sub rbuf_process {
	my ($self, $rbuf) = @_;
	$rbuf //= $self->{rbuf} // (\(my $x = ''));

	my %env = %{$self->{httpd}->{env}}; # full hash copy
	my $r = parse_http_request($$rbuf, \%env);

	# We do not support Trailers in chunked requests, for now
	# (they are rarely-used and git (as of 2.7.2) does not use them)
	if ($r == -1 || $env{HTTP_TRAILER} ||
			# this length-check is necessary for PURE_PERL=1:
			($r == -2 && bytes::length($$rbuf) > 0x4000)) {
		return quit($self, 400);
	}
	if ($r < 0) { # incomplete
		$self->rbuf_idle($rbuf);
		return $self->requeue;
	}
	$$rbuf = substr($$rbuf, $r);
	my $len = input_prepare($self, \%env);
	defined $len or return write_err($self, undef); # EMFILE/ENFILE

	$len ? read_input($self, $rbuf) : app_dispatch($self, undef, $rbuf);
}

# IO::Handle::write returns boolean, this returns bytes written:
sub xwrite ($$$) {
	my ($fh, $rbuf, $max) = @_;
	my $w = bytes::length($$rbuf);
	$w = $max if $w > $max;
	$fh->write($$rbuf, $w) or return;
	$w;
}

sub read_input ($;$) {
	my ($self, $rbuf) = @_;
	$rbuf //= $self->{rbuf} // (\(my $x = ''));
	my $env = $self->{env};
	return read_input_chunked($self, $rbuf) if env_chunked($env);

	# env->{CONTENT_LENGTH} (identity)
	my $len = delete $self->{input_left};
	my $input = $env->{'psgi.input'};

	while ($len > 0) {
		if ($$rbuf ne '') {
			my $w = xwrite($input, $rbuf, $len);
			return write_err($self, $len) unless $w;
			$len -= $w;
			die "BUG: $len < 0 (w=$w)" if $len < 0;
			if ($len == 0) { # next request may be pipelined
				$$rbuf = substr($$rbuf, $w);
				last;
			}
			$$rbuf = '';
		}
		$self->do_read($rbuf, 8192) or return recv_err($self, $len);
		# continue looping if $r > 0;
	}
	app_dispatch($self, $input, $rbuf);
}

sub app_dispatch {
	my ($self, $input, $rbuf) = @_;
	$self->rbuf_idle($rbuf);
	my $env = $self->{env};
	$self->{env} = undef; # for exists() check in ->busy
	$env->{REMOTE_ADDR} = $self->{remote_addr};
	$env->{REMOTE_PORT} = $self->{remote_port};
	if (defined(my $host = $env->{HTTP_HOST})) {
		$host =~ s/:([0-9]+)\z// and $env->{SERVER_PORT} = $1;
		$env->{SERVER_NAME} = $host;
	}
	if (defined $input) {
		sysseek($input, 0, SEEK_SET) or
			die "BUG: psgi.input seek failed: $!";
	}
	# note: NOT $self->{sock}, we want our close (+ PublicInbox::DS::close),
	# to do proper cleanup:
	$env->{'psgix.io'} = $self; # for ->close or async_pass
	my $res = Plack::Util::run_app($self->{httpd}->{app}, $env);
	eval {
		if (ref($res) eq 'CODE') {
			$res->(sub { response_write($self, $env, $_[0]) });
		} else {
			response_write($self, $env, $res);
		}
	};
	if ($@) {
		err($self, "response_write error: $@");
		$self->close;
	}
}

sub response_header_write {
	my ($self, $env, $res) = @_;
	my $proto = $env->{SERVER_PROTOCOL} or return; # HTTP/0.9 :P
	my $status = $res->[0];
	my $h = "$proto $status " . status_message($status) . "\r\n";
	my ($len, $chunked);
	my $headers = $res->[1];

	for (my $i = 0; $i < @$headers; $i += 2) {
		my $k = $headers->[$i];
		my $v = $headers->[$i + 1];
		next if $k =~ /\A(?:Connection|Date)\z/i;

		$len = $v if $k =~ /\AContent-Length\z/i;
		if ($k =~ /\ATransfer-Encoding\z/i && $v =~ /\bchunked\b/i) {
			$chunked = 1;
		}
		$h .= "$k: $v\r\n";
	}

	my $conn = $env->{HTTP_CONNECTION} || '';
	my $term = defined($len) || $chunked;
	my $prot_persist = ($proto eq 'HTTP/1.1') && ($conn !~ /\bclose\b/i);
	my $alive;
	if (!$term && $prot_persist) { # auto-chunk
		$chunked = $alive = 2;
		$h .= "Transfer-Encoding: chunked\r\n";
		# no need for "Connection: keep-alive" with HTTP/1.1
	} elsif ($term && ($prot_persist || ($conn =~ /\bkeep-alive\b/i))) {
		$alive = 1;
		$h .= "Connection: keep-alive\r\n";
	} else {
		$alive = 0;
		$h .= "Connection: close\r\n";
	}
	$h .= 'Date: ' . http_date() . "\r\n\r\n";

	if (($len || $chunked) && $env->{REQUEST_METHOD} ne 'HEAD') {
		msg_more($self, $h);
	} else {
		$self->write(\$h);
	}
	$alive;
}

# middlewares such as Deflater may write empty strings
sub chunked_write ($$) {
	my $self = $_[0];
	return if $_[1] eq '';
	msg_more($self, sprintf("%x\r\n", bytes::length($_[1])));
	msg_more($self, $_[1]);

	# use $self->write(\"\n\n") if you care about real-time
	# streaming responses, public-inbox WWW does not.
	msg_more($self, "\r\n");
}

sub identity_write ($$) {
	my $self = $_[0];
	$self->write(\($_[1])) if $_[1] ne '';
}

sub next_request ($) {
	my ($self) = @_;
	if ($self->{rbuf}) {
		# avoid recursion for pipelined requests
		PublicInbox::DS::requeue(\&process_pipelineq) if !@$pipelineq;
		push @$pipelineq, $self;
	} else { # wait for next request
		$self->requeue;
	}
}

sub response_done {
	my ($self, $alive) = @_;
	delete $self->{env}; # we're no longer busy
	$self->write(\"0\r\n\r\n") if $alive == 2;
	$self->write($alive ? \&next_request : \&close);
}

sub getline_pull {
	my ($self) = @_;
	my $forward = $self->{forward};

	# limit our own running time for fairness with other
	# clients and to avoid buffering too much:
	my $buf = eval {
		local $/ = \8192;
		$forward->getline;
	} if $forward;

	if (defined $buf) {
		# may close in PublicInbox::DS::write
		if ($self->{alive} == 2) {
			chunked_write($self, $buf);
		} else {
			identity_write($self, $buf);
		}

		if ($self->{sock}) {
			# autovivify wbuf
			my $new_size = push(@{$self->{wbuf}}, \&getline_pull);

			# wbuf may be populated by {chunked,identity}_write()
			# above, no need to rearm if so:
			$self->requeue if $new_size == 1;
			return; # likely
		}
	} elsif ($@) {
		err($self, "response ->getline error: $@");
		$self->close;
	}
	# avoid recursion
	if (delete $self->{forward}) {
		eval { $forward->close };
		if ($@) {
			err($self, "response ->close error: $@");
			$self->close; # idempotent
		}
	}
	response_done($self, delete $self->{alive});
}

sub response_write {
	my ($self, $env, $res) = @_;
	my $alive = response_header_write($self, $env, $res);
	if (defined(my $body = $res->[2])) {
		if (ref $body eq 'ARRAY') {
			if ($alive == 2) {
				chunked_write($self, $_) for @$body;
			} else {
				identity_write($self, $_) for @$body;
			}
			response_done($self, $alive);
		} else {
			$self->{forward} = $body;
			$self->{alive} = $alive;
			getline_pull($self); # kick-off!
		}
	# these are returned to the calling application:
	} elsif ($alive == 2) {
		bless [ $self, $alive ], 'PublicInbox::HTTP::Chunked';
	} else {
		bless [ $self, $alive ], 'PublicInbox::HTTP::Identity';
	}
}

sub input_tmpfile ($) {
	my $input = tmpfile('http.input', $_[0]->{sock}) or return;
	$input->autoflush(1);
	$input;
}

sub input_prepare {
	my ($self, $env) = @_;
	my ($input, $len);

	# rfc 7230 3.3.2, 3.3.3,: favor Transfer-Encoding over Content-Length
	my $hte = $env->{HTTP_TRANSFER_ENCODING};
	if (defined $hte) {
		# rfc7230 3.3.3, point 3 says only chunked is accepted
		# as the final encoding.  Since neither public-inbox-httpd,
		# git-http-backend, or our WWW-related code uses "gzip",
		# "deflate" or "compress" as the Transfer-Encoding, we'll
		# reject them:
		return quit($self, 400) if $hte !~ /\Achunked\z/i;

		$len = CHUNK_START;
		$input = input_tmpfile($self);
	} else {
		$len = $env->{CONTENT_LENGTH};
		if (defined $len) {
			# rfc7230 3.3.3.4
			return quit($self, 400) if $len !~ /\A[0-9]+\z/;

			return quit($self, 413) if $len > $MAX_REQUEST_BUFFER;
			$input = $len ? input_tmpfile($self) : $null_io;
		} else {
			$input = $null_io;
		}
	}

	# TODO: expire idle clients on ENFILE / EMFILE
	return unless $input;

	$env->{'psgi.input'} = $input;
	$self->{env} = $env;
	$self->{input_left} = $len || 0;
}

sub env_chunked { ($_[0]->{HTTP_TRANSFER_ENCODING} // '') =~ /\Achunked\z/i }

sub err ($$) {
	eval { $_[0]->{httpd}->{env}->{'psgi.errors'}->print($_[1]."\n") };
}

sub write_err {
	my ($self, $len) = @_;
	my $msg = $! || '(zero write)';
	$msg .= " ($len bytes remaining)" if defined $len;
	err($self, "error buffering to input: $msg");
	quit($self, 500);
}

sub recv_err {
	my ($self, $len) = @_;
	if ($! == EAGAIN) { # epoll/kevent watch already set by do_read
		$self->{input_left} = $len;
	} else {
		err($self, "error reading input: $! ($len bytes remaining)");
	}
}

sub read_input_chunked { # unlikely...
	my ($self, $rbuf) = @_;
	$rbuf //= $self->{rbuf} // (\(my $x = ''));
	my $input = $self->{env}->{'psgi.input'};
	my $len = delete $self->{input_left};

	while (1) { # chunk start
		if ($len == CHUNK_ZEND) {
			$$rbuf =~ s/\A\r\n//s and
				return app_dispatch($self, $input, $rbuf);

			return quit($self, 400) if bytes::length($$rbuf) > 2;
		}
		if ($len == CHUNK_END) {
			if ($$rbuf =~ s/\A\r\n//s) {
				$len = CHUNK_START;
			} elsif (bytes::length($$rbuf) > 2) {
				return quit($self, 400);
			}
		}
		if ($len == CHUNK_START) {
			if ($$rbuf =~ s/\A([a-f0-9]+).*?\r\n//i) {
				$len = hex $1;
				if (($len + -s $input) > $MAX_REQUEST_BUFFER) {
					return quit($self, 413);
				}
			} elsif (bytes::length($$rbuf) > CHUNK_MAX_HDR) {
				return quit($self, 400);
			}
			# will break from loop since $len >= 0
		}

		if ($len < 0) { # chunk header is trickled, read more
			$self->do_read($rbuf, 8192, bytes::length($$rbuf)) or
				return recv_err($self, $len);
			# (implicit) goto chunk_start if $r > 0;
		}
		$len = CHUNK_ZEND if $len == 0;

		# drain the current chunk
		until ($len <= 0) {
			if ($$rbuf ne '') {
				my $w = xwrite($input, $rbuf, $len);
				return write_err($self, "$len chunk") if !$w;
				$len -= $w;
				if ($len == 0) {
					# we may have leftover data to parse
					# in chunk
					$$rbuf = substr($$rbuf, $w);
					$len = CHUNK_END;
				} elsif ($len < 0) {
					die "BUG: len < 0: $len";
				} else {
					$$rbuf = '';
				}
			}
			if ($$rbuf eq '') {
				# read more of current chunk
				$self->do_read($rbuf, 8192) or
					return recv_err($self, $len);
			}
		}
	}
}

sub quit {
	my ($self, $status) = @_;
	my $h = "HTTP/1.1 $status " . status_message($status) . "\r\n\r\n";
	$self->write(\$h);
	$self->close;
	undef; # input_prepare expects this
}

sub close {
	my $self = $_[0];
	if (my $forward = delete $self->{forward}) {
		eval { $forward->close };
		err($self, "forward ->close error: $@") if $@;
	}
	$self->SUPER::close; # PublicInbox::DS::close
}

# for graceful shutdown in PublicInbox::Daemon:
sub busy () {
	my ($self) = @_;
	($self->{rbuf} || exists($self->{env}) || $self->{wbuf});
}

# runs $cb on the next iteration of the event loop at earliest
sub next_step {
	my ($self, $cb) = @_;
	return unless exists $self->{sock};
	$self->requeue if 1 == push(@{$self->{wbuf}}, $cb);
}

# Chunked and Identity packages are used for writing responses.
# They may be exposed to the PSGI application when the PSGI app
# returns a CODE ref for "push"-based responses
package PublicInbox::HTTP::Chunked;
use strict;

sub write {
	# ([$http], $buf) = @_;
	PublicInbox::HTTP::chunked_write($_[0]->[0], $_[1])
}

sub close {
	# $_[0] = [$http, $alive]
	PublicInbox::HTTP::response_done(@{$_[0]});
}

package PublicInbox::HTTP::Identity;
use strict;
our @ISA = qw(PublicInbox::HTTP::Chunked);

sub write {
	# ([$http], $buf) = @_;
	PublicInbox::HTTP::identity_write($_[0]->[0], $_[1]);
}

1;
public-inbox-1.6.1/lib/PublicInbox/HTTPD.pm000066400000000000000000000025701377346120300203310ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 

# wraps a listen socket for HTTP and links it to the PSGI app in
# public-inbox-httpd
package PublicInbox::HTTPD;
use strict;
use warnings;
use Plack::Util;
use PublicInbox::HTTPD::Async;
use PublicInbox::Daemon;

sub pi_httpd_async { PublicInbox::HTTPD::Async->new(@_) }

sub new {
	my ($class, $sock, $app) = @_;
	my $n = getsockname($sock) or die "not a socket: $sock $!\n";
	my ($host, $port) = PublicInbox::Daemon::host_with_port($n);

	my %env = (
		SERVER_NAME => $host,
		SERVER_PORT => $port,
		SCRIPT_NAME => '',
		'psgi.version' => [ 1, 1 ],
		'psgi.errors' => \*STDERR,
		'psgi.url_scheme' => 'http',
		'psgi.nonblocking' => Plack::Util::TRUE,
		'psgi.streaming' => Plack::Util::TRUE,
		'psgi.run_once'	 => Plack::Util::FALSE,
		'psgi.multithread' => Plack::Util::FALSE,
		'psgi.multiprocess' => Plack::Util::TRUE,

		# We don't use this anywhere, but we can support
		# other PSGI apps which might use it:
		'psgix.input.buffered' => Plack::Util::TRUE,

		# XXX unstable API!, only GitHTTPBackend needs
		# this to limit git-http-backend(1) parallelism.
		# We also check for the truthiness of this to
		# detect when to use git_async_cat for slow blobs
		'pi-httpd.async' => \&pi_httpd_async
	);
	bless {
		app => $app,
		env => \%env
	}, $class;
}

1;
public-inbox-1.6.1/lib/PublicInbox/HTTPD/000077500000000000000000000000001377346120300177675ustar00rootroot00000000000000public-inbox-1.6.1/lib/PublicInbox/HTTPD/Async.pm000066400000000000000000000066471377346120300214170ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 
#
# XXX This is a totally unstable API for public-inbox internal use only
# This is exposed via the 'pi-httpd.async' key in the PSGI env hash.
# The name of this key is not even stable!
# Currently intended for use with read-only pipes with expensive
# processes such as git-http-backend(1), cgit(1)
#
# fields:
# http: PublicInbox::HTTP ref
# fh: PublicInbox::HTTP::{Identity,Chunked} ref (can ->write + ->close)
# cb: initial read callback
# arg: arg for {cb}
# end_obj: CODE or object which responds to ->event_step when ->close is called
package PublicInbox::HTTPD::Async;
use strict;
use parent qw(PublicInbox::DS);
use Errno qw(EAGAIN);
use PublicInbox::Syscall qw(EPOLLIN EPOLLET);

# This is called via: $env->{'pi-httpd.async'}->()
# $io is a read-only pipe ($rpipe) for now, but may be a
# bidirectional socket in the future.
sub new {
	my ($class, $io, $cb, $arg, $end_obj) = @_;

	# no $io? call $cb at the top of the next event loop to
	# avoid recursion:
	unless (defined($io)) {
		PublicInbox::DS::requeue($cb ? $cb : $arg);
		die '$end_obj unsupported w/o $io' if $end_obj;
		return;
	}
	my $self = bless {
		cb => $cb, # initial read callback
		arg => $arg, # arg for $cb
		end_obj => $end_obj, # like END{}, can ->event_step
	}, $class;
	IO::Handle::blocking($io, 0);
	$self->SUPER::new($io, EPOLLIN | EPOLLET);
}

sub event_step {
	my ($self) = @_;
	if (my $cb = delete $self->{cb}) {
		# this may call async_pass when headers are done
		$cb->(delete $self->{arg});
	} elsif (my $sock = $self->{sock}) {
		my $http = $self->{http};
		# $self->{sock} is a read pipe for git-http-backend or cgit
		# and 65536 is the default Linux pipe size
		my $r = sysread($sock, my $buf, 65536);
		if ($r) {
			$self->{fh}->write($buf); # may call $http->close
			if ($http->{sock}) { # !closed
				$self->requeue;
				# let other clients get some work done, too
				return;
			}

			# else: fall through to close below...
		} elsif (!defined $r && $! == EAGAIN) {
			return; # EPOLLET means we'll be notified
		}

		# Done! Error handling will happen in $self->{fh}->close
		# called by end_obj->event_step handler
		delete $http->{forward};
		$self->close; # queues end_obj->event_step to be called
	} # else { # we may've been requeued but closed by $http
}

# once this is called, all data we read is passed to the
# to the PublicInbox::HTTP instance ($http) via $fh->write
sub async_pass {
	my ($self, $http, $fh, $bref) = @_;
	# In case the client HTTP connection ($http) dies, it
	# will automatically close this ($self) object.
	$http->{forward} = $self;

	# write anything we overread when we were reading headers
	$fh->write($$bref); # PublicInbox:HTTP::{chunked,identity}_wcb

	# we're done with this, free this memory up ASAP since the
	# calls after this may use much memory:
	$$bref = undef;

	$self->{http} = $http;
	$self->{fh} = $fh;

	# either hit EAGAIN or ->requeue to keep EPOLLET happy
	event_step($self);
}

# may be called as $forward->close in PublicInbox::HTTP or EOF (event_step)
sub close {
	my $self = $_[0];
	$self->SUPER::close; # DS::close

	# we defer this to the next timer loop since close is deferred
	if (my $end_obj = delete $self->{end_obj}) {
		# this calls $end_obj->event_step
		# (likely PublicInbox::Qspawn::event_step,
		#  NOT PublicInbox::HTTPD::Async::event_step)
		PublicInbox::DS::requeue($end_obj);
	}
}

1;
public-inbox-1.6.1/lib/PublicInbox/HlMod.pm000066400000000000000000000101221377346120300204410ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors 
# License: AGPL-3.0+ 

# I have no idea how stable or safe this is for handling untrusted
# input, but it seems to have been around for a while, and the
# highlight(1) executable is supported by gitweb and cgit.
#
# I'm also unsure about API stability, but highlight 3.x seems to
# have been around a few years and ikiwiki (apparently the only
# user of the SWIG/Perl bindings, at least in Debian) hasn't needed
# major changes to support it in recent years.
#
# Some code stolen from ikiwiki (GPL-2.0+)
# wrapper for SWIG-generated highlight.pm bindings
package PublicInbox::HlMod;
use strict;
use warnings;
use highlight; # SWIG-generated stuff
use PublicInbox::Hval qw(src_escape ascii_html);
my $hl;

sub _parse_filetypes ($) {
	my $ft_conf = $_[0]->getFiletypesConfPath('filetypes') or
				die 'filetypes.conf not found by highlight';
	open my $fh, '<', $ft_conf or die "failed to open($ft_conf): $!";
	local $/;
	my $cfg = <$fh>;
	my %ext2lang;
	my @shebang; # order matters

	# Hrm... why isn't this exposed by the highlight API?
	# highlight >= 3.2 format (bind-style) (from ikiwiki)
	while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
			 Extensions\s*=\s*{([^}]+)}/sgx) {
		my $lang = $1;
		foreach my $bit (split(/,/, $2)) {
			$bit =~ s/.*"(.*)".*/$1/s;
			$ext2lang{$bit} = $lang;
		}
	}
	# AFAIK, all the regexps used by in filetypes.conf distributed
	# by highlight work as Perl REs
	while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
			Shebang\s*=\s*\[\s*\[([^}]+)\s*\]\s*\]\s*}\s*,/sgx) {
		my ($lang, $re) = ($1, $2);
		eval {
			my $perl_re = qr/$re/;
			push @shebang, [ $lang, $perl_re ];
		};
		if ($@) {
			warn "$lang shebang=[[$re]] did not work in Perl: $@";
		}
	}
	(\%ext2lang, \@shebang);
}

# We only need one instance, so we don't need to do
# highlight::CodeGenerator::deleteInstance
sub new {
	my ($class) = @_;
	$hl ||= do {
		my $dir = highlight::DataDir->new;
		$dir->initSearchDirectories('');
		my ($ext2lang, $shebang) = _parse_filetypes($dir);
		bless {
			-dir => $dir,
			-ext2lang => $ext2lang,
			-shebang => $shebang,
		}, $class;
	};
}

sub _shebang2lang ($$) {
	my ($self, $str) = @_;
	my $shebang = $self->{-shebang};
	foreach my $s (@$shebang) {
		return $s->[0] if $$str =~ $s->[1];
	}
	undef;
}

sub _path2lang ($$) {
	my ($self, $path) = @_;
	my ($ext) = ($path =~ m!([^\\/\.]+)\z!);
	$ext = lc($ext);
	$self->{-ext2lang}->{$ext} || $ext;
}

sub do_hl {
	my ($self, $str, $path) = @_;
	my $lang = _path2lang($self, $path) if defined $path;
	do_hl_lang($self, $str, $lang);
}

sub do_hl_lang {
	my ($self, $str, $lang) = @_;

	my $dir = $self->{-dir};
	my $langpath;

	if (defined $lang) {
		$langpath = $dir->getLangPath("$lang.lang") or return;
		$lang = undef unless -f $langpath
	}
	unless (defined $lang) {
		$lang = _shebang2lang($self, $str) or return;
		$langpath = $dir->getLangPath("$lang.lang") or return;
		return unless -f $langpath
	}
	my $gen = $self->{$langpath} ||= do {
		my $g = highlight::CodeGenerator::getInstance($highlight::HTML);
		$g->setFragmentCode(1); # generate html fragment

		# whatever theme works
		my $themepath = $dir->getThemePath('print.theme');
		$g->initTheme($themepath);
		$g->loadLanguage($langpath);
		$g->setEncoding('utf-8');
		$g;
	};

	# we assume $$str is valid UTF-8, but the SWIG binding doesn't
	# know that, so ensure it's marked as UTF-8 even if it isnt...
	my $out = $gen->generateString($$str);
	utf8::decode($out);
	src_escape($out);
	\$out;
}

# Highlight text, but support Markdown "```$LANG" notation
# while preserving WYSIWYG of plain-text documentation.
# This is NOT to be enabled by default or encouraged for parsing
# emails, since it is NOT stable and can lead to standards
# proliferation of email.
sub do_hl_text {
	my ($self, $str) = @_;

	$$str = join('', map {
		if (/\A(``` ?)(\w+)\s*?\n(.+)(^```\s*)\z/sm) {
			my ($pfx, $lang, $code, $post) = ($1, $2, $3, $4);
			my $hl = do_hl_lang($self, \$code, $lang) || \$code;
			$pfx . $lang . "\n" . $$hl . $post;
		} else {
			ascii_html($_);
		}
	} split(/(^``` ?\w+\s*?\n.+?^```\s*$)/sm, $$str));
}

1;
public-inbox-1.6.1/lib/PublicInbox/Hval.pm000066400000000000000000000075411377346120300203430ustar00rootroot00000000000000# Copyright (C) 2014-2020 all contributors 
# License: AGPL-3.0+ 
#
# represents a header value in various forms.  Used for HTML generation
# in our web interface(s)
package PublicInbox::Hval;
use strict;
use warnings;
use Encode qw(find_encoding);
use PublicInbox::MID qw/mid_clean mid_escape/;
use base qw/Exporter/;
our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape
		to_attr prurl mid_href fmt_ts ts2str/;
use POSIX qw(strftime);
my $enc_ascii = find_encoding('us-ascii');

# safe-ish acceptable filename pattern for portability
our $FN = '[a-zA-Z0-9][a-zA-Z0-9_\-\.]+[a-zA-Z0-9]'; # needs \z anchor

sub mid_href { ascii_html(mid_escape($_[0])) }

# some of these overrides are standard C escapes so they're
# easy-to-understand when rendered.
my %escape_sequence = (
	"\x00" => '\\0', # NUL
	"\x07" => '\\a', # bell
	"\x08" => '\\b', # backspace
	"\x09" => "\t", # obvious to show as-is
	"\x0a" => "\n", # obvious to show as-is
	"\x0b" => '\\v', # vertical tab
	"\x0c" => '\\f', # form feed
	"\x0d" => '\\r', # carriage ret (not preceding \n)
	"\x1b" => '^[', # ASCII escape (mutt seems to escape this way)
	"\x7f" => '\\x7f', # DEL
);

my %xhtml_map = (
	'"' => '"',
	'&' => '&',
	"'" => ''',
	'<' => '<',
	'>' => '>',
);

$xhtml_map{chr($_)} = sprintf('\\x%02x', $_) for (0..31);
%xhtml_map = (%xhtml_map, %escape_sequence);

# for post-processing the output of highlight.pm and perhaps other
# highlighers in the future
sub src_escape ($) {
	$_[0] =~ s/\r\n/\n/sg;
	$_[0] =~ s/'/'/sg; # workaround https://bugs.debian.org/927409
	$_[0] =~ s/([\x7f\x00-\x1f])/$xhtml_map{$1}/sge;
	$_[0] = $enc_ascii->encode($_[0], Encode::HTMLCREF);
}

sub ascii_html {
	my ($s) = @_;
	$s =~ s/([<>&'"\x7f\x00-\x1f])/$xhtml_map{$1}/sge;
	$enc_ascii->encode($s, Encode::HTMLCREF);
}

# returns a protocol-relative URL string
sub prurl ($$) {
	my ($env, $u) = @_;
	if (ref($u) eq 'ARRAY') {
		my $h = $env->{HTTP_HOST} // $env->{SERVER_NAME};
		my @host_match = grep(/\b\Q$h\E\b/, @$u);
		$u = $host_match[0] // $u->[0];
		# fall through to below:
	}
	index($u, '//') == 0 ? "$env->{'psgi.url_scheme'}:$u" : $u;
}

# for misguided people who believe in this stuff, give them a
# substitution for '.'
# ․ · and ͺ were also candidates:
#   https://public-inbox.org/meta/20170615015250.GA6484@starla/
# However, • was chosen to make copy+paste errors more obvious
sub obfuscate_addrs ($$;$) {
	my $ibx = $_[0];
	my $repl = $_[2] // '•';
	my $re = $ibx->{-no_obfuscate_re}; # regex of domains
	my $addrs = $ibx->{-no_obfuscate}; # { $address => 1 }
	$_[1] =~ s/(([\w\.\+=\-]+)\@([\w\-]+\.[\w\.\-]+))/
		my ($addr, $user, $domain) = ($1, $2, $3);
		if ($addrs->{$addr} || ((defined $re && $domain =~ $re))) {
			$addr;
		} else {
			$domain =~ s!([^\.]+)\.!$1$repl!;
			$user . '@' . $domain
		}
		/sge;
}

# like format_sanitized_subject in git.git pretty.c with '%f' format string
sub to_filename ($) {
	my $s = (split(/\n/, $_[0]))[0] // return; # empty string => undef
	$s =~ s/[^A-Za-z0-9_\.]+/-/g;
	$s =~ tr/././s;
	$s =~ s/[\.\-]+\z//;
	$s =~ s/\A[\.\-]+//;
	$s eq '' ? undef : $s;
}

# convert a filename (or any string) to HTML attribute

my %ESCAPES = map { chr($_) => sprintf('::%02x', $_) } (0..255);
$ESCAPES{'/'} = ':'; # common

sub to_attr ($) {
	my ($str) = @_;

	# git would never do this to us:
	return if index($str, '//') >= 0;

	my $first = '';
	utf8::encode($str); # to octets
	if ($str =~ s/\A([^A-Ya-z])//ms) { # start with a letter
		  $first = sprintf('Z%02x', ord($1));
	}
	$str =~ s/([^A-Za-z0-9_\.\-])/$ESCAPES{$1}/egms;
	utf8::decode($str); # allow wide chars
	$first . $str;
}

# for the t= query parameter passed to overview DB
sub ts2str ($) { strftime('%Y%m%d%H%M%S', gmtime($_[0])) };

# human-friendly format
sub fmt_ts ($) { strftime('%Y-%m-%d %k:%M', gmtime($_[0])) }

1;
public-inbox-1.6.1/lib/PublicInbox/IMAP.pm000066400000000000000000001163211377346120300201740ustar00rootroot00000000000000# Copyright (C) 2020 all contributors 
# License: AGPL-3.0+ 
#
# Each instance of this represents an IMAP client connected to
# public-inbox-imapd.  Much of this was taken from NNTP, but
# further refined while experimenting on future ideas to handle
# slow storage.
#
# data notes:
#
# * NNTP article numbers are UIDs, mm->created_at is UIDVALIDITY
#
# * public-inboxes are sliced into mailboxes of 50K messages
#   to not overload MUAs: $NEWSGROUP_NAME.$SLICE_INDEX
#   Slices are similar in concept to v2 "epochs".  Epochs
#   are for the limitations of git clients, while slices are
#   for the limitations of IMAP clients.
#
# * We also take advantage of slices being only 50K to store
#   "UID offset" to message sequence number (MSN) mapping
#   as a 50K uint16_t array (via pack("S*", ...)).  "UID offset"
#   is the offset from {uid_base} which determines the start of
#   the mailbox slice.
#
# fields:
# imapd: PublicInbox::IMAPD ref
# ibx: PublicInbox::Inbox ref
# long_cb: long_response private data
# uid_base: base UID for mailbox slice (0-based)
# -login_tag: IMAP TAG for LOGIN
# -idle_tag: IMAP response tag for IDLE
# uo2m: UID-to-MSN mapping
package PublicInbox::IMAP;
use strict;
use parent qw(PublicInbox::DS);
use PublicInbox::Eml;
use PublicInbox::EmlContentFoo qw(parse_content_disposition);
use PublicInbox::DS qw(now);
use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
use PublicInbox::GitAsyncCat;
use Text::ParseWords qw(parse_line);
use Errno qw(EAGAIN);
use PublicInbox::IMAPsearchqp;

my $Address;
for my $mod (qw(Email::Address::XS Mail::Address)) {
	eval "require $mod" or next;
	$Address = $mod and last;
}
die "neither Email::Address::XS nor Mail::Address loaded: $@" if !$Address;

sub LINE_MAX () { 8000 } # RFC 2683 3.2.1.5

# Changing UID_SLICE will cause grief for clients which cache.
# This also needs to be <64K: we pack it into a uint16_t
# for long_response UID (offset) => MSN mappings
sub UID_SLICE () { 50_000 }

# these values area also used for sorting
sub NEED_SMSG () { 1 }
sub NEED_BLOB () { NEED_SMSG|2 }
sub CRLF_BREF () { 4 }
sub EML_HDR () { 8 }
sub CRLF_HDR () { 16 }
sub EML_BDY () { 32 }
sub CRLF_BDY () { 64 }
my $OP_EML_NEW = [ EML_HDR - 1, \&op_eml_new ];
my $OP_CRLF_BREF = [ CRLF_BREF, \&op_crlf_bref ];
my $OP_CRLF_HDR = [ CRLF_HDR, \&op_crlf_hdr ];
my $OP_CRLF_BDY = [ CRLF_BDY, \&op_crlf_bdy ];

my %FETCH_NEED = (
	'BODY[HEADER]' => [ NEED_BLOB|EML_HDR|CRLF_HDR, \&emit_rfc822_header ],
	'BODY[TEXT]' => [ NEED_BLOB|EML_BDY|CRLF_BDY, \&emit_rfc822_text ],
	'BODY[]' => [ NEED_BLOB|CRLF_BREF, \&emit_rfc822 ],
	'RFC822.HEADER' => [ NEED_BLOB|EML_HDR|CRLF_HDR, \&emit_rfc822_header ],
	'RFC822.TEXT' => [ NEED_BLOB|EML_BDY|CRLF_BDY, \&emit_rfc822_text ],
	'RFC822.SIZE' => [ NEED_SMSG, \&emit_rfc822_size ],
	RFC822 => [ NEED_BLOB|CRLF_BREF, \&emit_rfc822 ],
	BODY => [ NEED_BLOB|EML_HDR|EML_BDY, \&emit_body ],
	BODYSTRUCTURE => [ NEED_BLOB|EML_HDR|EML_BDY, \&emit_bodystructure ],
	ENVELOPE => [ NEED_BLOB|EML_HDR, \&emit_envelope ],
	FLAGS => [ 0, \&emit_flags ],
	INTERNALDATE => [ NEED_SMSG, \&emit_internaldate ],
);
my %FETCH_ATT = map { $_ => [ $_ ] } keys %FETCH_NEED;

# aliases (RFC 3501 section 6.4.5)
$FETCH_ATT{FAST} = [ qw(FLAGS INTERNALDATE RFC822.SIZE) ];
$FETCH_ATT{ALL} = [ @{$FETCH_ATT{FAST}}, 'ENVELOPE' ];
$FETCH_ATT{FULL} = [ @{$FETCH_ATT{ALL}}, 'BODY' ];

for my $att (keys %FETCH_ATT) {
	my %h = map { $_ => $FETCH_NEED{$_} } @{$FETCH_ATT{$att}};
	$FETCH_ATT{$att} = \%h;
}
undef %FETCH_NEED;

my $valid_range = '[0-9]+|[0-9]+:[0-9]+|[0-9]+:\*';
$valid_range = qr/\A(?:$valid_range)(?:,(?:$valid_range))*\z/;

# RFC 3501 5.4. Autologout Timer needs to be >= 30min
$PublicInbox::DS::EXPTIME = 60 * 30;

sub greet ($) {
	my ($self) = @_;
	my $capa = capa($self);
	$self->write(\"* OK [$capa] public-inbox-imapd ready\r\n");
}

sub new ($$$) {
	my ($class, $sock, $imapd) = @_;
	my $self = bless { imapd => $imapd }, 'PublicInbox::IMAP_preauth';
	my $ev = EPOLLIN;
	my $wbuf;
	if ($sock->can('accept_SSL') && !$sock->accept_SSL) {
		return CORE::close($sock) if $! != EAGAIN;
		$ev = PublicInbox::TLS::epollbit() or return CORE::close($sock);
		$wbuf = [ \&PublicInbox::DS::accept_tls_step, \&greet ];
	}
	$self->SUPER::new($sock, $ev | EPOLLONESHOT);
	if ($wbuf) {
		$self->{wbuf} = $wbuf;
	} else {
		greet($self);
	}
	$self->update_idle_time;
	$self;
}

sub logged_in { 1 }

sub capa ($) {
	my ($self) = @_;

	# dovecot advertises IDLE pre-login; perhaps because some clients
	# depend on it, so we'll do the same
	my $capa = 'CAPABILITY IMAP4rev1 IDLE';
	if ($self->logged_in) {
		$capa .= ' COMPRESS=DEFLATE';
	} else {
		if (!($self->{sock} // $self)->can('accept_SSL') &&
			$self->{imapd}->{accept_tls}) {
			$capa .= ' STARTTLS';
		}
		$capa .= ' AUTH=ANONYMOUS';
	}
}

sub login_success ($$) {
	my ($self, $tag) = @_;
	bless $self, 'PublicInbox::IMAP';
	my $capa = capa($self);
	"$tag OK [$capa] Logged in\r\n";
}

sub auth_challenge_ok ($) {
	my ($self) = @_;
	my $tag = delete($self->{-login_tag}) or return;
	login_success($self, $tag);
}

sub cmd_login ($$$$) {
	my ($self, $tag) = @_; # ignore ($user, $password) = ($_[2], $_[3])
	login_success($self, $tag);
}

sub cmd_close ($$) {
	my ($self, $tag) = @_;
	delete @$self{qw(uid_base uo2m)};
	delete $self->{ibx} ? "$tag OK Close done\r\n"
				: "$tag BAD No mailbox\r\n";
}

sub cmd_logout ($$) {
	my ($self, $tag) = @_;
	delete $self->{-idle_tag};
	$self->write(\"* BYE logging out\r\n$tag OK Logout done\r\n");
	$self->shutdn; # PublicInbox::DS::shutdn
	undef;
}

sub cmd_authenticate ($$$) {
	my ($self, $tag) = @_; # $method = $_[2], should be "ANONYMOUS"
	$self->{-login_tag} = $tag;
	"+\r\n"; # challenge
}

sub cmd_capability ($$) {
	my ($self, $tag) = @_;
	'* '.capa($self)."\r\n$tag OK Capability done\r\n";
}

# uo2m: UID Offset to MSN, this is an arrayref by default,
# but uo2m_hibernate can compact and deduplicate it
sub uo2m_ary_new ($;$) {
	my ($self, $exists) = @_;
	my $base = $self->{uid_base};
	my $uids = $self->{ibx}->over->uid_range($base + 1, $base + UID_SLICE);

	# convert UIDs to offsets from {base}
	my @tmp; # [$UID_OFFSET] => $MSN
	my $msn = 0;
	++$base;
	$tmp[$_ - $base] = ++$msn for @$uids;
	$$exists = $msn if $exists;
	\@tmp;
}

# changes UID-offset-to-MSN mapping into a deduplicated scalar:
# uint16_t uo2m[UID_SLICE].
# May be swapped out for idle clients if THP is disabled.
sub uo2m_hibernate ($) {
	my ($self) = @_;
	ref(my $uo2m = $self->{uo2m}) or return;
	my %dedupe = ( uo2m_pack($uo2m) => undef );
	$self->{uo2m} = (keys(%dedupe))[0];
	undef;
}

sub uo2m_last_uid ($) {
	my ($self) = @_;
	defined(my $uo2m = $self->{uo2m}) or die 'BUG: uo2m_last_uid w/o {uo2m}';
	(ref($uo2m) ? @$uo2m : (length($uo2m) >> 1)) + $self->{uid_base};
}

sub uo2m_pack ($) {
	# $_[0] is an arrayref of MSNs, it may have undef gaps if there
	# are gaps in the corresponding UIDs: [ msn1, msn2, undef, msn3 ]
	no warnings 'uninitialized';
	pack('S*', @{$_[0]});
}

# extend {uo2m} to account for new messages which arrived since
# {uo2m} was created.
sub uo2m_extend ($$;$) {
	my ($self, $new_uid_max) = @_;
	defined(my $uo2m = $self->{uo2m}) or
		return($self->{uo2m} = uo2m_ary_new($self));
	my $beg = uo2m_last_uid($self); # last UID we've learned
	return $uo2m if $beg >= $new_uid_max; # fast path

	# need to extend the current range:
	my $base = $self->{uid_base};
	++$beg;
	my $uids = $self->{ibx}->over->uid_range($beg, $base + UID_SLICE);
	return $uo2m if !scalar(@$uids);
	my @tmp; # [$UID_OFFSET] => $MSN
	my $write_method = $_[2] // 'msg_more';
	if (ref($uo2m)) {
		my $msn = $uo2m->[-1];
		$tmp[$_ - $beg] = ++$msn for @$uids;
		$self->$write_method("* $msn EXISTS\r\n");
		push @$uo2m, @tmp;
		$uo2m;
	} else {
		my $msn = unpack('S', substr($uo2m, -2, 2));
		$tmp[$_ - $beg] = ++$msn for @$uids;
		$self->$write_method("* $msn EXISTS\r\n");
		$uo2m .= uo2m_pack(\@tmp);
		my %dedupe = ($uo2m => undef);
		$self->{uo2m} = (keys %dedupe)[0];
	}
}

sub cmd_noop ($$) {
	my ($self, $tag) = @_;
	defined($self->{uid_base}) and
		uo2m_extend($self, $self->{uid_base} + UID_SLICE);
	\"$tag OK Noop done\r\n";
}

# the flexible version which works on scalars and array refs.
# Must call uo2m_extend before this
sub uid2msn ($$) {
	my ($self, $uid) = @_;
	my $uo2m = $self->{uo2m};
	my $off = $uid - $self->{uid_base} - 1;
	ref($uo2m) ? $uo2m->[$off] : unpack('S', substr($uo2m, $off << 1, 2));
}

# returns an arrayref of UIDs, so MSNs can be translated to UIDs via:
# $msn2uid->[$MSN-1] => $UID.  The result of this is always ephemeral
# and does not live beyond the event loop.
sub msn2uid ($) {
	my ($self) = @_;
	my $base = $self->{uid_base};
	my $uo2m = uo2m_extend($self, $base + UID_SLICE);
	$uo2m = [ unpack('S*', $uo2m) ] if !ref($uo2m);

	my $uo = 0;
	my @msn2uid;
	for my $msn (@$uo2m) {
		++$uo;
		$msn2uid[$msn - 1] = $uo + $base if $msn;
	}
	\@msn2uid;
}

# converts a set of message sequence numbers in requests to UIDs:
sub msn_to_uid_range ($$) {
	my $msn2uid = $_[0];
	$_[1] =~ s!([0-9]+)!$msn2uid->[$1 - 1] // ($msn2uid->[-1] // 0 + 1)!sge;
}

# called by PublicInbox::InboxIdle
sub on_inbox_unlock {
	my ($self, $ibx) = @_;
	my $uid_end = $self->{uid_base} + UID_SLICE;
	uo2m_extend($self, $uid_end, 'write');
	my $new = uo2m_last_uid($self);
	if ($new == $uid_end) { # max exceeded $uid_end
		# continue idling w/o inotify
		my $sock = $self->{sock} or return;
		$ibx->unsubscribe_unlock(fileno($sock));
	}
}

# called every X minute(s) or so by PublicInbox::DS::later
my $IDLERS = {};
my $idle_timer;
sub idle_tick_all {
	my $old = $IDLERS;
	$IDLERS = {};
	for my $i (values %$old) {
		next if ($i->{wbuf} || !exists($i->{-idle_tag}));
		$i->update_idle_time or next;
		$IDLERS->{fileno($i->{sock})} = $i;
		$i->write(\"* OK Still here\r\n");
	}
	$idle_timer = scalar keys %$IDLERS ?
			PublicInbox::DS::later(\&idle_tick_all) : undef;
}

sub cmd_idle ($$) {
	my ($self, $tag) = @_;
	# IDLE seems allowed by dovecot w/o a mailbox selected *shrug*
	my $ibx = $self->{ibx} or return "$tag BAD no mailbox selected\r\n";
	my $uid_end = $self->{uid_base} + UID_SLICE;
	uo2m_extend($self, $uid_end);
	my $sock = $self->{sock} or return;
	my $fd = fileno($sock);
	$self->{-idle_tag} = $tag;
	# only do inotify on most recent slice
	if ($ibx->over->max < $uid_end) {
		$ibx->subscribe_unlock($fd, $self);
		$self->{imapd}->idler_start;
	}
	$idle_timer //= PublicInbox::DS::later(\&idle_tick_all);
	$IDLERS->{$fd} = $self;
	\"+ idling\r\n"
}

sub stop_idle ($$) {
	my ($self, $ibx) = @_;
	my $sock = $self->{sock} or return;
	my $fd = fileno($sock);
	delete $IDLERS->{$fd};
	$ibx->unsubscribe_unlock($fd);
}

sub idle_done ($$) {
	my ($self, $tag) = @_; # $tag is "DONE" (case-insensitive)
	defined(my $idle_tag = delete $self->{-idle_tag}) or
		return "$tag BAD not idle\r\n";
	my $ibx = $self->{ibx} or do {
		warn "BUG: idle_tag set w/o inbox";
		return "$tag BAD internal bug\r\n";
	};
	stop_idle($self, $ibx);
	"$idle_tag OK Idle done\r\n";
}

sub ensure_slices_exist ($$$) {
	my ($imapd, $ibx, $max) = @_;
	defined(my $mb_top = $ibx->{newsgroup}) or return;
	my $mailboxes = $imapd->{mailboxes};
	my @created;
	for (my $i = int($max/UID_SLICE); $i >= 0; --$i) {
		my $sub_mailbox = "$mb_top.$i";
		last if exists $mailboxes->{$sub_mailbox};
		$mailboxes->{$sub_mailbox} = $ibx;
		$sub_mailbox =~ s/\Ainbox\./INBOX./i; # more familiar to users
		push @created, $sub_mailbox;
	}
	return unless @created;
	my $l = $imapd->{inboxlist} or return;
	push @$l, map { qq[* LIST (\\HasNoChildren) "." $_\r\n] } @created;
}

sub inbox_lookup ($$;$) {
	my ($self, $mailbox, $examine) = @_;
	my ($ibx, $exists, $uidmax, $uid_base) = (undef, 0, 0, 0);
	$mailbox = lc $mailbox;
	$ibx = $self->{imapd}->{mailboxes}->{$mailbox} or return;
	my $over = $ibx->over;
	if ($over != $ibx) { # not a dummy
		$mailbox =~ /\.([0-9]+)\z/ or
				die "BUG: unexpected dummy mailbox: $mailbox\n";
		$uid_base = $1 * UID_SLICE;

		# ->num_highwater caches for writers, so use ->meta_accessor
		$uidmax = $ibx->mm->meta_accessor('num_highwater') // 0;
		if ($examine) {
			$self->{uid_base} = $uid_base;
			$self->{ibx} = $ibx;
			$self->{uo2m} = uo2m_ary_new($self, \$exists);
		} else {
			$exists = $over->imap_exists;
		}
		ensure_slices_exist($self->{imapd}, $ibx, $over->max);
	} else {
		if ($examine) {
			$self->{uid_base} = $uid_base;
			$self->{ibx} = $ibx;
			delete $self->{uo2m};
		}
		# if "INBOX.foo.bar" is selected and "INBOX.foo.bar.0",
		# check for new UID ranges (e.g. "INBOX.foo.bar.1")
		if (my $z = $self->{imapd}->{mailboxes}->{"$mailbox.0"}) {
			ensure_slices_exist($self->{imapd}, $z, $z->over->max);
		}
	}
	($ibx, $exists, $uidmax + 1, $uid_base);
}

sub cmd_examine ($$$) {
	my ($self, $tag, $mailbox) = @_;
	# XXX: do we need this? RFC 5162/7162
	my $ret = $self->{ibx} ? "* OK [CLOSED] previous closed\r\n" : '';
	my ($ibx, $exists, $uidnext, $base) = inbox_lookup($self, $mailbox, 1);
	return "$tag NO Mailbox doesn't exist: $mailbox\r\n" if !$ibx;
	$ret .= <{uidvalidity}]\r
$tag OK [READ-ONLY] EXAMINE/SELECT done\r
EOF
}

sub _esc ($) {
	my ($v) = @_;
	if (!defined($v)) {
		'NIL';
	} elsif ($v =~ /[{"\r\n%*\\\[]/) { # literal string
		'{' . length($v) . "}\r\n" . $v;
	} else { # quoted string
		qq{"$v"}
	}
}

sub addr_envelope ($$;$) {
	my ($eml, $x, $y) = @_;
	my $v = $eml->header_raw($x) //
		($y ? $eml->header_raw($y) : undef) // return 'NIL';

	my @x = $Address->parse($v) or return 'NIL';
	'(' . join('',
		map { '(' . join(' ',
				_esc($_->name), 'NIL',
				_esc($_->user), _esc($_->host)
			) . ')'
		} @x) .
	')';
}

sub eml_envelope ($) {
	my ($eml) = @_;
	'(' . join(' ',
		_esc($eml->header_raw('Date')),
		_esc($eml->header_raw('Subject')),
		addr_envelope($eml, 'From'),
		addr_envelope($eml, 'Sender', 'From'),
		addr_envelope($eml, 'Reply-To', 'From'),
		addr_envelope($eml, 'To'),
		addr_envelope($eml, 'Cc'),
		addr_envelope($eml, 'Bcc'),
		_esc($eml->header_raw('In-Reply-To')),
		_esc($eml->header_raw('Message-ID')),
	) . ')';
}

sub _esc_hash ($) {
	my ($hash) = @_;
	if ($hash && scalar keys %$hash) {
		$hash = [ %$hash ]; # flatten hash into 1-dimensional array
		'(' . join(' ', map { _esc($_) } @$hash) . ')';
	} else {
		'NIL';
	}
}

sub body_disposition ($) {
	my ($eml) = @_;
	my $cd = $eml->header_raw('Content-Disposition') or return 'NIL';
	$cd = parse_content_disposition($cd);
	my $buf = '('._esc($cd->{type});
	$buf .= ' ' . _esc_hash(delete $cd->{attributes});
	$buf .= ')';
}

sub body_leaf ($$;$) {
	my ($eml, $structure, $hold) = @_;
	my $buf = '';
	$eml->{is_submsg} and # parent was a message/(rfc822|news|global)
		$buf .= eml_envelope($eml). ' ';
	my $ct = $eml->ct;
	$buf .= '('._esc($ct->{type}).' ';
	$buf .= _esc($ct->{subtype});
	$buf .= ' ' . _esc_hash(delete $ct->{attributes});
	$buf .= ' ' . _esc($eml->header_raw('Content-ID'));
	$buf .= ' ' . _esc($eml->header_raw('Content-Description'));
	my $cte = $eml->header_raw('Content-Transfer-Encoding') // '7bit';
	$buf .= ' ' . _esc($cte);
	$buf .= ' ' . $eml->{imap_body_len};
	$buf .= ' '.($eml->body_raw =~ tr/\n/\n/) if lc($ct->{type}) eq 'text';

	# for message/(rfc822|global|news), $hold[0] should have envelope
	$buf .= ' ' . (@$hold ? join('', @$hold) : 'NIL') if $hold;

	if ($structure) {
		$buf .= ' '._esc($eml->header_raw('Content-MD5'));
		$buf .= ' '. body_disposition($eml);
		$buf .= ' '._esc($eml->header_raw('Content-Language'));
		$buf .= ' '._esc($eml->header_raw('Content-Location'));
	}
	$buf .= ')';
}

sub body_parent ($$$) {
	my ($eml, $structure, $hold) = @_;
	my $ct = $eml->ct;
	my $type = lc($ct->{type});
	if ($type eq 'multipart') {
		my $buf = '(';
		$buf .= @$hold ? join('', @$hold) : 'NIL';
		$buf .= ' '._esc($ct->{subtype});
		if ($structure) {
			$buf .= ' '._esc_hash(delete $ct->{attributes});
			$buf .= ' '.body_disposition($eml);
			$buf .= ' '._esc($eml->header_raw('Content-Language'));
			$buf .= ' '._esc($eml->header_raw('Content-Location'));
		}
		$buf .= ')';
		@$hold = ($buf);
	} else { # message/(rfc822|global|news)
		@$hold = (body_leaf($eml, $structure, $hold));
	}
}

# this is gross, but we need to process the parent part AFTER
# the child parts are done
sub bodystructure_prep {
	my ($p, $q) = @_;
	my ($eml, $depth) = @$p; # ignore idx
	# set length here, as $eml->{bdy} gets deleted for message/rfc822
	$eml->{imap_body_len} = length($eml->body_raw);
	push @$q, $eml, $depth;
}

# for FETCH BODY and FETCH BODYSTRUCTURE
sub fetch_body ($;$) {
	my ($eml, $structure) = @_;
	my @q;
	$eml->each_part(\&bodystructure_prep, \@q, 0, 1);
	my $cur_depth = 0;
	my @hold;
	do {
		my ($part, $depth) = splice(@q, -2);
		my $is_mp_parent = $depth == ($cur_depth - 1);
		$cur_depth = $depth;

		if ($is_mp_parent) {
			body_parent($part, $structure, \@hold);
		} else {
			unshift @hold, body_leaf($part, $structure);
		}
	} while (@q);
	join('', @hold);
}

sub requeue_once ($) {
	my ($self) = @_;
	# COMPRESS users all share the same DEFLATE context.
	# Flush it here to ensure clients don't see
	# each other's data
	$self->zflush;

	# no recursion, schedule another call ASAP,
	# but only after all pending writes are done.
	# autovivify wbuf:
	my $new_size = push(@{$self->{wbuf}}, \&long_step);

	# wbuf may be populated by $cb, no need to rearm if so:
	$self->requeue if $new_size == 1;
}

sub fetch_run_ops {
	my ($self, $smsg, $bref, $ops, $partial) = @_;
	my $uid = $smsg->{num};
	$self->msg_more('* '.uid2msn($self, $uid)." FETCH (UID $uid");
	my ($eml, $k);
	for (my $i = 0; $i < @$ops;) {
		$k = $ops->[$i++];
		$ops->[$i++]->($self, $k, $smsg, $bref, $eml);
	}
	partial_emit($self, $partial, $eml) if $partial;
	$self->msg_more(")\r\n");
}

sub fetch_blob_cb { # called by git->cat_async via git_async_cat
	my ($bref, $oid, $type, $size, $fetch_arg) = @_;
	my ($self, undef, $msgs, $range_info, $ops, $partial) = @$fetch_arg;
	my $ibx = $self->{ibx} or return $self->close; # client disconnected
	my $smsg = shift @$msgs or die 'BUG: no smsg';
	if (!defined($oid)) {
		# it's possible to have TOCTOU if an admin runs
		# public-inbox-(edit|purge), just move onto the next message
		warn "E: $smsg->{blob} missing in $ibx->{inboxdir}\n";
		return requeue_once($self);
	} else {
		$smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid";
	}
	my $pre;
	if (!$self->{wbuf} && (my $nxt = $msgs->[0])) {
		$pre = $ibx->git->async_prefetch($nxt->{blob},
						\&fetch_blob_cb, $fetch_arg);
	}
	fetch_run_ops($self, $smsg, $bref, $ops, $partial);
	$pre ? $self->zflush : requeue_once($self);
}

sub emit_rfc822 {
	my ($self, $k, undef, $bref) = @_;
	$self->msg_more(" $k {" . length($$bref)."}\r\n");
	$self->msg_more($$bref);
}

# Mail::IMAPClient::message_string cares about this by default,
# (->Ignoresizeerrors attribute).  Admins are encouraged to
# --reindex for IMAP support, anyways.
sub emit_rfc822_size {
	my ($self, $k, $smsg) = @_;
	$self->msg_more(' RFC822.SIZE ' . $smsg->{bytes});
}

sub emit_internaldate {
	my ($self, undef, $smsg) = @_;
	$self->msg_more(' INTERNALDATE "'.$smsg->internaldate.'"');
}

sub emit_flags { $_[0]->msg_more(' FLAGS ()') }

sub emit_envelope {
	my ($self, undef, undef, undef, $eml) = @_;
	$self->msg_more(' ENVELOPE '.eml_envelope($eml));
}

sub emit_rfc822_header {
	my ($self, $k, undef, undef, $eml) = @_;
	$self->msg_more(" $k {".length(${$eml->{hdr}})."}\r\n");
	$self->msg_more(${$eml->{hdr}});
}

# n.b. this is sorted to be after any emit_eml_new ops
sub emit_rfc822_text {
	my ($self, $k, undef, $bref) = @_;
	$self->msg_more(" $k {".length($$bref)."}\r\n");
	$self->msg_more($$bref);
}

sub emit_bodystructure {
	my ($self, undef, undef, undef, $eml) = @_;
	$self->msg_more(' BODYSTRUCTURE '.fetch_body($eml, 1));
}

sub emit_body {
	my ($self, undef, undef, undef, $eml) = @_;
	$self->msg_more(' BODY '.fetch_body($eml));
}

# set $eml once ($_[4] == $eml, $_[3] == $bref)
sub op_eml_new { $_[4] = PublicInbox::Eml->new($_[3]) }

# s/From / fixes old bug from import (pre-a0c07cba0e5d8b6a)
sub to_crlf_full {
	${$_[0]} =~ s/(?{hdr}) }

sub op_crlf_bdy { ${$_[4]->{bdy}} =~ s/(?{bdy} }

sub uid_clamp ($$$) {
	my ($self, $beg, $end) = @_;
	my $uid_min = $self->{uid_base} + 1;
	my $uid_end = $uid_min + UID_SLICE - 1;
	$$beg = $uid_min if $$beg < $uid_min;
	$$end = $uid_end if $$end > $uid_end;
}

sub range_step ($$) {
	my ($self, $range_csv) = @_;
	my ($beg, $end, $range);
	if ($$range_csv =~ s/\A([^,]+),//) {
		$range = $1;
	} else {
		$range = $$range_csv;
		$$range_csv = undef;
	}
	my $uid_base = $self->{uid_base};
	my $uid_end = $uid_base + UID_SLICE;
	if ($range =~ /\A([0-9]+):([0-9]+)\z/) {
		($beg, $end) = ($1 + 0, $2 + 0);
		uid_clamp($self, \$beg, \$end);
	} elsif ($range =~ /\A([0-9]+):\*\z/) {
		$beg = $1 + 0;
		$end = $self->{ibx}->over->max;
		$end = $uid_end if $end > $uid_end;
		$beg = $end if $beg > $end;
		uid_clamp($self, \$beg, \$end);
	} elsif ($range =~ /\A[0-9]+\z/) {
		$beg = $end = $range + 0;
		# just let the caller do an out-of-range query if a single
		# UID is out-of-range
		++$beg if ($beg <= $uid_base || $end > $uid_end);
	} else {
		return 'BAD fetch range';
	}
	[ $beg, $end, $$range_csv ];
}

sub refill_range ($$$) {
	my ($self, $msgs, $range_info) = @_;
	my ($beg, $end, $range_csv) = @$range_info;
	if (scalar(@$msgs = @{$self->{ibx}->over->query_xover($beg, $end)})) {
		$range_info->[0] = $msgs->[-1]->{num} + 1;
		return;
	}
	return 'OK Fetch done' if !$range_csv;
	my $next_range = range_step($self, \$range_csv);
	return $next_range if !ref($next_range); # error
	@$range_info = @$next_range;
	undef; # keep looping
}

sub fetch_blob { # long_response
	my ($self, $tag, $msgs, $range_info, $ops, $partial) = @_;
	while (!@$msgs) { # rare
		if (my $end = refill_range($self, $msgs, $range_info)) {
			$self->write(\"$tag $end\r\n");
			return;
		}
	}
	uo2m_extend($self, $msgs->[-1]->{num});
	git_async_cat($self->{ibx}->git, $msgs->[0]->{blob},
			\&fetch_blob_cb, \@_);
}

sub fetch_smsg { # long_response
	my ($self, $tag, $msgs, $range_info, $ops) = @_;
	while (!@$msgs) { # rare
		if (my $end = refill_range($self, $msgs, $range_info)) {
			$self->write(\"$tag $end\r\n");
			return;
		}
	}
	uo2m_extend($self, $msgs->[-1]->{num});
	fetch_run_ops($self, $_, undef, $ops) for @$msgs;
	@$msgs = ();
	1; # more
}

sub refill_uids ($$$;$) {
	my ($self, $uids, $range_info, $sql) = @_;
	my ($beg, $end, $range_csv) = @$range_info;
	my $over = $self->{ibx}->over;
	while (1) {
		if (scalar(@$uids = @{$over->uid_range($beg, $end, $sql)})) {
			$range_info->[0] = $uids->[-1] + 1; # update $beg
			return;
		} elsif (!$range_csv) {
			return 0;
		} else {
			my $next_range = range_step($self, \$range_csv);
			return $next_range if !ref($next_range); # error
			($beg, $end, $range_csv) = @$range_info = @$next_range;
			# continue looping
		}
	}
}

sub fetch_uid { # long_response
	my ($self, $tag, $uids, $range_info, $ops) = @_;
	if (defined(my $err = refill_uids($self, $uids, $range_info))) {
		$err ||= 'OK Fetch done';
		$self->write("$tag $err\r\n");
		return;
	}
	my $adj = $self->{uid_base} + 1;
	my $uo2m = uo2m_extend($self, $uids->[-1]);
	$uo2m = [ unpack('S*', $uo2m) ] if !ref($uo2m);
	my ($i, $k);
	for (@$uids) {
		$self->msg_more("* $uo2m->[$_ - $adj] FETCH (UID $_");
		for ($i = 0; $i < @$ops;) {
			$k = $ops->[$i++];
			$ops->[$i++]->($self, $k);
		}
		$self->msg_more(")\r\n");
	}
	@$uids = ();
	1; # more
}

sub cmd_status ($$$;@) {
	my ($self, $tag, $mailbox, @items) = @_;
	return "$tag BAD no items\r\n" if !scalar(@items);
	($items[0] !~ s/\A\(//s || $items[-1] !~ s/\)\z//s) and
		return "$tag BAD invalid args\r\n";
	my ($ibx, $exists, $uidnext) = inbox_lookup($self, $mailbox);
	return "$tag NO Mailbox doesn't exist: $mailbox\r\n" if !$ibx;
	my @it;
	for my $it (@items) {
		$it = uc($it);
		push @it, $it;
		if ($it =~ /\A(?:MESSAGES|UNSEEN|RECENT)\z/) {
			push @it, $exists;
		} elsif ($it eq 'UIDNEXT') {
			push @it, $uidnext;
		} elsif ($it eq 'UIDVALIDITY') {
			push @it, $ibx->{uidvalidity};
		} else {
			return "$tag BAD invalid item\r\n";
		}
	}
	return "$tag BAD no items\r\n" if !@it;
	"* STATUS $mailbox (".join(' ', @it).")\r\n" .
	"$tag OK Status done\r\n";
}

my %patmap = ('*' => '.*', '%' => '[^\.]*');
sub cmd_list ($$$$) {
	my ($self, $tag, $refname, $wildcard) = @_;
	my $l = $self->{imapd}->{inboxlist};
	if ($refname eq '' && $wildcard eq '') {
		# request for hierarchy delimiter
		$l = [ qq[* LIST (\\Noselect) "." ""\r\n] ];
	} elsif ($refname ne '' || $wildcard ne '*') {
		$wildcard =~ s!([^a-z0-9_])!$patmap{$1} // "\Q$1"!egi;
		$l = [ grep(/ \Q$refname\E$wildcard\r\n\z/is, @$l) ];
	}
	\(join('', @$l, "$tag OK List done\r\n"));
}

sub cmd_lsub ($$$$) {
	my (undef, $tag) = @_; # same args as cmd_list
	"$tag OK Lsub done\r\n";
}

sub eml_index_offs_i { # PublicInbox::Eml::each_part callback
	my ($p, $all) = @_;
	my ($eml, undef, $idx) = @$p;
	if ($idx && lc($eml->ct->{type}) eq 'multipart') {
		$eml->{imap_bdy} = $eml->{bdy} // \'';
	}
	$all->{$idx} = $eml; # $idx => Eml
}

# prepares an index for BODY[$SECTION_IDX] fetches
sub eml_body_idx ($$) {
	my ($eml, $section_idx) = @_;
	my $idx = $eml->{imap_all_parts} //= do {
		my $all = {};
		$eml->each_part(\&eml_index_offs_i, $all, 0, 1);
		# top-level of multipart, BODY[0] not allowed (nz-number)
		delete $all->{0};
		$all;
	};
	$idx->{$section_idx};
}

# BODY[($SECTION_IDX)?(.$SECTION_NAME)?]<$offset.$bytes>
sub partial_body {
	my ($eml, $section_idx, $section_name) = @_;
	if (defined $section_idx) {
		$eml = eml_body_idx($eml, $section_idx) or return;
	}
	if (defined $section_name) {
		if ($section_name eq 'MIME') {
			# RFC 3501 6.4.5 states:
			#	The MIME part specifier MUST be prefixed
			#	by one or more numeric part specifiers
			return unless defined $section_idx;
			return $eml->header_obj->as_string . "\r\n";
		}
		my $bdy = $eml->{bdy} // $eml->{imap_bdy} // \'';
		$eml = PublicInbox::Eml->new($$bdy);
		if ($section_name eq 'TEXT') {
			return $eml->body_raw;
		} elsif ($section_name eq 'HEADER') {
			return $eml->header_obj->as_string . "\r\n";
		} else {
			die "BUG: bad section_name=$section_name";
		}
	}
	${$eml->{bdy} // $eml->{imap_bdy} // \''};
}

# similar to what's in PublicInbox::Eml::re_memo, but doesn't memoize
# to avoid OOM with malicious users
sub hdrs_regexp ($) {
	my ($hdrs) = @_;
	my $names = join('|', map { "\Q$_" } split(/[ \t]+/, $hdrs));
	qr/^(?:$names):[ \t]*[^\n]*\r?\n # 1st line
		# continuation lines:
		(?:[^:\n]*?[ \t]+[^\n]*\r?\n)*
		/ismx;
}

# BODY[($SECTION_IDX.)?HEADER.FIELDS.NOT ($HDRS)]<$offset.$bytes>
sub partial_hdr_not {
	my ($eml, $section_idx, $hdrs_re) = @_;
	if (defined $section_idx) {
		$eml = eml_body_idx($eml, $section_idx) or return;
	}
	my $str = $eml->header_obj->as_string;
	$str =~ s/$hdrs_re//g;
	$str =~ s/(?
sub partial_hdr_get {
	my ($eml, $section_idx, $hdrs_re) = @_;
	if (defined $section_idx) {
		$eml = eml_body_idx($eml, $section_idx) or return;
	}
	my $str = $eml->header_obj->as_string;
	$str = join('', ($str =~ m/($hdrs_re)/g));
	$str =~ s/(?= 0) {
		my $next = shift @$want or return;
		$att .= ' ' . uc($next);
	}
	if ($att =~ /\ABODY\[([0-9]+(?:\.[0-9]+)*)? # 1 - section_idx
			(?:\.(HEADER|MIME|TEXT))? # 2 - section_name
			\](?:<([0-9]+)(?:\.([0-9]+))?>)?\z/sx) { # 3, 4
		$partial->{$att} = [ \&partial_body, $1, $2, $3, $4 ];
		$$need |= CRLF_BREF|EML_HDR|EML_BDY;
	} elsif ($att =~ /\ABODY\[(?:([0-9]+(?:\.[0-9]+)*)\.)? # 1 - section_idx
				(?:HEADER\.FIELDS(\.NOT)?)\x20 # 2
				\(([A-Z0-9\-\x20]+)\) # 3 - hdrs
			\](?:<([0-9]+)(?:\.([0-9]+))?>)?\z/sx) { # 4 5
		my $tmp = $partial->{$att} = [ $2 ? \&partial_hdr_not
						: \&partial_hdr_get,
						$1, undef, $4, $5 ];
		$tmp->[2] = hdrs_regexp($3);

		# don't emit CRLF_HDR instruction, here, partial_hdr_*
		# will do CRLF conversion with only the extracted result
		# and not waste time converting lines we don't care about.
		$$need |= EML_HDR;
	} else {
		undef;
	}
}

sub partial_emit ($$$) {
	my ($self, $partial, $eml) = @_;
	for (@$partial) {
		my ($k, $cb, @args) = @$_;
		my ($offset, $len) = splice(@args, -2);
		# $cb is partial_body|partial_hdr_get|partial_hdr_not
		my $str = $cb->($eml, @args) // '';
		if (defined $offset) {
			if (defined $len) {
				$str = substr($str, $offset, $len);
				$k =~ s/\.$len>\z/>/ or warn
"BUG: unable to remove `.$len>' from `$k'";
			} else {
				$str = substr($str, $offset);
				$len = length($str);
			}
		} else {
			$len = length($str);
		}
		$self->msg_more(" $k {$len}\r\n");
		$self->msg_more($str);
	}
}

sub fetch_compile ($) {
	my ($want) = @_;
	if ($want->[0] =~ s/\A\(//s) {
		$want->[-1] =~ s/\)\z//s or return 'BAD no rparen';
	}
	my (%partial, %seen, @op);
	my $need = 0;
	while (defined(my $att = shift @$want)) {
		$att = uc($att);
		next if $att eq 'UID'; # always returned
		$att =~ s/\ABODY\.PEEK\[/BODY\[/; # we're read-only
		my $x = $FETCH_ATT{$att};
		if ($x) {
			while (my ($k, $fl_cb) = each %$x) {
				next if $seen{$k}++;
				$need |= $fl_cb->[0];
				push @op, [ @$fl_cb, $k ];
			}
		} elsif (!partial_prepare(\$need, \%partial, $want, $att)) {
			return "BAD param: $att";
		}
	}
	my @r;

	# stabilize partial order for consistency and ease-of-debugging:
	if (scalar keys %partial) {
		$need |= NEED_BLOB;
		$r[2] = [ map { [ $_, @{$partial{$_}} ] } sort keys %partial ];
	}

	push @op, $OP_EML_NEW if ($need & (EML_HDR|EML_BDY));

	# do we need CRLF conversion?
	if ($need & CRLF_BREF) {
		push @op, $OP_CRLF_BREF;
	} elsif (my $crlf = ($need & (CRLF_HDR|CRLF_BDY))) {
		if ($crlf == (CRLF_HDR|CRLF_BDY)) {
			push @op, $OP_CRLF_BREF;
		} elsif ($need & CRLF_HDR) {
			push @op, $OP_CRLF_HDR;
		} else {
			push @op, $OP_CRLF_BDY;
		}
	}

	$r[0] = $need & NEED_BLOB ? \&fetch_blob :
		($need & NEED_SMSG ? \&fetch_smsg : \&fetch_uid);

	# r[1] = [ $key1, $cb1, $key2, $cb2, ... ]
	use sort 'stable'; # makes output more consistent
	$r[1] = [ map { ($_->[2], $_->[1]) } sort { $a->[0] <=> $b->[0] } @op ];
	@r;
}

sub cmd_uid_fetch ($$$$;@) {
	my ($self, $tag, $range_csv, @want) = @_;
	my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
	my ($cb, $ops, $partial) = fetch_compile(\@want);
	return "$tag $cb\r\n" unless $ops;

	# cb is one of fetch_blob, fetch_smsg, fetch_uid
	$range_csv = 'bad' if $range_csv !~ $valid_range;
	my $range_info = range_step($self, \$range_csv);
	return "$tag $range_info\r\n" if !ref($range_info);
	uo2m_hibernate($self) if $cb == \&fetch_blob; # slow, save RAM
	long_response($self, $cb, $tag, [], $range_info, $ops, $partial);
}

sub cmd_fetch ($$$$;@) {
	my ($self, $tag, $range_csv, @want) = @_;
	my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
	my ($cb, $ops, $partial) = fetch_compile(\@want);
	return "$tag $cb\r\n" unless $ops;

	# cb is one of fetch_blob, fetch_smsg, fetch_uid
	$range_csv = 'bad' if $range_csv !~ $valid_range;
	msn_to_uid_range(msn2uid($self), $range_csv);
	my $range_info = range_step($self, \$range_csv);
	return "$tag $range_info\r\n" if !ref($range_info);
	uo2m_hibernate($self) if $cb == \&fetch_blob; # slow, save RAM
	long_response($self, $cb, $tag, [], $range_info, $ops, $partial);
}

sub msn_convert ($$) {
	my ($self, $uids) = @_;
	my $adj = $self->{uid_base} + 1;
	my $uo2m = uo2m_extend($self, $uids->[-1]);
	$uo2m = [ unpack('S*', $uo2m) ] if !ref($uo2m);
	$_ = $uo2m->[$_ - $adj] for @$uids;
}

sub search_uid_range { # long_response
	my ($self, $tag, $sql, $range_info, $want_msn) = @_;
	my $uids = [];
	if (defined(my $err = refill_uids($self, $uids, $range_info, $sql))) {
		$err ||= 'OK Search done';
		$self->write("\r\n$tag $err\r\n");
		return;
	}
	msn_convert($self, $uids) if $want_msn;
	$self->msg_more(join(' ', '', @$uids));
	1; # more
}

sub parse_query ($$) {
	my ($self, $query) = @_;
	my $q = PublicInbox::IMAPsearchqp::parse($self, $query);
	if (ref($q)) {
		my $max = $self->{ibx}->over->max;
		my $beg = 1;
		uid_clamp($self, \$beg, \$max);
		$q->{range_info} = [ $beg, $max ];
	}
	$q;
}

sub refill_xap ($$$$) {
	my ($self, $uids, $range_info, $q) = @_;
	my ($beg, $end) = @$range_info;
	my $srch = $self->{ibx}->search;
	my $opt = { mset => 2, limit => 1000 };
	my $mset = $srch->mset("$q uid:$beg..$end", $opt);
	@$uids = @{$srch->mset_to_artnums($mset)};
	if (@$uids) {
		$range_info->[0] = $uids->[-1] + 1; # update $beg
		return; # possibly more
	}
	0; # all done
}

sub search_xap_range { # long_response
	my ($self, $tag, $q, $range_info, $want_msn) = @_;
	my $uids = [];
	if (defined(my $err = refill_xap($self, $uids, $range_info, $q))) {
		$err ||= 'OK Search done';
		$self->write("\r\n$tag $err\r\n");
		return;
	}
	msn_convert($self, $uids) if $want_msn;
	$self->msg_more(join(' ', '', @$uids));
	1; # more
}

sub search_common {
	my ($self, $tag, $query, $want_msn) = @_;
	my $ibx = $self->{ibx} or return "$tag BAD No mailbox selected\r\n";
	my $q = parse_query($self, $query);
	return "$tag $q\r\n" if !ref($q);
	my ($sql, $range_info) = delete @$q{qw(sql range_info)};
	if (!scalar(keys %$q)) { # overview.sqlite3
		$self->msg_more('* SEARCH');
		long_response($self, \&search_uid_range,
				$tag, $sql, $range_info, $want_msn);
	} elsif ($q = $q->{xap}) {
		$self->{ibx}->search or
			return "$tag BAD search not available for mailbox\r\n";
		$self->msg_more('* SEARCH');
		long_response($self, \&search_xap_range,
				$tag, $q, $range_info, $want_msn);
	} else {
		"$tag BAD Error\r\n";
	}
}

sub cmd_uid_search ($$$) {
	my ($self, $tag, $query) = @_;
	search_common($self, $tag, $query);
}

sub cmd_search ($$$;) {
	my ($self, $tag, $query) = @_;
	search_common($self, $tag, $query, 1);
}

sub args_ok ($$) { # duplicated from PublicInbox::NNTP
	my ($cb, $argc) = @_;
	my $tot = prototype $cb;
	my ($nreq, undef) = split(';', $tot);
	$nreq = ($nreq =~ tr/$//) - 1;
	$tot = ($tot =~ tr/$//) - 1;
	($argc <= $tot && $argc >= $nreq);
}

# returns 1 if we can continue, 0 if not due to buffered writes or disconnect
sub process_line ($$) {
	my ($self, $l) = @_;

	# TODO: IMAP allows literals for big requests to upload messages
	# (which we don't support) but maybe some big search queries use it.
	# RFC 3501 9 (2) doesn't permit TAB or multiple SP
	my ($tag, $req, @args) = parse_line('[ \t]+', 0, $l);
	pop(@args) if (@args && !defined($args[-1]));
	if (@args && uc($req) eq 'UID') {
		$req .= "_".(shift @args);
	}
	my $res = eval {
		if (defined(my $idle_tag = $self->{-idle_tag})) {
			(uc($tag // '') eq 'DONE' && !defined($req)) ?
				idle_done($self, $tag) :
				"$idle_tag BAD expected DONE\r\n";
		} elsif (my $cmd = $self->can('cmd_'.lc($req // ''))) {
			if ($cmd == \&cmd_uid_search || $cmd == \&cmd_search) {
				# preserve user-supplied quotes for search
				(undef, @args) = split(/ search /i, $l, 2);
			}
			$cmd->($self, $tag, @args);
		} else { # this is weird
			auth_challenge_ok($self) //
					($tag // '*') .
					' BAD Error in IMAP command '.
					($req // '(???)').
					": Unknown command\r\n";
		}
	};
	my $err = $@;
	if ($err && $self->{sock}) {
		$l =~ s/\r?\n//s;
		err($self, 'error from: %s (%s)', $l, $err);
		$tag //= '*';
		$res = "$tag BAD program fault - command not performed\r\n";
	}
	return 0 unless defined $res;
	$self->write($res);
}

sub long_step {
	my ($self) = @_;
	# wbuf is unset or empty, here; {long} may add to it
	my ($fd, $cb, $t0, @args) = @{$self->{long_cb}};
	my $more = eval { $cb->($self, @args) };
	if ($@ || !$self->{sock}) { # something bad happened...
		delete $self->{long_cb};
		my $elapsed = now() - $t0;
		if ($@) {
			err($self,
			    "%s during long response[$fd] - %0.6f",
			    $@, $elapsed);
		}
		out($self, " deferred[$fd] aborted - %0.6f", $elapsed);
		$self->close;
	} elsif ($more) { # $self->{wbuf}:
		$self->update_idle_time;

		# control passed to git_async_cat if $more == \undef
		requeue_once($self) if !ref($more);
	} else { # all done!
		delete $self->{long_cb};
		my $elapsed = now() - $t0;
		my $fd = fileno($self->{sock});
		out($self, " deferred[$fd] done - %0.6f", $elapsed);
		my $wbuf = $self->{wbuf}; # do NOT autovivify

		$self->requeue unless $wbuf && @$wbuf;
	}
}

sub err ($$;@) {
	my ($self, $fmt, @args) = @_;
	printf { $self->{imapd}->{err} } $fmt."\n", @args;
}

sub out ($$;@) {
	my ($self, $fmt, @args) = @_;
	printf { $self->{imapd}->{out} } $fmt."\n", @args;
}

sub long_response ($$;@) {
	my ($self, $cb, @args) = @_; # cb returns true if more, false if done

	my $sock = $self->{sock} or return;
	# make sure we disable reading during a long response,
	# clients should not be sending us stuff and making us do more
	# work while we are stream a response to them
	$self->{long_cb} = [ fileno($sock), $cb, now(), @args ];
	long_step($self); # kick off!
	undef;
}

# callback used by PublicInbox::DS for any (e)poll (in/out/hup/err)
sub event_step {
	my ($self) = @_;

	return unless $self->flush_write && $self->{sock} && !$self->{long_cb};

	$self->update_idle_time;
	# only read more requests if we've drained the write buffer,
	# otherwise we can be buffering infinitely w/o backpressure

	my $rbuf = $self->{rbuf} // \(my $x = '');
	my $line = index($$rbuf, "\n");
	while ($line < 0) {
		if (length($$rbuf) >= LINE_MAX) {
			$self->write(\"\* BAD request too long\r\n");
			return $self->close;
		}
		$self->do_read($rbuf, LINE_MAX, length($$rbuf)) or
				return uo2m_hibernate($self);
		$line = index($$rbuf, "\n");
	}
	$line = substr($$rbuf, 0, $line + 1, '');
	$line =~ s/\r?\n\z//s;
	return $self->close if $line =~ /[[:cntrl:]]/s;
	my $t0 = now();
	my $fd = fileno($self->{sock});
	my $r = eval { process_line($self, $line) };
	my $pending = $self->{wbuf} ? ' pending' : '';
	out($self, "[$fd] %s - %0.6f$pending - $r", $line, now() - $t0);

	return $self->close if $r < 0;
	$self->rbuf_idle($rbuf);
	$self->update_idle_time;

	# maybe there's more pipelined data, or we'll have
	# to register it for socket-readiness notifications
	$self->requeue unless $pending;
}

sub compressed { undef }

sub zflush {} # overridden by IMAPdeflate

# RFC 4978
sub cmd_compress ($$$) {
	my ($self, $tag, $alg) = @_;
	return "$tag BAD DEFLATE only\r\n" if uc($alg) ne "DEFLATE";
	return "$tag BAD COMPRESS active\r\n" if $self->compressed;

	# CRIME made TLS compression obsolete
	# return "$tag NO [COMPRESSIONACTIVE]\r\n" if $self->tls_compressed;

	PublicInbox::IMAPdeflate->enable($self, $tag);
	$self->requeue;
	undef
}

sub cmd_starttls ($$) {
	my ($self, $tag) = @_;
	my $sock = $self->{sock} or return;
	if ($sock->can('stop_SSL') || $self->compressed) {
		return "$tag BAD TLS or compression already enabled\r\n";
	}
	my $opt = $self->{imapd}->{accept_tls} or
		return "$tag BAD can not initiate TLS negotiation\r\n";
	$self->write(\"$tag OK begin TLS negotiation now\r\n");
	$self->{sock} = IO::Socket::SSL->start_SSL($sock, %$opt);
	$self->requeue if PublicInbox::DS::accept_tls_step($self);
	undef;
}

# for graceful shutdown in PublicInbox::Daemon:
sub busy {
	my ($self, $now) = @_;
	if (defined($self->{-idle_tag})) {
		$self->write(\"* BYE server shutting down\r\n");
		return; # not busy anymore
	}
	($self->{rbuf} || $self->{wbuf} || $self->not_idle_long($now));
}

sub close {
	my ($self) = @_;
	if (my $ibx = delete $self->{ibx}) {
		stop_idle($self, $ibx);
	}
	$self->SUPER::close; # PublicInbox::DS::close
}

# we're read-only, so SELECT and EXAMINE do the same thing
no warnings 'once';
*cmd_select = \&cmd_examine;

package PublicInbox::IMAP_preauth;
our @ISA = qw(PublicInbox::IMAP);

sub logged_in { 0 }

1;
public-inbox-1.6.1/lib/PublicInbox/IMAPClient.pm000066400000000000000000000072611377346120300213350ustar00rootroot00000000000000# This library is free software; you can redistribute it and/or modify it
# under the same terms as Perl itself, either Perl version 5.8.0 or, at
# your option, any later version of Perl 5 you may have available.
#
# The license for this file differs from the rest of public-inbox.
#
# Workaround some bugs in upstream Mail::IMAPClient when
# compression is enabled:
# - reference cycle: https://rt.cpan.org/Ticket/Display.html?id=132654
# - read starvation: https://rt.cpan.org/Ticket/Display.html?id=132720
package PublicInbox::IMAPClient;
use strict;
use parent 'Mail::IMAPClient';
use Errno qw(EAGAIN);

# RFC4978 COMPRESS
sub compress {
    my ($self) = @_;

    # BUG? strict check on capability commented out for now...
    #my $can = $self->has_capability("COMPRESS")
    #return undef unless $can and $can eq "DEFLATE";

    $self->_imap_command("COMPRESS DEFLATE") or return undef;

    my $zcl = $self->_load_module("Compress-Zlib") or return undef;

    # give caller control of args if desired
    $self->Compress(
        [
            -WindowBits => -$zcl->MAX_WBITS(),
            -Level      => $zcl->Z_BEST_SPEED()
        ]
    ) unless ( $self->Compress and ref( $self->Compress ) eq "ARRAY" );

    my ( $rc, $do, $io );

    ( $do, $rc ) = Compress::Zlib::deflateInit( @{ $self->Compress } );
    unless ( $rc == $zcl->Z_OK ) {
        $self->LastError("deflateInit failed (rc=$rc)");
        return undef;
    }

    ( $io, $rc ) =
      Compress::Zlib::inflateInit( -WindowBits => -$zcl->MAX_WBITS() );
    unless ( $rc == $zcl->Z_OK ) {
        $self->LastError("inflateInit failed (rc=$rc)");
        return undef;
    }

    $self->{Prewritemethod} = sub {
        my ( $self, $string ) = @_;

        my ( $rc, $out1, $out2 );
        ( $out1, $rc ) = $do->deflate($string);
        ( $out2, $rc ) = $do->flush( $zcl->Z_PARTIAL_FLUSH() )
          unless ( $rc != $zcl->Z_OK );

        unless ( $rc == $zcl->Z_OK ) {
            $self->LastError("deflate/flush failed (rc=$rc)");
            return undef;
        }

        return $out1 . $out2;
    };

    # need to retain some state for Readmoremethod/Readmethod calls
    my ( $Zbuf, $Ibuf ) = ( "", "" );

    $self->{Readmoremethod} = sub {
        my $self = shift;
        return 1 if ( length($Zbuf) || length($Ibuf) );
        $self->__read_more(@_);
    };

    $self->{Readmethod} = sub {
        my ( $self, $fh, $buf, $len, $off ) = @_;

        # get more data, but empty $Ibuf first if any data is left
        my ( $lz, $li ) = ( length $Zbuf, length $Ibuf );
        if ( $lz || !$li ) {
            my $readlen = $self->Buffer || 4096;
            my $ret = sysread( $fh, $Zbuf, $readlen, length $Zbuf );
            $lz = length $Zbuf;
            return $ret if ( !$ret && !$lz );    # $ret is undef or 0
        }

        # accumulate inflated data in $Ibuf
        if ($lz) {
            my ( $tbuf, $rc ) = $io->inflate( \$Zbuf );
            unless ( $rc == $zcl->Z_OK ) {
                $self->LastError("inflate failed (rc=$rc)");
                return undef;
            }
            $Ibuf .= $tbuf;
            $li = length $Ibuf;
        }

        if ( !$li ) {
            # note: faking EAGAIN here is only safe with level-triggered
            # I/O readiness notifications (select, poll).  Refactoring
            # callers will be needed in the unlikely case somebody wants
            # to use edge-triggered notifications (EV_CLEAR, EPOLLET).
            $! = EAGAIN;
            return undef;
        }

        # pull desired length of data from $Ibuf
        my $tbuf = substr( $Ibuf, 0, $len );
        substr( $Ibuf, 0, $len ) = "";
        substr( $$buf, $off ) = $tbuf;

        return length $tbuf;
    };

    return $self;
}

1;
public-inbox-1.6.1/lib/PublicInbox/IMAPD.pm000066400000000000000000000064021377346120300202760ustar00rootroot00000000000000# Copyright (C) 2020 all contributors 
# License: AGPL-3.0+ 

# represents an IMAPD (currently a singleton),
# see script/public-inbox-imapd for how it is used
package PublicInbox::IMAPD;
use strict;
use PublicInbox::Config;
use PublicInbox::ConfigIter;
use PublicInbox::InboxIdle;
use PublicInbox::IMAP;
use PublicInbox::DummyInbox;
my $dummy = bless { uidvalidity => 0 }, 'PublicInbox::DummyInbox';

sub new {
	my ($class) = @_;
	bless {
		mailboxes => {},
		err => \*STDERR,
		out => \*STDOUT,
		# accept_tls => { SSL_server => 1, ..., SSL_reuse_ctx => ... }
		# pi_config => PublicInbox::Config
		# idler => PublicInbox::InboxIdle
	}, $class;
}

sub imapd_refresh_ibx { # pi_config->each_inbox cb
	my ($ibx, $imapd) = @_;
	my $ngname = $ibx->{newsgroup} or return;
	if (ref $ngname) {
		warn 'multiple newsgroups not supported: '.
			join(', ', @$ngname). "\n";
		return;
	} elsif ($ngname =~ m![^a-z0-9/_\.\-\~\@\+\=:]! ||
		 $ngname =~ /\.[0-9]+\z/) {
		warn "mailbox name invalid: newsgroup=`$ngname'\n";
		return;
	}
	$ibx->over or return;
	$ibx->{over} = undef;
	my $mm = $ibx->mm or return;
	$ibx->{mm} = undef;

	# RFC 3501 2.3.1.1 -  "A good UIDVALIDITY value to use in
	# this case is a 32-bit representation of the creation
	# date/time of the mailbox"
	defined($ibx->{uidvalidity} = $mm->created_at) or return;
	PublicInbox::IMAP::ensure_slices_exist($imapd, $ibx, $mm->max // 0);

	# preload to avoid fragmentation:
	$ibx->description;
	$ibx->base_url;

	# ensure dummies are selectable
	my $dummies = $imapd->{dummies};
	do {
		$dummies->{$ngname} = $dummy;
	} while ($ngname =~ s/\.[^\.]+\z//);
}

sub imapd_refresh_finalize {
	my ($imapd, $pi_config) = @_;
	my $mailboxes;
	if (my $next = delete $imapd->{imapd_next}) {
		$imapd->{mailboxes} = delete $next->{mailboxes};
		$mailboxes = delete $next->{dummies};
	} else {
		$mailboxes = delete $imapd->{dummies};
	}
	%$mailboxes = (%$mailboxes, %{$imapd->{mailboxes}});
	$imapd->{mailboxes} = $mailboxes;
	$imapd->{inboxlist} = [
		map {
			my $no = $mailboxes->{$_} == $dummy ? '' : 'No';
			my $u = $_; # capitalize "INBOX" for user-familiarity
			$u =~ s/\Ainbox(\.|\z)/INBOX$1/i;
			qq[* LIST (\\Has${no}Children) "." $u\r\n]
		} keys %$mailboxes
	];
	$imapd->{pi_config} = $pi_config;
	if (my $idler = $imapd->{idler}) {
		$idler->refresh($pi_config);
	}
}

sub imapd_refresh_step { # pi_config->iterate_start cb
	my ($pi_config, $section, $imapd) = @_;
	if (defined($section)) {
		return if $section !~ m!\Apublicinbox\.([^/]+)\z!;
		my $ibx = $pi_config->lookup_name($1) or return;
		imapd_refresh_ibx($ibx, $imapd->{imapd_next});
	} else { # undef == "EOF"
		imapd_refresh_finalize($imapd, $pi_config);
	}
}

sub refresh_groups {
	my ($self, $sig) = @_;
	my $pi_config = PublicInbox::Config->new;
	if ($sig) { # SIGHUP is handled through the event loop
		$self->{imapd_next} = { dummies => {}, mailboxes => {} };
		my $iter = PublicInbox::ConfigIter->new($pi_config,
						\&imapd_refresh_step, $self);
		$iter->event_step;
	} else { # initial start is synchronous
		$self->{dummies} = {};
		$pi_config->each_inbox(\&imapd_refresh_ibx, $self);
		imapd_refresh_finalize($self, $pi_config);
	}
}

sub idler_start {
	$_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_config});
}

1;
public-inbox-1.6.1/lib/PublicInbox/IMAPTracker.pm000066400000000000000000000041611377346120300215060ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors 
# License: AGPL-3.0+ 
package PublicInbox::IMAPTracker;
use strict;
use parent qw(PublicInbox::Lock);
use DBI;
use DBD::SQLite;
use PublicInbox::Config;

sub create_tables ($) {
	my ($dbh) = @_;

	$dbh->do(<<'');
CREATE TABLE IF NOT EXISTS imap_last (
	url VARCHAR PRIMARY KEY NOT NULL,
	uid_validity INTEGER NOT NULL,
	uid INTEGER NOT NULL,
	UNIQUE (url)
)

}

sub dbh_new ($) {
	my ($dbname) = @_;
	my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", '', '', {
		AutoCommit => 1,
		RaiseError => 1,
		PrintError => 0,
		sqlite_use_immediate_transaction => 1,
	});
	$dbh->{sqlite_unicode} = 1;

	# TRUNCATE reduces I/O compared to the default (DELETE).
	# Allow and preserve user-overridden WAL, but don't force it.
	my $jm = $dbh->selectrow_array('PRAGMA journal_mode');
	$dbh->do('PRAGMA journal_mode = TRUNCATE') if $jm ne 'wal';

	create_tables($dbh);
	$dbh;
}

sub get_last ($) {
	my ($self) = @_;
	my $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
SELECT uid_validity, uid FROM imap_last WHERE url = ?

	$sth->execute($self->{url});
	$sth->fetchrow_array;
}

sub update_last ($$$) {
	my ($self, $validity, $last_uid) = @_;
	return unless defined $last_uid;
	my $sth = $self->{dbh}->prepare_cached(<<'');
INSERT OR REPLACE INTO imap_last (url, uid_validity, uid)
VALUES (?, ?, ?)

	$self->lock_acquire;
	my $rv = $sth->execute($self->{url}, $validity, $last_uid);
	$self->lock_release;
	$rv;
}

sub new {
	my ($class, $url) = @_;

	# original name for compatibility with old setups:
	my $dbname = PublicInbox::Config->config_dir() . "/imap.sqlite3";

	# use the new XDG-compliant name for new setups:
	if (!-f $dbname) {
		$dbname = ($ENV{XDG_DATA_HOME} //
			(($ENV{HOME} // '/nonexistent').'/.local/share')) .
			'/public-inbox/imap.sqlite3';
	}
	if (!-f $dbname) {
		require File::Path;
		require File::Basename;
		File::Path::mkpath(File::Basename::dirname($dbname));
	}
	my $self = bless { lock_path => "$dbname.lock", url => $url }, $class;
	$self->lock_acquire;
	$self->{dbh} = dbh_new($dbname);
	$self->lock_release;
	$self;
}

1;
public-inbox-1.6.1/lib/PublicInbox/IMAPdeflate.pm000066400000000000000000000063351377346120300215240ustar00rootroot00000000000000# Copyright (C) 2020 all contributors 
# License: AGPL-3.0+ 
# TODO: reduce duplication from PublicInbox::NNTPdeflate

# RFC 4978
package PublicInbox::IMAPdeflate;
use strict;
use warnings;
use 5.010_001;
use base qw(PublicInbox::IMAP);
use Compress::Raw::Zlib;

my %IN_OPT = (
	-Bufsize => 1024,
	-WindowBits => -15, # RFC 1951
	-AppendOutput => 1,
);

# global deflate context and buffer
my $zbuf = \(my $buf = '');
my $zout;
{
	my $err;
	($zout, $err) = Compress::Raw::Zlib::Deflate->new(
		# nnrpd (INN) and Compress::Raw::Zlib favor MemLevel=9,
		# the zlib C library and git use MemLevel=8 as the default
		# -MemLevel => 9,
		-Bufsize => 65536, # same as nnrpd
		-WindowBits => -15, # RFC 1951
		-AppendOutput => 1,
	);
	$err == Z_OK or die "Failed to initialize zlib deflate stream: $err";
}

sub enable {
	my ($class, $self, $tag) = @_;
	my ($in, $err) = Compress::Raw::Zlib::Inflate->new(%IN_OPT);
	if ($err != Z_OK) {
		$self->err("Inflate->new failed: $err");
		$self->write(\"$tag BAD failed to activate compression\r\n");
		return;
	}
	$self->write(\"$tag OK DEFLATE active\r\n");
	bless $self, $class;
	$self->{zin} = $in;
}

# overrides PublicInbox::NNTP::compressed
sub compressed { 1 }

sub do_read ($$$$) {
	my ($self, $rbuf, $len, $off) = @_;

	my $zin = $self->{zin} or return; # closed
	my $doff;
	my $dbuf = delete($self->{dbuf}) // '';
	$doff = length($dbuf);
	my $r = PublicInbox::DS::do_read($self, \$dbuf, $len, $doff) or return;

	# Workaround inflate bug appending to OOK scalars:
	# 
	# We only have $off if the client is pipelining, and pipelining
	# is where our substr() OOK optimization in event_step makes sense.
	if ($off) {
		my $copy = $$rbuf;
		undef $$rbuf;
		$$rbuf = $copy;
	}

	# assert(length($$rbuf) == $off) as far as NNTP.pm is concerned
	# -ConsumeInput is true, so $dbuf is automatically emptied
	my $err = $zin->inflate($dbuf, $rbuf);
	if ($err == Z_OK) {
		$self->{dbuf} = $dbuf if $dbuf ne '';
		$r = length($$rbuf) and return $r;
		# nothing ready, yet, get more, later
		$self->requeue;
	} else {
		delete $self->{zin};
		$self->close;
	}
	0;
}

# override PublicInbox::DS::msg_more
sub msg_more ($$) {
	my $self = $_[0];

	# $_[1] may be a reference or not for ->deflate
	my $err = $zout->deflate($_[1], $zbuf);
	$err == Z_OK or die "->deflate failed $err";
	1;
}

sub zflush ($) {
	my ($self) = @_;

	my $deflated = $zbuf;
	$zbuf = \(my $next = '');

	my $err = $zout->flush($deflated, Z_FULL_FLUSH);
	$err == Z_OK or die "->flush failed $err";

	# We can still let the lower socket layer do buffering:
	PublicInbox::DS::msg_more($self, $$deflated);
}

# compatible with PublicInbox::DS::write, so $_[1] may be a reference or not
sub write ($$) {
	my $self = $_[0];
	return PublicInbox::DS::write($self, $_[1]) if ref($_[1]) eq 'CODE';

	my $deflated = $zbuf;
	$zbuf = \(my $next = '');

	# $_[1] may be a reference or not for ->deflate
	my $err = $zout->deflate($_[1], $deflated);
	$err == Z_OK or die "->deflate failed $err";
	$err = $zout->flush($deflated, Z_FULL_FLUSH);
	$err == Z_OK or die "->flush failed $err";

	# We can still let the socket layer do buffering:
	PublicInbox::DS::write($self, $deflated);
}

1;
public-inbox-1.6.1/lib/PublicInbox/IMAPsearchqp.pm000066400000000000000000000203161377346120300217210ustar00rootroot00000000000000# Copyright (C) 2020 all contributors 
# License: AGPL-3.0+ 
# IMAP search query parser.  cf RFC 3501

# We currently compile Xapian queries to a string which is fed
# to Xapian's query parser.  However, we may use Xapian-provided
# Query object API to build an optree, instead.
package PublicInbox::IMAPsearchqp;
use strict;
use Parse::RecDescent;
use Time::Local qw(timegm);
use POSIX qw(strftime);
our $q = bless {}, __PACKAGE__; # singleton, reachable in generated P::RD
my @MoY = qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC);
my %MM = map {; $MoY[$_-1] => sprintf('%02u', $_) } (1..12);

# IMAP to Xapian header search key mapping
my %IH2X = (
	SUBJECT => 's:',
	BODY => 'b:',
	# TEXT => undef, # => everything
	FROM => 'f:',
	TO => 't:',
	CC => 'c:',
	# BCC => 'bcc:', # TODO

	# IMAP allows searching arbitrary headers via
	# "HEADER $field_name $string" which gets silly expensive.
	# We only allow the headers we already index.
	'MESSAGE-ID' => 'm:',
	'LIST-ID' => 'l:',
	# KEYWORD # TODO ? dfpre,dfpost,...
);

sub uid_set_xap ($$) {
	my ($self, $seq_set) = @_;
	my @u;
	do {
		my $u = $self->{imap}->range_step(\$seq_set);
		die $u unless ref($u); # break out of the parser on error
		push @u, "uid:$u->[0]..$u->[1]";
	} while ($seq_set);
	push(@{$q->{xap}}, @u > 1 ? '('.join(' OR ', @u).')' : $u[0]);
}

sub xap_only ($;$) {
	my ($self, $query) = @_;
	delete $self->{sql}; # query too complex for over.sqlite3
	push @{$self->{xap}}, $query if defined($query);

	# looks like we can't use SQLite-only, convert SQLite UID
	# ranges to Xapian:
	if (my $uid = delete $self->{uid}) {
		uid_set_xap($self, $_) for @$uid;
	}
	1;
}

sub ih2x {
	my ($self, $field_name, $s) = @_; # $self == $q
	$s =~ /\A"(.*?)"\z/s and $s = $1;

	# AFAIK Xapian can't handle [*"] in probabilistic terms,
	# and it relies on lowercase
	my $xk = defined($field_name) ? ($IH2X{$field_name} // '') : '';
	xap_only($self,
		lc(join(' ', map { qq[$xk"$_"] } split(/[\*"\s]+/, $s))));
	1;
}

sub subq_enter {
	xap_only($q);
	my $old = delete($q->{xap}) // [];
	my $nr = push @{$q->{stack}}, $old;
	die 'BAD deep recursion' if $nr > 10;
	$q->{xap} = [];
}

sub subq_leave {
	my $child = delete $q->{xap};
	my $parent = $q->{xap} = pop @{$q->{stack}};
	push(@$parent, @$child > 1 ? '('.join(' ', @$child).')' : $child->[0]);
	1;
}

sub yyyymmdd ($) {
	my ($item) = @_;
	my ($dd, $mon, $yyyy) = split(/-/, $item->{date}, 3);
	my $mm = $MM{$mon} // die "BAD month: $mon";
	wantarray ? ($yyyy, $mm, sprintf('%02u', $dd))
		: timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
}

sub SENTSINCE {
	my ($self, $item) = @_;
	my ($yyyy, $mm, $dd) = yyyymmdd($item);
	push @{$self->{xap}}, "d:$yyyy$mm$dd..";
	my $sql = $self->{sql} or return 1;
	my $ds = timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
	$$sql .= " AND ds >= $ds";
}

sub SENTON {
	my ($self, $item) = @_;
	my ($yyyy, $mm, $dd) = yyyymmdd($item);
	my $ds = timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
	my $end = $ds + 86399; # no leap day
	my $dt_end = strftime('%Y%m%d%H%M%S', gmtime($end));
	push @{$self->{xap}}, "dt:$yyyy$mm$dd"."000000..$dt_end";
	my $sql = $self->{sql} or return 1;
	$$sql .= " AND ds >= $ds AND ds <= $end";
}

sub SENTBEFORE {
	my ($self, $item) = @_;
	my ($yyyy, $mm, $dd) = yyyymmdd($item);
	push @{$self->{xap}}, "d:..$yyyy$mm$dd";
	my $sql = $self->{sql} or return 1;
	my $ds = timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
	$$sql .= " AND ds <= $ds";
}

sub ON {
	my ($self, $item) = @_;
	my $ts = yyyymmdd($item);
	my $end = $ts + 86399; # no leap day
	push @{$self->{xap}}, "ts:$ts..$end";
	my $sql = $self->{sql} or return 1;
	$$sql .= " AND ts >= $ts AND ts <= $end";
}

sub BEFORE {
	my ($self, $item) = @_;
	my $ts = yyyymmdd($item);
	push @{$self->{xap}}, "ts:..$ts";
	my $sql = $self->{sql} or return 1;
	$$sql .= " AND ts <= $ts";
}

sub SINCE {
	my ($self, $item) = @_;
	my $ts = yyyymmdd($item);
	push @{$self->{xap}}, "ts:$ts..";
	my $sql = $self->{sql} or return 1;
	$$sql .= " AND ts >= $ts";
}

sub uid_set ($$) {
	my ($self, $seq_set) = @_;
	if ($self->{sql}) {
		push @{$q->{uid}}, $seq_set;
	} else { # we've gone Xapian-only
		uid_set_xap($self, $seq_set);
	}
	1;
}

sub msn_set {
	my ($self, $seq_set) = @_;
	PublicInbox::IMAP::msn_to_uid_range(
		$self->{msn2uid} //= $self->{imap}->msn2uid, $seq_set);
	uid_set($self, $seq_set);
}

# things that should not match
sub impossible {
	my ($self) = @_;
	push @{$self->{xap}}, 'bytes:..0';
	my $sql = $self->{sql} or return 1;
	$$sql .= ' AND num < 0';
}

my $prd = Parse::RecDescent->new(<<'EOG');

{ my $q = $PublicInbox::IMAPsearchqp::q; }
search_key : CHARSET(?) search_key1(s) { $return = $q }

# n.b. we silently ignore most per-message flags right now;
# they're here for now to not dump parser errors.
search_key1 : "ALL" | "ANSWERED" | "RECENT" | "UNSEEN" | "SEEN" | "NEW"
	| "UNANSWERED" | "UNDELETED" | "UNDRAFT" | "UNFLAGGED"
	| DELETED | DRAFT | FLAGGED | OLD
	| OR_search_keys
	| NOT_search_key
	| LARGER_number
	| SMALLER_number
	| SENTSINCE_date
	| SENTON_date
	| SENTBEFORE_date
	| SINCE_date
	| ON_date
	| BEFORE_date
	| FROM_string
	| HEADER_field_name_string
	| TO_string
	| CC_string
	| BCC_string
	| SUBJECT_string
	| BODY_string
	| TEXT_string
	| UID_set
	| MSN_set
	| sub_query
	| 

charset : /\S+/
CHARSET : 'CHARSET' charset
{ $item{charset} =~ /\A(?:UTF-8|US-ASCII)\z/ ? 1 : die('NO [BADCHARSET]'); }

SENTSINCE_date : 'SENTSINCE' date { $q->SENTSINCE(\%item) }
SENTON_date : 'SENTON' date { $q->SENTON(\%item) }
SENTBEFORE_date : 'SENTBEFORE' date { $q->SENTBEFORE(\%item) }

SINCE_date : 'SINCE' date { $q->SINCE(\%item) }
ON_date : 'ON' date { $q->ON(\%item) }
BEFORE_date : 'BEFORE' date { $q->BEFORE(\%item) }

MSN_set : sequence_set { $q->msn_set($item{sequence_set}) }
UID_set : "UID" sequence_set { $q->uid_set($item{sequence_set}) }
LARGER_number : "LARGER" number { $q->xap_only("bytes:$item{number}..") }
SMALLER_number : "SMALLER" number { $q->xap_only("bytes:..$item{number}") }

DELETED : "DELETED" { $q->impossible }
OLD : "OLD" { $q->impossible }
FLAGGED : "FLAGGED" { $q->impossible }
DRAFT : "DRAFT" { $q->impossible }

# pass "NOT" through XXX is this right?
OP_NOT : "NOT" { $q->xap_only('NOT') }
NOT_search_key : OP_NOT search_key1
OP_OR : "OR" {
	$q->xap_only('OP_OR');
	my $cur = delete $q->{xap};
	push @{$q->{stack}}, $cur;
	$q->{xap} = [];
}
search_key_a : search_key1
{
	my $ka = delete $q->{xap};
	$q->{xap} = [];
	push @{$q->{stack}}, $ka;
}
OR_search_keys : OP_OR search_key_a search_key1
{
	my $kb = delete $q->{xap};
	my $ka = pop @{$q->{stack}};
	my $xap = $q->{xap} = pop @{$q->{stack}};
	my $op = pop @$xap;
	$op eq 'OP_OR' or die "BAD expected OR: $op";
	$ka = @$ka > 1 ? '('.join(' ', @$ka).')' : $ka->[0];
	$kb = @$kb > 1 ? '('.join(' ', @$kb).')' : $kb->[0];
	push @$xap, "($ka OR $kb)";
}
HEADER_field_name_string : "HEADER" field_name string
{
	$q->ih2x($item{field_name}, $item{string});
}
FROM_string : "FROM" string { $q->ih2x('FROM', $item{string}) }
TO_string : "TO" string { $q->ih2x('TO', $item{string}) }
CC_string : "CC" string { $q->ih2x('CC', $item{string}) }
BCC_string : "BCC" string { $q->ih2x('BCC', $item{string}) }
SUBJECT_string : "SUBJECT" string { $q->ih2x('SUBJECT', $item{string}) }
BODY_string : "BODY" string { $q->ih2x('BODY', $item{string}) }
TEXT_string : "TEXT" string { $q->ih2x(undef, $item{string}) }
op_subq_enter : '(' { $q->subq_enter }
sub_query : op_subq_enter search_key1(s) ')' { $q->subq_leave }

field_name : /[\x21-\x39\x3b-\x7e]+/
string : quoted | literal
literal : /[^"\(\) \t]+/ # bogus, I know
quoted : /"[^"]*"/
number : /[0-9]+/
date : /[0123]?[0-9]-[A-Z]{3}-[0-9]{4,}/
sequence_set : /\A[0-9][0-9,:]*[0-9\*]?\z/
EOG

sub parse {
	my ($imap, $query) = @_;
	my $sql = '';
	%$q = (sql => \$sql, imap => $imap); # imap = PublicInbox::IMAP obj
	# $::RD_TRACE = 1;
	my $res = eval { $prd->search_key(uc($query)) };
	return $@ if $@ && $@ =~ /\A(?:BAD|NO) /;
	return 'BAD unexpected result' if !$res || $res != $q;
	if (exists $q->{sql}) {
		delete $q->{xap};
		if (my $uid = delete $q->{uid}) {
			my @u;
			for my $uid_set (@$uid) {
				my $u = $q->{imap}->range_step(\$uid_set);
				return $u if !ref($u);
				push @u, "num >= $u->[0] AND num <= $u->[1]";
			}
			$sql .= ' AND ('.join(' OR ', @u).')';
		}
	} else {
		$q->{xap} = join(' ', @{$q->{xap}});
	}
	delete @$q{qw(imap msn2uid)};
	$q;
}

1
public-inbox-1.6.1/lib/PublicInbox/IdxStack.pm000066400000000000000000000024671377346120300211650ustar00rootroot00000000000000# Copyright (C) 2020 all contributors 
# License: AGPL-3.0+ 

# temporary stack for public-inbox-index
package PublicInbox::IdxStack;
use v5.10.1;
use strict;
use Fcntl qw(:seek);
use constant FMT => eval { pack('Q', 1) } ? 'A1QQH*' : 'A1IIH*';

# start off in write-only mode
sub new {
	open(my $io, '+>', undef) or die "open: $!";
	bless { wr => $io, latest_cmt => $_[1] }, __PACKAGE__
}

# file_char = [d|m]
sub push_rec {
	my ($self, $file_char, $at, $ct, $blob_oid) = @_;
	my $rec = pack(FMT, $file_char, $at, $ct, $blob_oid);
	$self->{rec_size} //= length($rec);
	print { $self->{wr} } $rec or die "print: $!";
	$self->{tot_size} += length($rec);
}

sub num_records {
	my ($self) = @_;
	$self->{rec_size} ? $self->{tot_size} / $self->{rec_size} : 0;
}

# switch into read-only mode and returns self
sub read_prepare {
	my ($self) = @_;
	my $io = $self->{rd} = delete($self->{wr});
	$io->flush or die "flush: $!";
	$self;
}

sub pop_rec {
	my ($self) = @_;
	my $sz = $self->{rec_size} or return;
	my $rec_pos = $self->{tot_size} -= $sz;
	return if $rec_pos < 0;
	my $io = $self->{rd};
	seek($io, $rec_pos, SEEK_SET) or die "seek: $!";
	my $r = read($io, my $buf, $sz);
	defined($r) or die "read: $!";
	$r == $sz or die "read($r != $sz)";
	unpack(FMT, $buf);
}

1;
public-inbox-1.6.1/lib/PublicInbox/Import.pm000066400000000000000000000475471377346120300207350ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 
#
# git fast-import-based ssoma-mda MDA replacement
# This is only ever run by public-inbox-mda, public-inbox-learn
# and public-inbox-watch. Not the WWW or NNTP code which only
# requires read-only access.
package PublicInbox::Import;
use strict;
use parent qw(PublicInbox::Lock);
use v5.10.1;
use PublicInbox::Spawn qw(spawn popen_rd);
use PublicInbox::MID qw(mids mid2path);
use PublicInbox::Address;
use PublicInbox::Smsg;
use PublicInbox::MsgTime qw(msg_datestamp);
use PublicInbox::ContentHash qw(content_digest);
use PublicInbox::MDA;
use PublicInbox::Eml;
use POSIX qw(strftime);

sub new {
	# we can't change arg order, this is documented in POD
	# and external projects may rely on it:
	my ($class, $git, $name, $email, $ibx) = @_;
	my $ref = 'refs/heads/master';
	if ($ibx) {
		$ref = $ibx->{ref_head} // 'refs/heads/master';
		$name //= $ibx->{name};
		$email //= $ibx->{-primary_address};
		$git //= $ibx->git;
	}
	bless {
		git => $git,
		ident => "$name <$email>",
		mark => 1,
		ref => $ref,
		ibx => $ibx,
		path_type => '2/38', # or 'v2'
		lock_path => "$git->{git_dir}/ssoma.lock", # v2 changes this
		bytes_added => 0,
	}, $class
}

# idempotent start function
sub gfi_start {
	my ($self) = @_;

	return ($self->{in}, $self->{out}) if $self->{pid};

	my (@ret, $out_r, $out_w);
	pipe($out_r, $out_w) or die "pipe failed: $!";

	$self->lock_acquire;
	eval {
		my ($git, $ref) = @$self{qw(git ref)};
		local $/ = "\n";
		chomp($self->{tip} = $git->qx(qw(rev-parse --revs-only), $ref));
		if ($self->{path_type} ne '2/38' && $self->{tip}) {
			local $/ = "\0";
			my @t = $git->qx(qw(ls-tree -r -z --name-only), $ref);
			chomp @t;
			$self->{-tree} = { map { $_ => 1 } @t };
		}
		my @cmd = ('git', "--git-dir=$git->{git_dir}",
			qw(fast-import --quiet --done --date-format=raw));
		my ($in_r, $pid) = popen_rd(\@cmd, undef, { 0 => $out_r });
		$out_w->autoflush(1);
		$self->{in} = $in_r;
		$self->{out} = $out_w;
		$self->{pid} = $pid;
		$self->{nchg} = 0;
		@ret = ($in_r, $out_w);
	};
	if ($@) {
		$self->lock_release;
		die $@;
	}
	@ret;
}

sub wfail () { die "write to fast-import failed: $!" }

sub now_raw () { time . ' +0000' }

sub norm_body ($) {
	my ($mime) = @_;
	my $b = $mime->body_raw;
	$b =~ s/(\r?\n)+\z//s;
	$b
}

# only used for v1 (ssoma) inboxes
sub _check_path ($$$$) {
	my ($r, $w, $tip, $path) = @_;
	return if $tip eq '';
	print $w "ls $tip $path\n" or wfail;
	local $/ = "\n";
	defined(my $info = <$r>) or die "EOF from fast-import: $!";
	$info =~ /\Amissing / ? undef : $info;
}

sub _cat_blob ($$$) {
	my ($r, $w, $oid) = @_;
	print $w "cat-blob $oid\n" or wfail;
	local $/ = "\n";
	my $info = <$r>;
	defined $info or die "EOF from fast-import / cat-blob: $!";
	$info =~ /\A[a-f0-9]{40,} blob ([0-9]+)\n\z/ or return;
	my $left = $1;
	my $offset = 0;
	my $buf = '';
	my $n;
	while ($left > 0) {
		$n = read($r, $buf, $left, $offset);
		defined($n) or die "read cat-blob failed: $!";
		$n == 0 and die 'fast-export (cat-blob) died';
		$left -= $n;
		$offset += $n;
	}
	$n = read($r, my $lf, 1);
	defined($n) or die "read final byte of cat-blob failed: $!";
	die "bad read on final byte: <$lf>" if $lf ne "\n";

	# fixup some bugginess in old versions:
	$buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
	\$buf;
}

sub cat_blob {
	my ($self, $oid) = @_;
	my ($r, $w) = $self->gfi_start;
	_cat_blob($r, $w, $oid);
}

sub check_remove_v1 {
	my ($r, $w, $tip, $path, $mime) = @_;

	my $info = _check_path($r, $w, $tip, $path) or return ('MISSING',undef);
	$info =~ m!\A100644 blob ([a-f0-9]{40,})\t!s or die "not blob: $info";
	my $oid = $1;
	my $msg = _cat_blob($r, $w, $oid) or die "BUG: cat-blob $1 failed";
	my $cur = PublicInbox::Eml->new($msg);
	my $cur_s = $cur->header('Subject');
	$cur_s = '' unless defined $cur_s;
	my $cur_m = $mime->header('Subject');
	$cur_m = '' unless defined $cur_m;
	if ($cur_s ne $cur_m || norm_body($cur) ne norm_body($mime)) {
		return ('MISMATCH', $cur);
	}
	(undef, $cur);
}

sub checkpoint {
	my ($self) = @_;
	return unless $self->{pid};
	print { $self->{out} } "checkpoint\n" or wfail;
	undef;
}

sub progress {
	my ($self, $msg) = @_;
	return unless $self->{pid};
	print { $self->{out} } "progress $msg\n" or wfail;
	readline($self->{in}) eq "progress $msg\n" or die
		"progress $msg not received\n";
	undef;
}

sub _update_git_info ($$) {
	my ($self, $do_gc) = @_;
	# for compatibility with existing ssoma installations
	# we can probably remove this entirely by 2020
	my $git_dir = $self->{git}->{git_dir};
	my @cmd = ('git', "--git-dir=$git_dir");
	my $index = "$git_dir/ssoma.index";
	if (-e $index && !$ENV{FAST}) {
		my $env = { GIT_INDEX_FILE => $index };
		run_die([@cmd, qw(read-tree -m -v -i), $self->{ref}], $env);
	}
	eval { run_die([@cmd, 'update-server-info']) };
	my $ibx = $self->{ibx};
	if ($ibx && $ibx->version == 1 && -d "$ibx->{inboxdir}/public-inbox" &&
				eval { require PublicInbox::SearchIdx }) {
		eval {
			my $s = PublicInbox::SearchIdx->new($ibx);
			$s->index_sync({ ref => $self->{ref} });
		};
		warn "$ibx->{inboxdir} index failed: $@\n" if $@;
	}
	eval { run_die([@cmd, qw(gc --auto)]) } if $do_gc;
}

sub barrier {
	my ($self) = @_;

	# For safety, we ensure git checkpoint is complete before because
	# the data in git is still more important than what is in Xapian
	# in v2.  Performance may be gained by delaying the ->progress
	# call but we lose safety
	if ($self->{nchg}) {
		$self->checkpoint;
		$self->progress('checkpoint');
		_update_git_info($self, 0);
		$self->{nchg} = 0;
	}
}

# used for v2
sub get_mark {
	my ($self, $mark) = @_;
	die "not active\n" unless $self->{pid};
	my ($r, $w) = $self->gfi_start;
	print $w "get-mark $mark\n" or wfail;
	defined(my $oid = <$r>) or die "get-mark failed, need git 2.6.0+\n";
	chomp($oid);
	$oid;
}

# returns undef on non-existent
# ('MISMATCH', PublicInbox::Eml) on mismatch
# (:MARK, PublicInbox::Eml) on success
#
# v2 callers should check with Xapian before calling this as
# it is not idempotent.
sub remove {
	my ($self, $mime, $msg) = @_; # mime = PublicInbox::Eml or Email::MIME

	my $path_type = $self->{path_type};
	my ($path, $err, $cur, $blob);

	my ($r, $w) = $self->gfi_start;
	my $tip = $self->{tip};
	if ($path_type eq '2/38') {
		$path = mid2path(v1_mid0($mime));
		($err, $cur) = check_remove_v1($r, $w, $tip, $path, $mime);
		return ($err, $cur) if $err;
	} else {
		my $sref;
		if (ref($mime) eq 'SCALAR') { # optimization used by V2Writable
			$sref = $mime;
		} else { # XXX should not be necessary:
			my $str = $mime->as_string;
			$sref = \$str;
		}
		my $len = length($$sref);
		$blob = $self->{mark}++;
		print $w "blob\nmark :$blob\ndata $len\n",
			$$sref, "\n" or wfail;
	}

	my $ref = $self->{ref};
	my $commit = $self->{mark}++;
	my $parent = $tip =~ /\A:/ ? $tip : undef;
	unless ($parent) {
		print $w "reset $ref\n" or wfail;
	}
	my $ident = $self->{ident};
	my $now = now_raw();
	$msg //= 'rm';
	my $len = length($msg) + 1;
	print $w "commit $ref\nmark :$commit\n",
		"author $ident $now\n",
		"committer $ident $now\n",
		"data $len\n$msg\n\n",
		'from ', ($parent ? $parent : $tip), "\n" or wfail;
	if (defined $path) {
		print $w "D $path\n\n" or wfail;
	} else {
		clean_tree_v2($self, $w, 'd');
		print $w "M 100644 :$blob d\n\n" or wfail;
	}
	$self->{nchg}++;
	(($self->{tip} = ":$commit"), $cur);
}

sub git_timestamp ($) {
	my ($ts, $zone) = @{$_[0]};
	$ts = 0 if $ts < 0; # git uses unsigned times
	"$ts $zone";
}

sub extract_cmt_info ($;$) {
	my ($mime, $smsg) = @_;
	# $mime is PublicInbox::Eml, but remains Email::MIME-compatible
	$smsg //= bless {}, 'PublicInbox::Smsg';

	$smsg->populate($mime);

	my $sender = '';
	my $from = delete($smsg->{From}) // '';
	my ($email) = PublicInbox::Address::emails($from);
	my ($name) = PublicInbox::Address::names($from);
	if (!defined($name) || !defined($email)) {
		$sender = $mime->header('Sender') // '';
		$name //= (PublicInbox::Address::names($sender))[0];
		$email //= (PublicInbox::Address::emails($sender))[0];
	}
	if (defined $email) {
		# Email::Address::XS may leave quoted '<' in addresses,
		# which git-fast-import doesn't like
		$email =~ tr/<>//d;

		# quiet down wide character warnings with utf8::encode
		utf8::encode($email);
	} else {
		$email = '';
		warn "no email in From: $from or Sender: $sender\n";
	}

	# git gets confused with:
	#  "'A U Thor ' via foo" 
	# ref:
	# 
	if (defined $name) {
		$name =~ tr/<>//d;
		utf8::encode($name);
	} else {
		$name = '';
		warn "no name in From: $from or Sender: $sender\n";
	}

	my $subject = delete($smsg->{Subject}) // '(no subject)';
	utf8::encode($subject);
	my $at = git_timestamp(delete $smsg->{-ds});
	my $ct = git_timestamp(delete $smsg->{-ts});
	("$name <$email>", $at, $ct, $subject);
}

# kill potentially confusing/misleading headers
our @UNWANTED_HEADERS = (qw(Bytes Lines Content-Length),
			qw(Status X-Status));
sub drop_unwanted_headers ($) {
	my ($eml) = @_;
	for (@UNWANTED_HEADERS, @PublicInbox::MDA::BAD_HEADERS) {
		$eml->header_set($_);
	}
}

# used by V2Writable, too
sub append_mid ($$) {
	my ($hdr, $mid0) = @_;
	# @cur is likely empty if we need to call this sub, but it could
	# have random unparseable crap which we'll preserve, too.
	my @cur = $hdr->header_raw('Message-ID');
	$hdr->header_set('Message-ID', @cur, "<$mid0>");
}

sub v1_mid0 ($) {
	my ($eml) = @_;
	my $mids = mids($eml);

	if (!scalar(@$mids)) { # spam often has no Message-ID
		my $mid0 = digest2mid(content_digest($eml), $eml);
		append_mid($eml, $mid0);
		return $mid0;
	}
	$mids->[0];
}
sub clean_tree_v2 ($$$) {
	my ($self, $w, $keep) = @_;
	my $tree = $self->{-tree} or return; #v2 only
	delete $tree->{$keep};
	foreach (keys %$tree) {
		print $w "D $_\n" or wfail;
	}
	%$tree = ($keep => 1);
}

# returns undef on duplicate
# returns the :MARK of the most recent commit
sub add {
	my ($self, $mime, $check_cb, $smsg) = @_;

	my ($author, $at, $ct, $subject) = extract_cmt_info($mime, $smsg);
	my $path_type = $self->{path_type};
	my $path;
	if ($path_type eq '2/38') {
		$path = mid2path(v1_mid0($mime));
	} else { # v2 layout, one file:
		$path = 'm';
	}

	my ($r, $w) = $self->gfi_start;
	my $tip = $self->{tip};
	if ($path_type eq '2/38') {
		_check_path($r, $w, $tip, $path) and return;
	}

	drop_unwanted_headers($mime);

	# spam check:
	if ($check_cb) {
		$mime = $check_cb->($mime, $self->{ibx}) or return;
	}

	my $blob = $self->{mark}++;
	my $raw_email = $mime->{-public_inbox_raw} // $mime->as_string;
	my $n = length($raw_email);
	$self->{bytes_added} += $n;
	print $w "blob\nmark :$blob\ndata ", $n, "\n" or wfail;
	print $w $raw_email, "\n" or wfail;

	# v2: we need this for Xapian
	if ($smsg) {
		$smsg->{blob} = $self->get_mark(":$blob");
		$smsg->{raw_bytes} = $n;
		$smsg->{-raw_email} = \$raw_email;
	}
	my $ref = $self->{ref};
	my $commit = $self->{mark}++;
	my $parent = $tip =~ /\A:/ ? $tip : undef;

	unless ($parent) {
		print $w "reset $ref\n" or wfail;
	}

	print $w "commit $ref\nmark :$commit\n",
		"author $author $at\n",
		"committer $self->{ident} $ct\n" or wfail;
	print $w "data ", (length($subject) + 1), "\n",
		$subject, "\n\n" or wfail;
	if ($tip ne '') {
		print $w 'from ', ($parent ? $parent : $tip), "\n" or wfail;
	}
	clean_tree_v2($self, $w, $path);
	print $w "M 100644 :$blob $path\n\n" or wfail;
	$self->{nchg}++;
	$self->{tip} = ":$commit";
}

sub run_die ($;$$) {
	my ($cmd, $env, $rdr) = @_;
	my $pid = spawn($cmd, $env, $rdr);
	waitpid($pid, 0) == $pid or die join(' ', @$cmd) .' did not finish';
	$? == 0 or die join(' ', @$cmd) . " failed: $?\n";
}

my @INIT_FILES = ('HEAD' => "ref: refs/heads/master\n",
		'description' => < <{git}->{git_dir} if ref($dir);
	require File::Path;
	File::Path::mkpath([ map { "$dir/$_" } qw(objects/info refs/heads) ]);
	for (my $i = 0; $i < @INIT_FILES; $i++) {
		my $f = $dir.'/'.$INIT_FILES[$i++];
		next if -f $f;
		open my $fh, '>', $f or die "open $f: $!";
		print $fh $INIT_FILES[$i] or die "print $f: $!";
		close $fh or die "close $f: $!";
	}
}

# true if locked and active
sub active { !!$_[0]->{out} }

sub done {
	my ($self) = @_;
	my $w = delete $self->{out} or return;
	eval {
		my $r = delete $self->{in} or die 'BUG: missing {in} when done';
		print $w "done\n" or wfail;
		my $pid = delete $self->{pid} or
				die 'BUG: missing {pid} when done';
		waitpid($pid, 0) == $pid or die 'fast-import did not finish';
		$? == 0 or die "fast-import failed: $?";
	};
	my $wait_err = $@;
	my $nchg = delete $self->{nchg};
	if ($nchg && !$wait_err) {
		eval { _update_git_info($self, 1) };
		warn "E: $self->{git}->{git_dir} update info: $@\n" if $@;
	}
	$self->lock_release(!!$nchg);
	$self->{git}->cleanup;
	die $wait_err if $wait_err;
}

sub atfork_child {
	my ($self) = @_;
	foreach my $f (qw(in out)) {
		next unless defined($self->{$f});
		close $self->{$f} or die "failed to close import[$f]: $!\n";
	}
}

sub digest2mid ($$) {
	my ($dig, $hdr) = @_;
	my $b64 = $dig->clone->b64digest;
	# Make our own URLs nicer:
	# See "Base 64 Encoding with URL and Filename Safe Alphabet" in RFC4648
	$b64 =~ tr!+/=!-_!d;

	# Add a date prefix to prevent a leading '-' in case that trips
	# up some tools (e.g. if a Message-ID were a expected as a
	# command-line arg)
	my $dt = msg_datestamp($hdr);
	$dt = POSIX::strftime('%Y%m%d%H%M%S', gmtime($dt));
	"$dt.$b64" . '@z';
}

sub rewrite_commit ($$$$) {
	my ($self, $oids, $buf, $mime) = @_;
	my ($author, $at, $ct, $subject);
	if ($mime) {
		($author, $at, $ct, $subject) = extract_cmt_info($mime);
	} else {
		$author = '<>';
		$subject = 'purged '.join(' ', @$oids);
	}
	@$oids = ();
	$subject .= "\n";
	foreach my $i (0..$#$buf) {
		my $l = $buf->[$i];
		if ($l =~ /^author .* ([0-9]+ [\+-]?[0-9]+)$/) {
			$at //= $1;
			$buf->[$i] = "author $author $at\n";
		} elsif ($l =~ /^committer .* ([0-9]+ [\+-]?[0-9]+)$/) {
			$ct //= $1;
			$buf->[$i] = "committer $self->{ident} $ct\n";
		} elsif ($l =~ /^data ([0-9]+)/) {
			$buf->[$i++] = "data " . length($subject) . "\n";
			$buf->[$i] = $subject;
			last;
		}
	}
}

# returns the new commit OID if a replacement was done
# returns undef if nothing was done
sub replace_oids {
	my ($self, $mime, $replace_map) = @_; # oid => raw string
	my $tmp = "refs/heads/replace-".((keys %$replace_map)[0]);
	my $old = $self->{'ref'};
	my $git = $self->{git};
	my @export = (qw(fast-export --no-data --use-done-feature), $old);
	my $rd = $git->popen(@export);
	my ($r, $w) = $self->gfi_start;
	my @buf;
	my $nreplace = 0;
	my @oids;
	my ($done, $mark);
	my $tree = $self->{-tree};
	while (<$rd>) {
		if (/^reset (?:.+)/) {
			push @buf, "reset $tmp\n";
		} elsif (/^commit (?:.+)/) {
			if (@buf) {
				print $w @buf or wfail;
				@buf = ();
			}
			push @buf, "commit $tmp\n";
		} elsif (/^data ([0-9]+)/) {
			# only commit message, so $len is small:
			my $len = $1; # + 1 for trailing "\n"
			push @buf, $_;
			my $n = read($rd, my $buf, $len) or die "read: $!";
			$len == $n or die "short read ($n < $len)";
			push @buf, $buf;
		} elsif (/^M 100644 ([a-f0-9]+) (\w+)/) {
			my ($oid, $path) = ($1, $2);
			$tree->{$path} = 1;
			my $sref = $replace_map->{$oid};
			if (defined $sref) {
				push @oids, $oid;
				my $n = length($$sref);
				push @buf, "M 100644 inline $path\ndata $n\n";
				push @buf, $$sref; # hope CoW works...
				push @buf, "\n";
			} else {
				push @buf, $_;
			}
		} elsif (/^D (\w+)/) {
			my $path = $1;
			push @buf, $_ if $tree->{$path};
		} elsif ($_ eq "\n") {
			if (@oids) {
				if (!$mime) {
					my $out = join('', @buf);
					$out =~ s/^/# /sgm;
					warn "purge rewriting\n", $out, "\n";
				}
				rewrite_commit($self, \@oids, \@buf, $mime);
				$nreplace++;
			}
			print $w @buf, "\n" or wfail;
			@buf = ();
		} elsif ($_ eq "done\n") {
			$done = 1;
		} elsif (/^mark :([0-9]+)$/) {
			push @buf, $_;
			$mark = $1;
		} else {
			push @buf, $_;
		}
	}
	close $rd or die "close fast-export failed: $?";
	if (@buf) {
		print $w @buf or wfail;
	}
	die 'done\n not seen from fast-export' unless $done;
	chomp(my $cmt = $self->get_mark(":$mark")) if $nreplace;
	$self->{nchg} = 0; # prevent _update_git_info until update-ref:
	$self->done;
	my @git = ('git', "--git-dir=$git->{git_dir}");

	run_die([@git, qw(update-ref), $old, $tmp]) if $nreplace;

	run_die([@git, qw(update-ref -d), $tmp]);

	return if $nreplace == 0;

	run_die([@git, qw(-c gc.reflogExpire=now gc --prune=all --quiet)]);

	# check that old OIDs are gone
	my $err = 0;
	foreach my $oid (keys %$replace_map) {
		my @info = $git->check($oid);
		if (@info) {
			warn "$oid not replaced\n";
			$err++;
		}
	}
	_update_git_info($self, 0);
	die "Failed to replace $err object(s)\n" if $err;
	$cmt;
}

1;
__END__
=pod

=head1 NAME

PublicInbox::Import - message importer for public-inbox v1 inboxes

=head1 VERSION

version 1.0

=head1 SYNOPSIS

	use PublicInbox::Eml;
	# PublicInbox::Eml exists as of public-inbox 1.5.0,
	# Email::MIME was used in older versions

	use PublicInbox::Git;
	use PublicInbox::Import;

	chomp(my $git_dir = `git rev-parse --git-dir`);
	$git_dir or die "GIT_DIR= must be specified\n";
	my $git = PublicInbox::Git->new($git_dir);
	my @committer = ('inbox', 'inbox@example.org');
	my $im = PublicInbox::Import->new($git, @committer);

	# to add a message:
	my $message = "From: \n".
		"Subject: test message \n" .
		"Date: Thu, 01 Jan 1970 00:00:00 +0000\n" .
		"Message-ID: \n".
		"\ntest message";
	my $parsed = PublicInbox::Eml->new($message);
	my $ret = $im->add($parsed);
	if (!defined $ret) {
		warn "duplicate: ", $parsed->header_raw('Message-ID'), "\n";
	} else {
		print "imported at mark $ret\n";
	}
	$im->done;

	# to remove a message
	my $junk = PublicInbox::Eml->new($message);
	my ($mark, $orig) = $im->remove($junk);
	if ($mark eq 'MISSING') {
		print "not found\n";
	} elsif ($mark eq 'MISMATCH') {
		print "Message exists but does not match\n\n",
			$orig->as_string, "\n",;
	} else {
		print "removed at mark $mark\n\n",
			$orig->as_string, "\n";
	}
	$im->done;

=head1 DESCRIPTION

An importer and remover for public-inboxes which takes C
or L messages as input and stores them in a git repository as
documented in L,
except it does not allow duplicate Message-IDs.

It requires L and L to be installed.

=head1 METHODS

=cut

=head2 new

	my $im = PublicInbox::Import->new($git, @committer);

Initialize a new PublicInbox::Import object.

=head2 add

	my $parsed = PublicInbox::Eml->new($message);
	$im->add($parsed);

Adds a message to to the git repository.  This will acquire
C<$GIT_DIR/ssoma.lock> and start L if necessary.

Messages added will not be visible to other processes until L
is called, but L may be called on them.

=head2 remove

	my $junk = PublicInbox::Eml->new($message);
	my ($code, $orig) = $im->remove($junk);

Removes a message from the repository.  On success, it returns
a ':'-prefixed numeric code representing the git-fast-import
mark and the original messages as a PublicInbox::Eml
(or Email::MIME) object.
If the message could not be found, the code is "MISSING"
and the original message is undef.  If there is a mismatch where
the "Message-ID" is matched but the subject and body do not match,
the returned code is "MISMATCH" and the conflicting message
is returned as orig.

=head2 done

Finalizes the L and unlocks the repository.
Calling this is required to finalize changes to a repository.

=head1 SEE ALSO

L

=head1 CONTACT

All feedback welcome via plain-text mail to L

The mail archives are hosted at L

=head1 COPYRIGHT

Copyright (C) 2016-2020 all contributors L

License: AGPL-3.0+ L

=cut
public-inbox-1.6.1/lib/PublicInbox/In2Tie.pm000066400000000000000000000012461377346120300205370ustar00rootroot00000000000000# Copyright (C) 2020 all contributors 
# License: AGPL-3.0+ 

# used to ensure PublicInbox::DS can call fileno() as a function
# on Linux::Inotify2 objects
package PublicInbox::In2Tie;
use strict;
use Symbol qw(gensym);

sub io {
	my $in2 = $_[0];
	$in2->blocking(0);
	if ($in2->can('on_overflow')) {
		# broadcasts everything on overflow
		$in2->on_overflow(undef);
	}
	my $io = gensym;
	tie *$io, __PACKAGE__, $in2;
	$io;
}

sub TIEHANDLE {
	my ($class, $in2) = @_;
	bless \$in2, $class; # a scalar reference to an existing reference
}

# this calls Linux::Inotify2::fileno
sub FILENO { ${$_[0]}->fileno }

1;
public-inbox-1.6.1/lib/PublicInbox/Inbox.pm000066400000000000000000000242511377346120300205250ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 
#
# Represents a public-inbox (which may have multiple mailing addresses)
package PublicInbox::Inbox;
use strict;
use warnings;
use PublicInbox::Git;
use PublicInbox::MID qw(mid2path);
use PublicInbox::Eml;

# Long-running "git-cat-file --batch" processes won't notice
# unlinked packs, so we need to restart those processes occasionally.
# Xapian and SQLite file handles are mostly stable, but sometimes an
# admin will attempt to replace them atomically after compact/vacuum
# and we need to be prepared for that.
my $cleanup_timer;
my $cleanup_avail = -1; # 0, or 1
my $have_devel_peek;
my $CLEANUP = {}; # string(inbox) -> inbox

sub git_cleanup ($) {
	my ($self) = @_;
	my $git = $self->{git} or return;
	$git->cleanup;
}

sub cleanup_task () {
	$cleanup_timer = undef;
	my $next = {};
	for my $ibx (values %$CLEANUP) {
		my $again;
		if ($have_devel_peek) {
			foreach my $f (qw(search)) {
				# we bump refcnt by assigning tmp, here:
				my $tmp = $ibx->{$f} or next;
				next if Devel::Peek::SvREFCNT($tmp) > 2;
				delete $ibx->{$f};
				# refcnt is zero when tmp is out-of-scope
			}
		}
		git_cleanup($ibx);
		if (my $gits = $ibx->{-repo_objs}) {
			foreach my $git (@$gits) {
				$again = 1 if $git->cleanup;
			}
		}
		check_inodes($ibx);
		if ($have_devel_peek) {
			$again ||= !!$ibx->{search};
		}
		$next->{"$ibx"} = $ibx if $again;
	}
	$CLEANUP = $next;
}

sub cleanup_possible () {
	# no need to require DS, here, if it were enabled another
	# module would've require'd it, already
	eval { PublicInbox::DS::in_loop() } or return 0;

	eval {
		require Devel::Peek; # needs separate package in Fedora
		$have_devel_peek = 1;
	};
	1;
}

sub _cleanup_later ($) {
	my ($self) = @_;
	$cleanup_avail = cleanup_possible() if $cleanup_avail < 0;
	return if $cleanup_avail != 1;
	$cleanup_timer //= PublicInbox::DS::later(\&cleanup_task);
	$CLEANUP->{"$self"} = $self;
}

sub _set_uint ($$$) {
	my ($opts, $field, $default) = @_;
	my $val = $opts->{$field};
	if (defined $val) {
		$val = $val->[-1] if ref($val) eq 'ARRAY';
		$val = undef if $val !~ /\A[0-9]+\z/;
	}
	$opts->{$field} = $val || $default;
}

sub _set_limiter ($$$) {
	my ($self, $pi_config, $pfx) = @_;
	my $lkey = "-${pfx}_limiter";
	$self->{$lkey} ||= do {
		# full key is: publicinbox.$NAME.httpbackendmax
		my $mkey = $pfx.'max';
		my $val = $self->{$mkey} or return;
		my $lim;
		if ($val =~ /\A[0-9]+\z/) {
			require PublicInbox::Qspawn;
			$lim = PublicInbox::Qspawn::Limiter->new($val);
		} elsif ($val =~ /\A[a-z][a-z0-9]*\z/) {
			$lim = $pi_config->limiter($val);
			warn "$mkey limiter=$val not found\n" if !$lim;
		} else {
			warn "$mkey limiter=$val not understood\n";
		}
		$lim;
	}
}

sub new {
	my ($class, $opts) = @_;
	my $v = $opts->{address} ||= [ 'public-inbox@example.com' ];
	my $p = $opts->{-primary_address} = ref($v) eq 'ARRAY' ? $v->[0] : $v;
	$opts->{domain} = ($p =~ /\@(\S+)\z/) ? $1 : 'localhost';
	my $pi_config = delete $opts->{-pi_config};
	_set_limiter($opts, $pi_config, 'httpbackend');
	_set_uint($opts, 'feedmax', 25);
	$opts->{nntpserver} ||= $pi_config->{'publicinbox.nntpserver'};
	my $dir = $opts->{inboxdir};
	if (defined $dir && -f "$dir/inbox.lock") {
		$opts->{version} = 2;
	}

	# allow any combination of multi-line or comma-delimited hide entries
	my $hide = {};
	if (defined(my $h = $opts->{hide})) {
		foreach my $v (@$h) {
			$hide->{$_} = 1 foreach (split(/\s*,\s*/, $v));
		}
		$opts->{-hide} = $hide;
	}
	bless $opts, $class;
}

sub version { $_[0]->{version} // 1 }

sub git_epoch {
	my ($self, $epoch) = @_;
	$self->version == 2 or return;
	$self->{"$epoch.git"} ||= do {
		my $git_dir = "$self->{inboxdir}/git/$epoch.git";
		my $g = PublicInbox::Git->new($git_dir);
		$g->{-httpbackend_limiter} = $self->{-httpbackend_limiter};
		# no cleanup needed, we never cat-file off this, only clone
		$g;
	};
}

sub git {
	my ($self) = @_;
	$self->{git} ||= do {
		my $git_dir = $self->{inboxdir};
		$git_dir .= '/all.git' if $self->version == 2;
		my $g = PublicInbox::Git->new($git_dir);
		$g->{-httpbackend_limiter} = $self->{-httpbackend_limiter};
		_cleanup_later($self);
		$g;
	};
}

sub max_git_epoch {
	my ($self) = @_;
	return if $self->version < 2;
	my $cur = $self->{-max_git_epoch};
	my $changed = git($self)->alternates_changed;
	if (!defined($cur) || $changed) {
		git_cleanup($self) if $changed;
		my $gits = "$self->{inboxdir}/git";
		if (opendir my $dh, $gits) {
			my $max = -1;
			while (defined(my $git_dir = readdir($dh))) {
				$git_dir =~ m!\A([0-9]+)\.git\z! or next;
				$max = $1 if $1 > $max;
			}
			$cur = $self->{-max_git_epoch} = $max if $max >= 0;
		} else {
			warn "opendir $gits failed: $!\n";
		}
	}
	$cur;
}

sub mm {
	my ($self) = @_;
	$self->{mm} ||= eval {
		require PublicInbox::Msgmap;
		my $dir = $self->{inboxdir};
		if ($self->version >= 2) {
			PublicInbox::Msgmap->new_file("$dir/msgmap.sqlite3");
		} else {
			PublicInbox::Msgmap->new($dir);
		}
	};
}

sub search ($;$$) {
	my ($self, $over_only, $ctx) = @_;
	my $srch = $self->{search} ||= eval {
		_cleanup_later($self);
		require PublicInbox::Search;
		PublicInbox::Search->new($self);
	};
	($over_only || eval { $srch->xdb }) ? $srch : do {
		$ctx and $ctx->{env}->{'psgi.errors'}->print(<{name}' search went away unexpectedly
EOF
		undef;
	};
}

sub over {
	$_[0]->{over} //= eval {
		my $srch = search($_[0], 1) or return;
		my $over = PublicInbox::Over->new("$srch->{xpfx}/over.sqlite3");
		$over->dbh; # may fail
		$over;
	};
}

sub try_cat {
	my ($path) = @_;
	my $rv = '';
	if (open(my $fh, '<', $path)) {
		local $/;
		$rv = <$fh>;
	}
	$rv;
}

sub description {
	my ($self) = @_;
	($self->{description} //= do {
		my $desc = try_cat("$self->{inboxdir}/description");
		local $/ = "\n";
		chomp $desc;
		utf8::decode($desc);
		$desc =~ s/\s+/ /smg;
		$desc eq '' ? undef : $desc;
	}) // '($INBOX_DIR/description missing)';
}

sub cloneurl {
	my ($self) = @_;
	($self->{cloneurl} //= do {
		my $s = try_cat("$self->{inboxdir}/cloneurl");
		my @urls = split(/\s+/s, $s);
		scalar(@urls) ? \@urls : undef
	}) // [];
}

sub base_url {
	my ($self, $env) = @_; # env - PSGI env
	if ($env) {
		my $url = PublicInbox::Git::host_prefix_url($env, '');
		# for mount in Plack::Builder
		$url .= '/' if $url !~ m!/\z!;
		return $url .= $self->{name} . '/';
	}
	# called from a non-PSGI environment (e.g. NNTP/POP3):
	$self->{-base_url} ||= do {
		my $url = $self->{url}->[0] or return undef;
		# expand protocol-relative URLs to HTTPS if we're
		# not inside a web server
		$url = "https:$url" if $url =~ m!\A//!;
		$url .= '/' if $url !~ m!/\z!;
		$url;
	};
}

sub nntp_url {
	my ($self) = @_;
	$self->{-nntp_url} ||= do {
		# no checking for nntp_usable here, we can point entirely
		# to non-local servers or users run by a different user
		my $ns = $self->{nntpserver};
		my $group = $self->{newsgroup};
		my @urls;
		if ($ns && $group) {
			$ns = [ $ns ] if ref($ns) ne 'ARRAY';
			@urls = map {
				my $u = m!\Anntps?://! ? $_ : "nntp://$_";
				$u .= '/' if $u !~ m!/\z!;
				$u.$group;
			} @$ns;
		}

		my $mirrors = $self->{nntpmirror};
		if ($mirrors) {
			my @m;
			foreach (@$mirrors) {
				my $u = m!\Anntps?://! ? $_ : "nntp://$_";
				if ($u =~ m!\Anntps?://[^/]+/?\z!) {
					if ($group) {
						$u .= '/' if $u !~ m!/\z!;
						$u .= $group;
					} else {
						warn
"publicinbox.$self->{name}.nntpmirror=$_ missing newsgroup name\n";
					}
				}
				# else: allow full URLs like:
				# nntp://news.example.com/alt.example
				push @m, $u;
			}

			# List::Util::uniq requires Perl 5.26+, maybe we
			# can use it by 2030 or so
			my %seen;
			@urls = grep { !$seen{$_}++ } (@urls, @m);
		}
		\@urls;
	};
}

sub nntp_usable {
	my ($self) = @_;
	my $ret = mm($self) && over($self);
	$self->{mm} = $self->{over} = $self->{search} = undef;
	$ret;
}

# for v1 users w/o SQLite only
sub msg_by_path ($$) {
	my ($self, $path) = @_;
	git($self)->cat_file('HEAD:'.$path);
}

sub msg_by_smsg ($$) {
	my ($self, $smsg) = @_;

	# ghosts may have undef smsg (from SearchThread.node) or
	# no {blob} field
	return unless defined $smsg;
	defined(my $blob = $smsg->{blob}) or return;

	git($self)->cat_file($blob);
}

sub smsg_eml {
	my ($self, $smsg) = @_;
	my $bref = msg_by_smsg($self, $smsg) or return;
	my $eml = PublicInbox::Eml->new($bref);
	$smsg->populate($eml) unless exists($smsg->{num}); # v1 w/o SQLite
	$eml;
}

sub mid2num($$) {
	my ($self, $mid) = @_;
	my $mm = mm($self) or return;
	$mm->num_for($mid);
}

sub smsg_by_mid ($$) {
	my ($self, $mid) = @_;
	my $over = over($self) or return;
	# favor the Message-ID we used for the NNTP article number:
	defined(my $num = mid2num($self, $mid)) or return;
	my $smsg = $over->get_art($num) or return;
	PublicInbox::Smsg::psgi_cull($smsg);
}

sub msg_by_mid ($$) {
	my ($self, $mid) = @_;

	over($self) or
		return msg_by_path($self, mid2path($mid));

	my $smsg = smsg_by_mid($self, $mid);
	$smsg ? msg_by_smsg($self, $smsg) : undef;
}

sub recent {
	my ($self, $opts, $after, $before) = @_;
	over($self)->recent($opts, $after, $before);
}

sub modified {
	my ($self) = @_;
	if (my $over = over($self)) {
		my $msgs = $over->recent({limit => 1});
		if (my $smsg = $msgs->[0]) {
			return $smsg->{ts};
		}
		return time;
	}
	git($self)->modified; # v1
}

# returns prefix => pathname mapping
# (pathname is NOT public, but prefix is used for Xapian queries)
sub altid_map ($) {
	my ($self) = @_;
	$self->{-altid_map} //= eval {
		require PublicInbox::AltId;
		my $altid = $self->{altid} or return {};
		my %h = map {;
			my $x = PublicInbox::AltId->new($self, $_);
			"$x->{prefix}" => $x->{filename}
		} @$altid;
		\%h;
	} // {};
}

# $obj must respond to ->on_inbox_unlock, which takes Inbox ($self) as an arg
sub subscribe_unlock {
	my ($self, $ident, $obj) = @_;
	$self->{unlock_subs}->{$ident} = $obj;
}

sub unsubscribe_unlock {
	my ($self, $ident) = @_;
	delete $self->{unlock_subs}->{$ident};
}

sub check_inodes ($) {
	my ($self) = @_;
	for (qw(over mm)) { # TODO: search
		$self->{$_}->check_inodes if $self->{$_};
	}
}

# called by inotify
sub on_unlock {
	my ($self) = @_;
	check_inodes($self);
	my $subs = $self->{unlock_subs} or return;
	for my $obj (values %$subs) {
		eval { $obj->on_inbox_unlock($self) };
		warn "E: $@ ($self->{inboxdir})\n" if $@;
	}
}

1;
public-inbox-1.6.1/lib/PublicInbox/InboxIdle.pm000066400000000000000000000061321377346120300213210ustar00rootroot00000000000000# Copyright (C) 2020 all contributors 
# License: AGPL-3.0+ 

# fields:
# pi_config: PublicInbox::Config ref
# inot: Linux::Inotify2-like object
# pathmap => { inboxdir => [ ibx, watch1, watch2, watch3... ] } mapping
package PublicInbox::InboxIdle;
use strict;
use parent qw(PublicInbox::DS);
use Cwd qw(abs_path);
use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
my $IN_MODIFY = 0x02; # match Linux inotify
my $ino_cls;
if ($^O eq 'linux' && eval { require Linux::Inotify2; 1 }) {
	$IN_MODIFY = Linux::Inotify2::IN_MODIFY();
	$ino_cls = 'Linux::Inotify2';
} elsif (eval { require PublicInbox::KQNotify }) {
	$IN_MODIFY = PublicInbox::KQNotify::NOTE_WRITE();
	$ino_cls = 'PublicInbox::KQNotify';
}
require PublicInbox::In2Tie if $ino_cls;

sub in2_arm ($$) { # PublicInbox::Config::each_inbox callback
	my ($ibx, $self) = @_;
	my $dir = abs_path($ibx->{inboxdir});
	if (!defined($dir)) {
		warn "W: $ibx->{inboxdir} not watched: $!\n";
		return;
	}
	my $inot = $self->{inot};
	my $cur = $self->{pathmap}->{$dir} //= [];
	my $lock = "$dir/".($ibx->version >= 2 ? 'inbox.lock' : 'ssoma.lock');

	# transfer old subscriptions to the current inbox, cancel the old watch
	my $old_ibx = $cur->[0];
	$cur->[0] = $ibx;
	if ($old_ibx) {
		$ibx->{unlock_subs} and
			die "BUG: $dir->{unlock_subs} should not exist";
		$ibx->{unlock_subs} = $old_ibx->{unlock_subs};

		# Linux::Inotify2::Watch::name matches if watches are the
		# same, no point in replacing a watch of the same name
		if ($cur->[1]->name eq $lock) {
			$self->{on_unlock}->{$lock} = $ibx;
			return;
		}
		# rare, name changed (v1 inbox converted to v2)
		$cur->[1]->cancel; # Linux::Inotify2::Watch::cancel
	}

	if (my $w = $cur->[1] = $inot->watch($lock, $IN_MODIFY)) {
		$self->{on_unlock}->{$w->name} = $ibx;
	} else {
		warn "E: ".ref($inot)."->watch($lock, IN_MODIFY) failed: $!\n";
		if ($!{ENOSPC} && $^O eq 'linux') {
			warn <<"";
I: consider increasing /proc/sys/fs/inotify/max_user_watches

		}
	}

	# TODO: detect deleted packs (and possibly other files)
}

sub refresh {
	my ($self, $pi_config) = @_;
	$pi_config->each_inbox(\&in2_arm, $self);
}

sub new {
	my ($class, $pi_config) = @_;
	my $self = bless {}, $class;
	my $inot;
	if ($ino_cls) {
		$inot = $ino_cls->new or die "E: $ino_cls->new: $!";
		my $io = PublicInbox::In2Tie::io($inot);
		$self->SUPER::new($io, EPOLLIN | EPOLLET);
	} else {
		require PublicInbox::FakeInotify;
		$inot = PublicInbox::FakeInotify->new;
	}
	$self->{inot} = $inot;
	$self->{pathmap} = {}; # inboxdir => [ ibx, watch1, watch2, watch3...]
	$self->{on_unlock} = {}; # lock path => ibx
	refresh($self, $pi_config);
	PublicInbox::FakeInotify::poll_once($self) if !$ino_cls;
	$self;
}

sub event_step {
	my ($self) = @_;
	eval {
		my @events = $self->{inot}->read; # Linux::Inotify2::read
		my $on_unlock = $self->{on_unlock};
		for my $ev (@events) {
			if (my $ibx = $on_unlock->{$ev->fullname}) {
				$ibx->on_unlock;
			}
		}
	};
	warn "{inot}->read err: $@\n" if $@;
}

# for graceful shutdown in PublicInbox::Daemon,
# just ensure the FD gets closed ASAP and subscribers
sub busy { 0 }

1;
public-inbox-1.6.1/lib/PublicInbox/InboxWritable.pm000066400000000000000000000164651377346120300222270ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors 
# License: AGPL-3.0+ 

# Extends read-only Inbox for writing
package PublicInbox::InboxWritable;
use strict;
use v5.10.1;
use parent qw(PublicInbox::Inbox Exporter);
use PublicInbox::Import;
use PublicInbox::Filter::Base qw(REJECT);
use Errno qw(ENOENT);
our @EXPORT_OK = qw(eml_from_path warn_ignore_cb);

use constant {
	PERM_UMASK => 0,
	OLD_PERM_GROUP => 1,
	OLD_PERM_EVERYBODY => 2,
	PERM_GROUP => 0660,
	PERM_EVERYBODY => 0664,
};

sub new {
	my ($class, $ibx, $creat_opt) = @_;
	return $ibx if ref($ibx) eq $class;
	my $self = bless $ibx, $class;

	# TODO: maybe stop supporting this
	if ($creat_opt) { # for { nproc => $N }
		$self->{-creat_opt} = $creat_opt;
		init_inbox($self) if $self->version == 1;
	}
	$self;
}

sub assert_usable_dir {
	my ($self) = @_;
	my $dir = $self->{inboxdir};
	return $dir if defined($dir) && $dir ne '';
	die "no inboxdir defined for $self->{name}\n";
}

sub _init_v1 {
	my ($self, $skip_artnum) = @_;
	if (defined($self->{indexlevel}) || defined($skip_artnum)) {
		require PublicInbox::SearchIdx;
		require PublicInbox::Msgmap;
		my $sidx = PublicInbox::SearchIdx->new($self, 1); # just create
		$sidx->begin_txn_lazy;
		if (defined $skip_artnum) {
			my $mm = PublicInbox::Msgmap->new($self->{inboxdir}, 1);
			$mm->{dbh}->begin_work;
			$mm->skip_artnum($skip_artnum);
			$mm->{dbh}->commit;
		}
		$sidx->commit_txn_lazy;
	} else {
		open my $fh, '>>', "$self->{inboxdir}/ssoma.lock" or
			die "$self->{inboxdir}/ssoma.lock: $!\n";
	}
}

sub init_inbox {
	my ($self, $shards, $skip_epoch, $skip_artnum) = @_;
	if ($self->version == 1) {
		my $dir = assert_usable_dir($self);
		PublicInbox::Import::init_bare($dir);
		$self->umask_prepare;
		$self->with_umask(\&_init_v1, $self, $skip_artnum);
	} else {
		my $v2w = importer($self);
		$v2w->init_inbox($shards, $skip_epoch, $skip_artnum);
	}
}

sub importer {
	my ($self, $parallel) = @_;
	my $v = $self->version;
	if ($v == 2) {
		eval { require PublicInbox::V2Writable };
		die "v2 not supported: $@\n" if $@;
		my $opt = $self->{-creat_opt};
		my $v2w = PublicInbox::V2Writable->new($self, $opt);
		$v2w->{parallel} = $parallel if defined $parallel;
		$v2w;
	} elsif ($v == 1) {
		my @arg = (undef, undef, undef, $self);
		PublicInbox::Import->new(@arg);
	} else {
		$! = 78; # EX_CONFIG 5.3.5 local configuration error
		die "unsupported inbox version: $v\n";
	}
}

sub filter {
	my ($self, $im) = @_;
	my $f = $self->{filter};
	if ($f && $f =~ /::/) {
		# v2 keeps msgmap open, which causes conflicts for filters
		# such as PublicInbox::Filter::RubyLang which overload msgmap
		# for a predictable serial number.
		if ($im && $self->version >= 2 && $self->{altid}) {
			$im->done;
		}

		my @args = (-inbox => $self);
		# basic line splitting, only
		# Perhaps we can have proper quote splitting one day...
		($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;

		eval "require $f";
		if ($@) {
			warn $@;
		} else {
			# e.g: PublicInbox::Filter::Vger->new(@args)
			return $f->new(@args);
		}
	}
	undef;
}

sub is_maildir_basename ($) {
	my ($bn) = @_;
	return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/;
	if ($bn =~ /:2,([A-Z]+)\z/i) {
		my $flags = $1;
		return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail
	}
	1;
}

sub is_maildir_path ($) {
	my ($path) = @_;
	my @p = split(m!/+!, $path);
	(is_maildir_basename($p[-1]) && -f $path) ? 1 : 0;
}

sub eml_from_path ($) {
	my ($path) = @_;
	if (open my $fh, '<', $path) {
		my $str = do { local $/; <$fh> } or return;
		PublicInbox::Eml->new(\$str);
	} else { # ENOENT is common with Maildir
		warn "failed to open $path: $!\n" if $! != ENOENT;
		undef;
	}
}

sub import_maildir {
	my ($self, $dir) = @_;
	my $im = $self->importer(1);

	foreach my $sub (qw(cur new tmp)) {
		-d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n";
	}
	foreach my $sub (qw(cur new)) {
		opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n";
		while (defined(my $fn = readdir($dh))) {
			next unless is_maildir_basename($fn);
			my $mime = eml_from_path("$dir/$fn") or next;

			if (my $filter = $self->filter($im)) {
				my $ret = $filter->scrub($mime) or return;
				return if $ret == REJECT();
				$mime = $ret;
			}
			$im->add($mime);
		}
	}
	$im->done;
}

# asctime: From example@example.com Fri Jun 23 02:56:55 2000
my $from_strict = qr/^From \S+ +\S+ \S+ +\S+ [^:]+:[^:]+:[^:]+ [^:]+/;

sub mb_add ($$$$) {
	my ($im, $variant, $filter, $msg) = @_;
	$$msg =~ s/(\r?\n)+\z/$1/s;
	if ($variant eq 'mboxrd') {
		$$msg =~ s/^>(>*From )/$1/gms;
	} elsif ($variant eq 'mboxo') {
		$$msg =~ s/^>From /From /gms;
	}
	my $mime = PublicInbox::Eml->new($msg);
	if ($filter) {
		my $ret = $filter->scrub($mime) or return;
		return if $ret == REJECT();
		$mime = $ret;
	}
	$im->add($mime)
}

sub import_mbox {
	my ($self, $fh, $variant) = @_;
	if ($variant !~ /\A(?:mboxrd|mboxo)\z/) {
		die "variant must be 'mboxrd' or 'mboxo'\n";
	}
	my $im = $self->importer(1);
	my $prev = undef;
	my $msg = '';
	my $filter = $self->filter;
	while (defined(my $l = <$fh>)) {
		if ($l =~ /$from_strict/o) {
			if (!defined($prev) || $prev =~ /^\r?$/) {
				mb_add($im, $variant, $filter, \$msg) if $msg;
				$msg = '';
				$prev = $l;
				next;
			}
			warn "W[$.] $l\n";
		}
		$prev = $l;
		$msg .= $l;
	}
	mb_add($im, $variant, $filter, \$msg) if $msg;
	$im->done;
}

sub _read_git_config_perm {
	my ($self) = @_;
	chomp(my $perm = $self->git->qx('config', 'core.sharedRepository'));
	$perm;
}

sub _git_config_perm {
	my $self = shift;
	my $perm = scalar @_ ? $_[0] : _read_git_config_perm($self);
	return PERM_UMASK if (!defined($perm) || $perm eq '');
	return PERM_UMASK if ($perm eq 'umask');
	return PERM_GROUP if ($perm eq 'group');
	if ($perm =~ /\A(?:all|world|everybody)\z/) {
		return PERM_EVERYBODY;
	}
	return PERM_GROUP if ($perm =~ /\A(?:true|yes|on|1)\z/);
	return PERM_UMASK if ($perm =~ /\A(?:false|no|off|0)\z/);

	my $i = oct($perm);
	return PERM_UMASK if ($i == PERM_UMASK);
	return PERM_GROUP if ($i == OLD_PERM_GROUP);
	return PERM_EVERYBODY if ($i == OLD_PERM_EVERYBODY);

	if (($i & 0600) != 0600) {
		die "core.sharedRepository mode invalid: ".
		    sprintf('%.3o', $i) . "\nOwner must have permissions\n";
	}
	($i & 0666);
}

sub _umask_for {
	my ($perm) = @_; # _git_config_perm return value
	my $rv = $perm;
	return umask if $rv == 0;

	# set +x bit if +r or +w were set
	$rv |= 0100 if ($rv & 0600);
	$rv |= 0010 if ($rv & 0060);
	$rv |= 0001 if ($rv & 0006);
	(~$rv & 0777);
}

sub with_umask {
	my ($self, $cb, @arg) = @_;
	my $old = umask $self->{umask};
	my $rv = eval { $cb->(@arg) };
	my $err = $@;
	umask $old;
	die $err if $err;
	$rv;
}

sub umask_prepare {
	my ($self) = @_;
	my $perm = _git_config_perm($self);
	my $umask = _umask_for($perm);
	$self->{umask} = $umask;
}

sub cleanup ($) {
	delete @{$_[0]}{qw(over mm git search)};
}

# warnings to ignore when handling spam mailboxes and maybe other places
sub warn_ignore {
	my $s = "@_";
	# Email::Address::XS warnings
	$s =~ /^Argument contains empty address at /
	|| $s =~ /^Element at index [0-9]+ contains /
	# PublicInbox::MsgTime
	|| $s =~ /^bogus TZ offset: .+?, ignoring and assuming \+0000/
	|| $s =~ /^bad Date: .+? in /
}

# this expects to be RHS in this assignment: "local $SIG{__WARN__} = ..."
sub warn_ignore_cb {
	my $cb = $SIG{__WARN__} // sub { print STDERR @_ };
	sub {
		return if warn_ignore(@_);
		$cb->(@_);
	}
}

1;
public-inbox-1.6.1/lib/PublicInbox/KQNotify.pm000066400000000000000000000047411377346120300211540ustar00rootroot00000000000000# Copyright (C) 2020 all contributors 
# License: AGPL-3.0+ 

# implements the small subset of Linux::Inotify2 functionality we use
# using IO::KQueue on *BSD systems.
package PublicInbox::KQNotify;
use strict;
use IO::KQueue;
use PublicInbox::DSKQXS; # wraps IO::KQueue for fork-safe DESTROY
use PublicInbox::FakeInotify;
use Time::HiRes qw(stat);

# NOTE_EXTEND detects rename(2), NOTE_WRITE detects link(2)
sub MOVED_TO_OR_CREATE () { NOTE_EXTEND|NOTE_WRITE }

sub new {
	my ($class) = @_;
	bless { dskq => PublicInbox::DSKQXS->new, watch => {} }, $class;
}

sub watch {
	my ($self, $path, $mask) = @_;
	my ($fh, $watch);
	if (-d $path) {
		opendir($fh, $path) or return;
		my @st = stat($fh);
		$watch = bless [ $fh, $path, $st[10] ],
			'PublicInbox::KQNotify::Watchdir';
	} else {
		open($fh, '<', $path) or return;
		$watch = bless [ $fh, $path ],
			'PublicInbox::KQNotify::Watch';
	}
	my $ident = fileno($fh);
	$self->{dskq}->{kq}->EV_SET($ident, # ident
		EVFILT_VNODE, # filter
		EV_ADD | EV_CLEAR, # flags
		$mask, # fflags
		0, 0); # data, udata
	if ($mask == NOTE_WRITE || $mask == MOVED_TO_OR_CREATE) {
		$self->{watch}->{$ident} = $watch;
	} else {
		die "TODO Not implemented: $mask";
	}
	$watch;
}

# emulate Linux::Inotify::fileno
sub fileno { ${$_[0]->{dskq}->{kq}} }

# noop for Linux::Inotify2 compatibility.  Unlike inotify,
# kqueue doesn't seem to overflow since it's limited by the number of
# open FDs the process has
sub on_overflow {}

# noop for Linux::Inotify2 compatibility, we use `0' timeout for ->kevent
sub blocking {}

# behave like Linux::Inotify2->read
sub read {
	my ($self) = @_;
	my @kevents = $self->{dskq}->{kq}->kevent(0);
	my $events = [];
	for my $kev (@kevents) {
		my $ident = $kev->[KQ_IDENT];
		my $mask = $kev->[KQ_FFLAGS];
		my ($dh, $path, $old_ctime) = @{$self->{watch}->{$ident}};
		if (!defined($old_ctime)) {
			push @$events,
				bless(\$path, 'PublicInbox::FakeInotify::Event')
		} elsif ($mask & MOVED_TO_OR_CREATE) {
			my @new_st = stat($path) or next;
			$self->{watch}->{$ident}->[3] = $new_st[10]; # ctime
			rewinddir($dh);
			PublicInbox::FakeInotify::on_new_files($events, $dh,
							$path, $old_ctime);
		}
	}
	@$events;
}

package PublicInbox::KQNotify::Watch;
use strict;

sub name { $_[0]->[1] }

sub cancel { close $_[0]->[0] or die "close: $!" }

package PublicInbox::KQNotify::Watchdir;
use strict;

sub name { $_[0]->[1] }

sub cancel { closedir $_[0]->[0] or die "closedir: $!" }

1;
public-inbox-1.6.1/lib/PublicInbox/Linkify.pm000066400000000000000000000061551377346120300210560ustar00rootroot00000000000000# Copyright (C) 2014-2020 all contributors 
# License: AGPL-3.0+ 

# two-step linkification.
# intended usage is in the following order:
#
#   linkify_1
#   
#   linkify_2
#
# Maybe this could be done more efficiently...
package PublicInbox::Linkify;
use strict;
use warnings;
use Digest::SHA qw/sha1_hex/;
use PublicInbox::Hval qw(ascii_html mid_href);
use PublicInbox::MID qw($MID_EXTRACT);

my $SALT = rand;
my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|imaps?|s?news|gopher)://
		 [\@:\w\.-]+(?:/
		 (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
		 (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
		 (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
		 )?
		)}xi;

sub new { bless {}, $_[0] }

# try to distinguish paired punctuation chars from the URL itself
# Maybe other languages/formats can be supported here, too...
my %pairs = (
	"(" => qr/(\)[\.,;\+]?)\z/, # Markdown (,), Ruby (+) (, for arrays)
	"'" => qr/('[\.,;\+]?)\z/, # Perl / Ruby
	"!" => qr/(![\.,;\+]?)\z/, # Perl / Ruby
);

sub linkify_1 {
	$_[1] =~ s^$LINK_RE^
		my $beg = $1 || '';
		my $url = $2;
		my $end = '';

		# it's fairly common to end URLs in messages with
		# '.', ',' or ';' to denote the end of a statement;
		# assume the intent was to end the statement/sentence
		# in English
		if (defined(my $re = $pairs{$beg})) {
			if ($url =~ s/$re//) {
				$end = $1;
			}
		} elsif ($url =~ s/(\))?([\.,;])\z//) {
			$end = $2;
			# require ')' to be paired with '('
			if (defined $1) { # ')'
				if (index($url, '(') < 0) {
					$end = ")$end";
				} else {
					$url .= ')';
				}
			}
		} elsif ($url !~ /\(/ && $url =~ s/\)\z//) {
			$end = ')';
		}

		$url = ascii_html($url); # for IDN

		# salt this, as this could be exploited to show
		# links in the HTML which don't show up in the raw mail.
		my $key = sha1_hex($url . $SALT);

		$_[0]->{$key} = $url;
		$beg . 'PI-LINK-'. $key . $end;
	^geo;
	$_[1];
}

sub linkify_2 {
	# Added "PI-LINK-" prefix to avoid false-positives on git commits
	$_[1] =~ s!\bPI-LINK-([a-f0-9]{40})\b!
		my $key = $1;
		my $url = $_[0]->{$key};
		if (defined $url) {
			"$url";
		} else {
			# false positive or somebody tried to mess with us
			$key;
		}
	!ge;
	$_[1];
}

# single pass linkification of  within $str
# with $pfx being the URL prefix
sub linkify_mids {
	my ($self, $pfx, $str, $raw) = @_;
	$$str =~ s!$MID_EXTRACT!
		my $mid = $1;
		my $html = ascii_html($mid);
		my $href = mid_href($mid);

		# salt this, as this could be exploited to show
		# links in the HTML which don't show up in the raw mail.
		my $key = sha1_hex($html . $SALT);
		my $repl = qq(<$html>);
		$repl .= qq{ (raw)} if $raw;
		$self->{$key} = $repl;
		'PI-LINK-'. $key;
		!ge;
	$$str = ascii_html($$str);
	$$str =~ s!\bPI-LINK-([a-f0-9]{40})\b!
		my $key = $1;
		my $repl = $_[0]->{$key};
		if (defined $repl) {
			$repl;
		} else {
			# false positive or somebody tried to mess with us
			$key;
		}
	!ge;
}

sub to_html { linkify_2($_[0], ascii_html(linkify_1(@_))) }

1;
public-inbox-1.6.1/lib/PublicInbox/Listener.pm000066400000000000000000000036061377346120300212340ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors 
# License: AGPL-3.0+ 
#
# Used by -nntpd for listen sockets
package PublicInbox::Listener;
use strict;
use parent 'PublicInbox::DS';
use Socket qw(SOL_SOCKET SO_KEEPALIVE IPPROTO_TCP TCP_NODELAY);
use IO::Handle;
use PublicInbox::Syscall qw(EPOLLIN EPOLLEXCLUSIVE EPOLLET);
use Errno qw(EAGAIN ECONNABORTED EPERM);

# Warn on transient errors, mostly resource limitations.
# EINTR would indicate the failure to set NonBlocking in systemd or similar
my %ERR_WARN = map {;
	eval("Errno::$_()") => $_
} qw(EMFILE ENFILE ENOBUFS ENOMEM EINTR);

sub new ($$$) {
	my ($class, $s, $cb) = @_;
	setsockopt($s, SOL_SOCKET, SO_KEEPALIVE, 1);
	setsockopt($s, IPPROTO_TCP, TCP_NODELAY, 1); # ignore errors on non-TCP
	listen($s, 1024);
	my $self = bless { post_accept => $cb }, $class;
	$self->SUPER::new($s, EPOLLIN|EPOLLET|EPOLLEXCLUSIVE);
}

sub event_step {
	my ($self) = @_;
	my $sock = $self->{sock} or return;

	# no loop here, we want to fairly distribute clients
	# between multiple processes sharing the same socket
	# XXX our event loop needs better granularity for
	# a single accept() here to be, umm..., acceptable
	# on high-traffic sites.
	if (my $addr = accept(my $c, $sock)) {
		IO::Handle::blocking($c, 0); # no accept4 :<
		eval { $self->{post_accept}->($c, $addr, $sock) };
		warn "E: $@\n" if $@;
		$self->requeue;
	} elsif ($! == EAGAIN || $! == ECONNABORTED || $! == EPERM) {
		# EAGAIN is common and likely
		# ECONNABORTED is common with bad connections
		# EPERM happens if firewall rules prevent a connection
		# on Linux (and everything that emulates Linux).
		# Firewall rules are sometimes intentional, so we don't
		# warn on EPERM to avoid being too noisy...
		return;
	} elsif (my $sym = $ERR_WARN{int($!)}) {
		warn "W: accept(): $! ($sym)\n";
	} else {
		warn "BUG?: accept(): $!\n";
	}
}

1;
public-inbox-1.6.1/lib/PublicInbox/Lock.pm000066400000000000000000000021111377346120300203250ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors 
# License: AGPL-3.0+ 

# Base class for per-inbox locking
package PublicInbox::Lock;
use strict;
use warnings;
use Fcntl qw(:flock :DEFAULT);
use Carp qw(croak);

# we only acquire the flock if creating or reindexing;
# PublicInbox::Import already has the lock on its own.
sub lock_acquire {
	my ($self) = @_;
	my $lock_path = $self->{lock_path};
	croak 'already locked '.($lock_path // '(undef)') if $self->{lockfh};
	return unless defined($lock_path);
	sysopen(my $lockfh, $lock_path, O_WRONLY|O_CREAT) or
		croak "failed to open $lock_path: $!\n";
	flock($lockfh, LOCK_EX) or croak "lock $lock_path failed: $!\n";
	$self->{lockfh} = $lockfh;
}

sub lock_release {
	my ($self, $wake) = @_;
	defined(my $lock_path = $self->{lock_path}) or return;
	my $lockfh = delete $self->{lockfh} or croak "not locked: $lock_path";

	syswrite($lockfh, '.') if $wake;

	flock($lockfh, LOCK_UN) or croak "unlock $lock_path failed: $!\n";
	close $lockfh or croak "close $lock_path failed: $!\n";
}

1;
public-inbox-1.6.1/lib/PublicInbox/MDA.pm000066400000000000000000000054101377346120300200430ustar00rootroot00000000000000# Copyright (C) 2013-2020 all contributors 
# License: AGPL-3.0+ 
#
# For the -mda script (mail delivery agent)
package PublicInbox::MDA;
use strict;
use warnings;
use PublicInbox::MsgTime;
use PublicInbox::Address;
use constant MAX_SIZE => 1024 * 500; # same as spamc default, should be tunable
use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian

our @BAD_HEADERS = (
	# postfix
	qw(delivered-to x-original-to), # prevent training loops

	# The rest are taken from Mailman 2.1.15:
	# could contain passwords:
	qw(approved approve x-approved x-approve urgent),
	# could be used phishing:
	qw(return-receipt-to disposition-notification-to x-confirm-reading-to),
	# Pegasus mail:
	qw(x-pmrqc)
);

# drop plus addressing for matching
sub __drop_plus {
	my ($str_addr) = @_;
	$str_addr =~ s/\+.*\@/\@/;
	$str_addr;
}

# do not allow Bcc, only Cc and To if recipient is set
sub precheck {
	my ($klass, $simple, $address) = @_;
	my @mid = $simple->header('Message-ID');
	return 0 if scalar(@mid) != 1;
	my $mid = $mid[0];
	return 0 if (length($mid) > MAX_MID_SIZE);
	return 0 unless usable_str(length(''), $mid) && $mid =~ /\@/;
	return 0 unless usable_str(length('u@h'), $simple->header("From"));
	return 0 unless usable_str(length(':o'), $simple->header("Subject"));
	return 0 unless usable_date($simple->header("Date"));
	return 0 if length($simple->as_string) > MAX_SIZE;
	alias_specified($simple, $address);
}

sub usable_str {
	my ($len, $str) = @_;
	defined($str) && length($str) >= $len;
}

sub usable_date {
	defined(eval { PublicInbox::MsgTime::str2date_zone($_[0]) });
}

sub alias_specified {
	my ($simple, $address) = @_;

	my @address = ref($address) eq 'ARRAY' ? @$address : ($address);
	my %ok = map {
		lc(__drop_plus($_)) => 1;
	} @address;

	foreach my $line ($simple->header('Cc'), $simple->header('To')) {
		my @addrs = PublicInbox::Address::emails($line);
		foreach my $addr (@addrs) {
			if ($ok{lc(__drop_plus($addr))}) {
				return 1;
			}
		}
	}
	return 0;
}

sub set_list_headers {
	my ($class, $simple, $dst) = @_;
	unless (defined $simple->header('List-Id')) {
		my $pa = $dst->{-primary_address};
		$pa =~ tr/@/./; # RFC2919
		$simple->header_set("List-Id", "<$pa>");
	}
}

sub inboxes_for_list_id ($$) {
	my ($klass, $config, $simple) = @_;

	# newer Email::Simple allows header_raw, as does Email::MIME:
	my @list_ids = $simple->can('header_raw') ?
			$simple->header_raw('List-Id') :
			$simple->header('List-Id');
	my @dests;
	for my $list_id (@list_ids) {
		$list_id =~ /<[ \t]*(.+)?[ \t]*>/ or next;
		if (my $ibx = $config->lookup_list_id($1)) {
			push @dests, $ibx;
		}
	}
	if (scalar(@list_ids) > 1) {
		warn "W: multiple List-IDs in message:\n";
		warn "W: List-ID: $_\n" for @list_ids
	}
	\@dests;
}

1;
public-inbox-1.6.1/lib/PublicInbox/MID.pm000066400000000000000000000062161377346120300200600ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors 
# License: AGPL-3.0+ 
#
# Various Message-ID-related functions.
package PublicInbox::MID;
use strict;
use warnings;
use base qw/Exporter/;
our @EXPORT_OK = qw(mid_clean id_compress mid2path mid_escape MID_ESC
	mids references mids_for_index $MID_EXTRACT);
use URI::Escape qw(uri_escape_utf8);
use Digest::SHA qw/sha1_hex/;
require PublicInbox::Address;
use constant {
	ID_MAX => 40, # SHA-1 hex length for HTML id anchors
	MAX_MID_SIZE => 244, # max term size (Xapian limitation) - length('Q')
};

our $MID_EXTRACT = qr/<([^>]+)>/s;

sub mid_clean {
	my ($mid) = @_;
	defined($mid) or die "no Message-ID";
	# MDA->precheck did more checking for us
	if ($mid =~ $MID_EXTRACT) {
		$mid = $1;
	}
	$mid;
}

# this is idempotent, used for HTML anchor/ids and such
sub id_compress {
	my ($id, $force) = @_;

	if ($force || $id =~ /[^a-zA-Z0-9_\-]/ || length($id) > ID_MAX) {
		utf8::encode($id);
		return sha1_hex($id);
	}
	$id;
}

sub mid2path {
	my ($mid) = @_;
	my ($x2, $x38) = ($mid =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/);

	unless (defined $x38) {
		# compatibility with old links (or short Message-IDs :)
		$mid = mid_clean($mid);
		utf8::encode($mid);
		$mid = sha1_hex($mid);
		($x2, $x38) = ($mid =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/);
	}
	"$x2/$x38";
}

# only intended for Message-ID and X-Alt-Message-ID
sub extract_mids {
	my @mids;
	for my $v (@_) {
		my @cur = ($v =~ /$MID_EXTRACT/g);
		if (@cur) {
			push(@mids, @cur);
		} else {
			push(@mids, $v);
		}
	}
	\@mids;
}

sub mids ($) {
	my ($hdr) = @_;
	my @mids = $hdr->header_raw('Message-ID');
	uniq_mids(extract_mids(@mids));
}

# we allow searching on X-Alt-Message-ID since PublicInbox::NNTP uses them
# to placate some clients, and we want to ensure NNTP-only clients can
# import and index without relying on HTTP endpoints
sub mids_for_index ($) {
	my ($hdr) = @_;
	my @mids = $hdr->header_raw('Message-ID');
	my @alts = $hdr->header_raw('X-Alt-Message-ID');
	uniq_mids(extract_mids(@mids, @alts));
}

# last References should be IRT, but some mail clients do things
# out of order, so trust IRT over References iff IRT exists
sub references ($) {
	my ($hdr) = @_;
	my @mids;
	foreach my $f (qw(References In-Reply-To)) {
		my @v = $hdr->header_raw($f);
		foreach my $v (@v) {
			push(@mids, ($v =~ /$MID_EXTRACT/g));
		}
	}

	# old versions of git-send-email would prompt users for
	# In-Reply-To and users' muscle memory would use 'y' or 'n'
	# as responses:
	my %addr = ( y => 1, n => 1 );

	foreach my $f (qw(To From Cc)) {
		my @v = $hdr->header_raw($f);
		foreach my $v (@v) {
			$addr{$_} = 1 for (PublicInbox::Address::emails($v));
		}
	}
	uniq_mids(\@mids, \%addr);
}

sub uniq_mids ($;$) {
	my ($mids, $seen) = @_;
	my @ret;
	$seen ||= {};
	foreach my $mid (@$mids) {
		$mid =~ tr/\n\t\r//d;
		if (length($mid) > MAX_MID_SIZE) {
			warn "Message-ID: <$mid> too long, truncating\n";
			$mid = substr($mid, 0, MAX_MID_SIZE);
		}
		push(@ret, $mid) unless $seen->{$mid}++;
	}
	\@ret;
}

# RFC3986, section 3.3:
sub MID_ESC () { '^A-Za-z0-9\-\._~!\$\&\'\(\)\*\+,;=:@' }
sub mid_escape ($) { uri_escape_utf8($_[0], MID_ESC) }

1;
public-inbox-1.6.1/lib/PublicInbox/MIME.pm000066400000000000000000000071531377346120300201770ustar00rootroot00000000000000# This library is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
#
# The license for this file differs from the rest of public-inbox.
#
# We no longer load this in any of our code outside of maintainer
# tests for compatibility.  PublicInbox::Eml is favored throughout
# our codebase for performance and safety reasons, though we maintain
# Email::MIME-compatibility in mail injection and indexing code paths.
#
# It monkey patches the "parts_multipart" subroutine with patches
# from Matthew Horsfall  at:
#
# git clone --mirror https://github.com/rjbs/Email-MIME.git refs/pull/28/head
#
# commit fe0eb870ab732507aa39a1070a2fd9435c7e4877
# ("Make sure we don't modify the body of a message when injecting a header.")
# commit 981d8201a7239b02114489529fd366c4c576a146
# ("GH #14 - Handle CRLF emails properly.")
# commit 2338d93598b5e8432df24bda8dfdc231bdeb666e
# ("GH #14 - Support multipart messages without content-type in subparts.")
#
# For Email::MIME >= 1.923 && < 1.935,
# commit dcef9be66c49ae89c7a5027a789bbbac544499ce
# ("removing all trailing newlines was too much")
# is also included
package PublicInbox::MIME;
use strict;
use warnings;
use base qw(Email::MIME);
use Email::MIME::ContentType;
use PublicInbox::MsgIter ();
$Email::MIME::ContentType::STRICT_PARAMS = 0;

if ($Email::MIME::VERSION <= 1.937) {
sub parts_multipart {
  my $self     = shift;
  my $boundary = $self->{ct}->{attributes}->{boundary};

  # Take a message, join all its lines together.  Now try to Email::MIME->new
  # it with 1.861 or earlier.  Death!  It tries to recurse endlessly on the
  # body, because every time it splits on boundary it gets itself. Obviously
  # that means it's a bogus message, but a mangled result (or exception) is
  # better than endless recursion. -- rjbs, 2008-01-07
  return $self->parts_single_part
    unless $boundary and $self->body_raw =~ /^--\Q$boundary\E\s*$/sm;

  $self->{body_raw} = Email::Simple::body($self);

  # rfc1521 7.2.1
  my ($body, $epilogue) = split /^--\Q$boundary\E--\s*$/sm, $self->body_raw, 2;

  # Split on boundaries, but keep blank lines after them intact
  my @bits = split /^--\Q$boundary\E\s*?(?=$self->{mycrlf})/m, ($body || '');

  Email::Simple::body_set($self, undef);

  # If there are no headers in the potential MIME part, it's just part of the
  # body.  This is a horrible hack, although it's debatable whether it was
  # better or worse when it was $self->{body} = shift @bits ... -- rjbs,
  # 2006-11-27
  Email::Simple::body_set($self, shift @bits) if ($bits[0] || '') !~ /.*:.*/;

  my $bits = @bits;

  my @parts;
  for my $bit (@bits) {
    # Parts don't need headers. If they don't have them, they look like this:
    #
    #   --90e6ba6e8d06f1723604fc1b809a
    #
    #   Part 2
    #
    #   Part 2a
    #
    # $bit will contain two new lines before Part 2.
    #
    # Anything with headers will only have one new line.
    #
    # RFC 1341 Section 7.2 says parts without headers are to be considered
    # plain US-ASCII text. -- alh
    # 2016-08-01
    my $added_header;

    if ($bit =~ /^(?:$self->{mycrlf}){2}/) {
      $bit = "Content-type: text/plain; charset=us-ascii" . $bit;

      $added_header = 1;
    }

    $bit =~ s/\A[\n\r]+//smg;
    $bit =~ s/(?{mycrlf}\Z//sm;

    my $email = (ref $self)->new($bit);

    if ($added_header) {
      # Remove our changes so we don't change the raw email content
      $email->header_str_set('Content-Type');
    }

    push @parts, $email;
  }

  $self->{parts} = \@parts;

  return @{ $self->{parts} };
}
}

no warnings 'once';
*each_part = \&PublicInbox::MsgIter::em_each_part;
1;
public-inbox-1.6.1/lib/PublicInbox/ManifestJsGz.pm000066400000000000000000000074321377346120300220140ustar00rootroot00000000000000# Copyright (C) 2020 all contributors 
# License: AGPL-3.0+ 

# generates manifest.js.gz for grokmirror(1)
package PublicInbox::ManifestJsGz;
use strict;
use v5.10.1;
use parent qw(PublicInbox::WwwListing);
use Digest::SHA ();
use File::Spec ();
use bytes (); # length
use PublicInbox::Inbox;
use PublicInbox::Git;
use IO::Compress::Gzip qw(gzip);
use HTTP::Date qw(time2str);
*try_cat = \&PublicInbox::Inbox::try_cat;

our $json;
for my $mod (qw(JSON::MaybeXS JSON JSON::PP)) {
	eval "require $mod" or next;
	# ->ascii encodes non-ASCII to "\uXXXX"
	$json = $mod->new->ascii(1) and last;
}

# called by WwwListing
sub url_regexp {
	my ($ctx) = @_;
	# grokmirror uses relative paths, so it's domain-dependent
	# SUPER calls PublicInbox::WwwListing::url_regexp
	$ctx->SUPER::url_regexp('publicInbox.grokManifest', 'match=domain');
}

sub fingerprint ($) {
	my ($git) = @_;
	# TODO: convert to qspawn for fairness when there's
	# thousands of repos
	my ($fh, $pid) = $git->popen('show-ref');
	my $dig = Digest::SHA->new(1);
	while (read($fh, my $buf, 65536)) {
		$dig->add($buf);
	}
	close $fh;
	waitpid($pid, 0);
	return if $?; # empty, uninitialized git repo
	$dig->hexdigest;
}

sub manifest_add ($$;$$) {
	my ($ctx, $ibx, $epoch, $default_desc) = @_;
	my $url_path = "/$ibx->{name}";
	my $git_dir = $ibx->{inboxdir};
	if (defined $epoch) {
		$git_dir .= "/git/$epoch.git";
		$url_path .= "/git/$epoch.git";
	}
	return unless -d $git_dir;
	my $git = PublicInbox::Git->new($git_dir);
	my $fingerprint = fingerprint($git) or return; # no empty repos

	chomp(my $owner = $git->qx('config', 'gitweb.owner'));
	chomp(my $desc = try_cat("$git_dir/description"));
	utf8::decode($owner);
	utf8::decode($desc);
	$owner = undef if $owner eq '';
	$desc = 'Unnamed repository' if $desc eq '';

	# templates/hooks--update.sample and git-multimail in git.git
	# only match "Unnamed repository", not the full contents of
	# templates/this--description in git.git
	if ($desc =~ /\AUnnamed repository/) {
		$desc = "$default_desc [epoch $epoch]" if defined($epoch);
	}

	my $reference;
	chomp(my $alt = try_cat("$git_dir/objects/info/alternates"));
	if ($alt) {
		# n.b.: GitPython doesn't seem to handle comments or C-quoted
		# strings like native git does; and we don't for now, either.
		my @alt = split(/\n+/, $alt);

		# grokmirror only supports 1 alternate for "reference",
		if (scalar(@alt) == 1) {
			my $objdir = "$git_dir/objects";
			$reference = File::Spec->rel2abs($alt[0], $objdir);
			$reference =~ s!/[^/]+/?\z!!; # basename
		}
	}
	$ctx->{-abs2urlpath}->{$git_dir} = $url_path;
	my $modified = $git->modified;
	if ($modified > ($ctx->{-mtime} // 0)) {
		$ctx->{-mtime} = $modified;
	}
	$ctx->{manifest}->{$url_path} = {
		owner => $owner,
		reference => $reference,
		description => $desc,
		modified => $modified,
		fingerprint => $fingerprint,
	};
}

sub ibx_entry {
	my ($ctx, $ibx) = @_;
	eval {
		if (defined(my $max = $ibx->max_git_epoch)) {
			my $desc = $ibx->description;
			for my $epoch (0..$max) {
				manifest_add($ctx, $ibx, $epoch, $desc);
			}
		} else {
			manifest_add($ctx, $ibx);
		}
	};
	warn "E: $@" if $@;
}

sub hide_key { 'manifest' }

# overrides WwwListing->psgi_triple
sub psgi_triple {
	my ($ctx) = @_;
	my $abs2urlpath = delete($ctx->{-abs2urlpath}) // {};
	my $manifest = delete($ctx->{manifest}) // {};
	while (my ($url_path, $repo) = each %$manifest) {
		defined(my $abs = $repo->{reference}) or next;
		$repo->{reference} = $abs2urlpath->{$abs};
	}
	$manifest = $json->encode($manifest);
	gzip(\$manifest => \(my $out));
	[ 200, [ qw(Content-Type application/gzip),
		 'Last-Modified', time2str($ctx->{-mtime}),
		 'Content-Length', bytes::length($out) ], [ $out ] ]
}

sub per_inbox {
	my ($ctx) = @_;
	ibx_entry($ctx, $ctx->{-inbox});
	psgi_triple($ctx);
}

1;
public-inbox-1.6.1/lib/PublicInbox/Mbox.pm000066400000000000000000000165251377346120300203600ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors 
# License: AGPL-3.0+ 

# Streaming interface for mboxrd HTTP responses
# See PublicInbox::GzipFilter for details.
package PublicInbox::Mbox;
use strict;
use parent 'PublicInbox::GzipFilter';
use PublicInbox::MID qw/mid_escape/;
use PublicInbox::Hval qw/to_filename/;
use PublicInbox::Smsg;
use PublicInbox::Eml;

# called by PSGI server as body response
# this gets called twice for every message, once to return the header,
# once to retrieve the body
sub getline {
	my ($ctx) = @_; # ctx
	my $smsg = $ctx->{smsg} or return;
	my $ibx = $ctx->{-inbox};
	my $eml = $ibx->smsg_eml($smsg) or return;
	my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}});
	$ctx->zmore(msg_hdr($ctx, $eml, $smsg->{mid}));
	if ($n) {
		$ctx->translate(msg_body($eml));
	} else { # last message
		$ctx->zmore(msg_body($eml));
		$ctx->zflush;
	}
}

# called by PublicInbox::DS::write after http->next_step
sub async_next {
	my ($http) = @_; # PublicInbox::HTTP
	my $ctx = $http->{forward} or return; # client aborted
	eval {
		my $smsg = $ctx->{smsg} or return $ctx->close;
		$ctx->smsg_blob($smsg);
	};
	warn "E: $@" if $@;
}

sub async_eml { # for async_blob_cb
	my ($ctx, $eml) = @_;
	my $smsg = delete $ctx->{smsg};
	# next message
	$ctx->{smsg} = $ctx->{-inbox}->over->next_by_mid(@{$ctx->{next_arg}});

	$ctx->zmore(msg_hdr($ctx, $eml, $smsg->{mid}));
	$ctx->{http_out}->write($ctx->translate(msg_body($eml)));
}

sub res_hdr ($$) {
	my ($ctx, $subject) = @_;
	my $fn = $subject // '';
	$fn =~ s/^re:\s+//i;
	$fn = to_filename($fn) // 'no-subject';
	my @hdr = ('Content-Type');
	if ($ctx->{-inbox}->{obfuscate}) {
		# obfuscation is stupid, but maybe scrapers are, too...
		push @hdr, 'application/mbox';
		$fn .= '.mbox';
	} else {
		push @hdr, 'text/plain';
		$fn .= '.txt';
	}
	push @hdr, 'Content-Disposition', "inline; filename=$fn";
	\@hdr;
}

# for rare cases where v1 inboxes aren't indexed w/ ->over at all
sub no_over_raw ($) {
	my ($ctx) = @_;
	my $mref = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return;
	my $eml = PublicInbox::Eml->new($mref);
	[ 200, res_hdr($ctx, $eml->header_str('Subject')),
		[ msg_hdr($ctx, $eml, $ctx->{mid}) . msg_body($eml) ] ]
}

# /$INBOX/$MESSAGE_ID/raw
sub emit_raw {
	my ($ctx) = @_;
	$ctx->{base_url} = $ctx->{-inbox}->base_url($ctx->{env});
	my $over = $ctx->{-inbox}->over or return no_over_raw($ctx);
	my ($id, $prev);
	my $mip = $ctx->{next_arg} = [ $ctx->{mid}, \$id, \$prev ];
	my $smsg = $ctx->{smsg} = $over->next_by_mid(@$mip) or return;
	my $res_hdr = res_hdr($ctx, $smsg->{subject});
	bless $ctx, __PACKAGE__;
	$ctx->psgi_response(200, $res_hdr);
}

sub msg_hdr ($$;$) {
	my ($ctx, $eml, $mid) = @_;
	my $header_obj = $eml->header_obj;

	# drop potentially confusing headers, ssoma already should've dropped
	# Lines and Content-Length
	foreach my $d (qw(Lines Bytes Content-Length Status)) {
		$header_obj->header_set($d);
	}
	my $ibx = $ctx->{-inbox};
	my $base = $ctx->{base_url};
	$mid = $ctx->{mid} unless defined $mid;
	$mid = mid_escape($mid);
	my @append = (
		'Archived-At', "<$base$mid/>",
		'List-Archive', "<$base>",
		'List-Post', "{-primary_address}>",
	);
	my $crlf = $header_obj->crlf;
	my $buf = $header_obj->as_string;
	# fixup old bug from import (pre-a0c07cba0e5d8b6a)
	$buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
	$buf = "From mboxrd\@z Thu Jan  1 00:00:00 1970" . $crlf . $buf;

	for (my $i = 0; $i < @append; $i += 2) {
		my $k = $append[$i];
		my $v = $append[$i + 1];
		my @v = $header_obj->header_raw($k);
		foreach (@v) {
			if ($v eq $_) {
				$v = undef;
				last;
			}
		}
		$buf .= "$k: $v$crlf" if defined $v;
	}
	$buf .= $crlf;
}

sub msg_body ($) {
	my $bdy = $_[0]->{bdy} // return "\n";
	# mboxrd quoting style
	# https://en.wikipedia.org/wiki/Mbox#Modified_mbox
	# https://www.loc.gov/preservation/digital/formats/fdd/fdd000385.shtml
	# https://web.archive.org/http://www.qmail.org/man/man5/mbox.html
	$$bdy =~ s/^(>*From )/>$1/gm;
	$$bdy .= "\n";
}

sub thread_cb {
	my ($ctx) = @_;
	my $msgs = $ctx->{msgs};
	while (1) {
		if (my $smsg = shift @$msgs) {
			return $smsg;
		}
		# refill result set
		$ctx->{msgs} = $msgs = $ctx->{over}->get_thread($ctx->{mid},
								$ctx->{prev});
		return unless @$msgs;
		$ctx->{prev} = $msgs->[-1];
	}
}

sub thread_mbox {
	my ($ctx, $over, $sfx) = @_;
	my $msgs = $ctx->{msgs} = $over->get_thread($ctx->{mid}, {});
	return [404, [qw(Content-Type text/plain)], []] if !@$msgs;
	$ctx->{prev} = $msgs->[-1];
	$ctx->{over} = $over; # bump refcnt
	require PublicInbox::MboxGz;
	PublicInbox::MboxGz::mbox_gz($ctx, \&thread_cb, $msgs->[0]->{subject});
}

sub emit_range {
	my ($ctx, $range) = @_;

	my $q;
	if ($range eq 'all') { # TODO: YYYY[-MM]
		$q = '';
	} else {
		return [404, [qw(Content-Type text/plain)], []];
	}
	mbox_all($ctx, { q => $q });
}

sub all_ids_cb {
	my ($ctx) = @_;
	my $ids = $ctx->{ids};
	do {
		while ((my $num = shift @$ids)) {
			my $smsg = $ctx->{over}->get_art($num) or next;
			return $smsg;
		}
		$ctx->{ids} = $ids = $ctx->{mm}->ids_after(\($ctx->{prev}));
	} while (@$ids);
}

sub mbox_all_ids {
	my ($ctx) = @_;
	my $ibx = $ctx->{-inbox};
	my $prev = 0;
	my $mm = $ctx->{mm} = $ibx->mm;
	my $ids = $mm->ids_after(\$prev) or return
		[404, [qw(Content-Type text/plain)], ["No results found\n"]];
	$ctx->{over} = $ibx->over or
		return PublicInbox::WWW::need($ctx, 'Overview');
	$ctx->{ids} = $ids;
	$ctx->{prev} = $prev;
	require PublicInbox::MboxGz;
	PublicInbox::MboxGz::mbox_gz($ctx, \&all_ids_cb, 'all');
}

sub results_cb {
	my ($ctx) = @_;
	my $over = $ctx->{-inbox}->over or return;
	while (1) {
		while (defined(my $num = shift(@{$ctx->{ids}}))) {
			my $smsg = $over->get_art($num) or next;
			return $smsg;
		}
		# refill result set
		my $srch = $ctx->{-inbox}->search(undef, $ctx) or return;
		my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
		my $size = $mset->size or return;
		$ctx->{qopts}->{offset} += $size;
		$ctx->{ids} = $srch->mset_to_artnums($mset);
	}
}

sub results_thread_cb {
	my ($ctx) = @_;

	my $over = $ctx->{-inbox}->over or return;
	while (1) {
		while (defined(my $num = shift(@{$ctx->{xids}}))) {
			my $smsg = $over->get_art($num) or next;
			return $smsg;
		}

		# refills ctx->{xids}
		next if $over->expand_thread($ctx);

		# refill result set
		my $srch = $ctx->{-inbox}->search(undef, $ctx) or return;
		my $mset = $srch->mset($ctx->{query}, $ctx->{qopts});
		my $size = $mset->size or return;
		$ctx->{qopts}->{offset} += $size;
		$ctx->{ids} = $srch->mset_to_artnums($mset);
	}

}

sub mbox_all {
	my ($ctx, $q) = @_;
	my $q_string = $q->{'q'};
	return mbox_all_ids($ctx) if $q_string !~ /\S/;
	my $srch = $ctx->{-inbox}->search or
		return PublicInbox::WWW::need($ctx, 'Search');
	my $over = $ctx->{-inbox}->over or
		return PublicInbox::WWW::need($ctx, 'Overview');

	my $qopts = $ctx->{qopts} = { mset => 2 }; # order by docid
	$qopts->{thread} = 1 if $q->{t};
	my $mset = $srch->mset($q_string, $qopts);
	$qopts->{offset} = $mset->size or
			return [404, [qw(Content-Type text/plain)],
				["No results found\n"]];
	$ctx->{query} = $q_string;
	$ctx->{ids} = $srch->mset_to_artnums($mset);
	require PublicInbox::MboxGz;
	my $fn;
	if ($q->{t} && $srch->has_threadid) {
		$fn = 'results-thread-'.$q_string;
		PublicInbox::MboxGz::mbox_gz($ctx, \&results_thread_cb, $fn);
	} else {
		$fn = 'results-'.$q_string;
		PublicInbox::MboxGz::mbox_gz($ctx, \&results_cb, $fn);
	}
}

1;
public-inbox-1.6.1/lib/PublicInbox/MboxGz.pm000066400000000000000000000030251377346120300206500ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors 
# License: AGPL-3.0+ 
package PublicInbox::MboxGz;
use strict;
use parent 'PublicInbox::GzipFilter';
use PublicInbox::Eml;
use PublicInbox::Hval qw/to_filename/;
use PublicInbox::Mbox;
*msg_hdr = \&PublicInbox::Mbox::msg_hdr;
*msg_body = \&PublicInbox::Mbox::msg_body;

sub async_next ($) {
	my ($http) = @_; # PublicInbox::HTTP
	my $ctx = $http->{forward} or return;
	eval {
		$ctx->{smsg} = $ctx->{cb}->($ctx) or return $ctx->close;
		$ctx->smsg_blob($ctx->{smsg});
	};
	warn "E: $@" if $@;
}

sub mbox_gz {
	my ($self, $cb, $fn) = @_;
	$self->{cb} = $cb;
	$self->{base_url} = $self->{-inbox}->base_url($self->{env});
	$self->{gz} = PublicInbox::GzipFilter::gzip_or_die();
	$fn = to_filename($fn // '') // 'no-subject';
	# http://www.iana.org/assignments/media-types/application/gzip
	bless $self, __PACKAGE__;
	my $res_hdr = [ 'Content-Type' => 'application/gzip',
		'Content-Disposition' => "inline; filename=$fn.mbox.gz" ];
	$self->psgi_response(200, $res_hdr);
}

# called by Plack::Util::foreach or similar (generic PSGI)
sub getline {
	my ($self) = @_;
	my $cb = $self->{cb} or return;
	while (my $smsg = $cb->($self)) {
		my $eml = $self->{-inbox}->smsg_eml($smsg) or next;
		$self->zmore(msg_hdr($self, $eml, $smsg->{mid}));
		return $self->translate(msg_body($eml));
	}
	# signal that we're done and can return undef next call:
	delete $self->{cb};
	$self->zflush;
}

no warnings 'once';
*async_eml = \&PublicInbox::Mbox::async_eml;
1;
public-inbox-1.6.1/lib/PublicInbox/MsgIter.pm000066400000000000000000000060241377346120300210160ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 

# read-only utilities for Email::MIME
package PublicInbox::MsgIter;
use strict;
use warnings;
use base qw(Exporter);
our @EXPORT = qw(msg_iter msg_part_text);

# This becomes PublicInbox::MIME->each_part:
# Like Email::MIME::walk_parts, but this is:
# * non-recursive
# * passes depth and indices to the iterator callback
sub em_each_part ($$;$$) {
	my ($mime, $cb, $cb_arg, $do_undef) = @_;
	my @parts = $mime->subparts;
	if (@parts) {
		$mime = $_[0] = undef if $do_undef; # saves some memory
		my $i = 0;
		@parts = map { [ $_, 1, ++$i ] } @parts;
		while (my $p = shift @parts) {
			my ($part, $depth, $idx) = @$p;
			my @sub = $part->subparts;
			if (@sub) {
				$depth++;
				$i = 0;
				@sub = map {
					[ $_, $depth, "$idx.".(++$i) ]
				} @sub;
				@parts = (@sub, @parts);
			} else {
				$cb->($p, $cb_arg);
			}
		}
	} else {
		$cb->([$mime, 0, 1], $cb_arg);
	}
}

# Use this when we may accept Email::MIME from user scripts
# (not just PublicInbox::MIME)
sub msg_iter ($$;$$) { # $_[0] = PublicInbox::MIME/Email::MIME-like obj
	my (undef, $cb, $cb_arg, $once) = @_;
	if (my $ep = $_[0]->can('each_part')) { # PublicInbox::{MIME,*}
		$ep->($_[0], $cb, $cb_arg, $once);
	} else { # for compatibility with existing Email::MIME users:
		em_each_part($_[0], $cb, $cb_arg, $once);
	}
}

sub msg_part_text ($$) {
	my ($part, $ct) = @_;

	# TODO: we may offer a separate sub for people who need to index
	# HTML-only mail, but the majority of HTML mail is multipart/alternative
	# with a text part which we don't have to waste cycles decoding
	return if $ct =~ m!\btext/x?html\b!;

	my $s = eval { $part->body_str };
	my $err = $@;

	# text/plain is the default, multipart/mixed happened a few
	# times when it should not have been:
	#   <87llgalspt.fsf@free.fr>
	#   <200308111450.h7BEoOu20077@mail.osdl.org>
	# But also do not try this with ->{is_submsg} (message/rfc822),
	# since a broken multipart/mixed inside a message/rfc822 part
	# has not been seen in the wild, yet...
	if ($err && ($ct =~ m!\btext/\b!i ||
			(!$part->{is_submsg} &&
				$ct =~ m!\bmultipart/mixed\b!i) ) ) {
		my $cte = $part->header_raw('Content-Transfer-Encoding');
		if (defined($cte) && $cte =~ /\b7bit\b/i) {
			$s = $part->body;
			$err = undef if $s =~ /\A[[:ascii:]]+\z/s;
		} else {
			# Try to assume UTF-8 because Alpine seems to
			# do wacky things and set charset=X-UNKNOWN
			$part->charset_set('UTF-8');
			$s = eval { $part->body_str };
		}

		# If forcing charset=UTF-8 failed,
		# caller will warn further down...
		$s = $part->body if $@;
	}
	($s, $err);
}

# returns an array of quoted or unquoted sections
sub split_quotes {
	# Quiet "Complex regular subexpression recursion limit" warning
	# in case an inconsiderate sender quotes 32K of text at once.
	# The warning from Perl is harmless for us since our callers can
	# tolerate less-than-ideal matches which work within Perl limits.
	no warnings 'regexp';
	split(/((?:^>[^\n]*\n)+)/sm, shift);
}

1;
public-inbox-1.6.1/lib/PublicInbox/MsgTime.pm000066400000000000000000000123701377346120300210120ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors 
# License: AGPL-3.0+ 

# Various date/time-related functions
package PublicInbox::MsgTime;
use strict;
use warnings;
use base qw(Exporter);
our @EXPORT_OK = qw(msg_timestamp msg_datestamp);
use Time::Local qw(timegm);
my @MoY = qw(january february march april may june
		july august september october november december);
my %MoY;
@MoY{@MoY} = (0..11);
@MoY{map { substr($_, 0, 3) } @MoY} = (0..11);

my %OBSOLETE_TZ = ( # RFC2822 4.3 (Obsolete Date and Time)
	EST => '-0500', EDT => '-0400',
	CST => '-0600', CDT => '-0500',
	MST => '-0700', MDT => '-0600',
	PST => '-0800', PDT => '-0700',
	UT => '+0000', GMT => '+0000', Z => '+0000',

	# RFC2822 states:
	#   The 1 character military time zones were defined in a non-standard
	#   way in [RFC822] and are therefore unpredictable in their meaning.
);
my $OBSOLETE_TZ = join('|', keys %OBSOLETE_TZ);

sub str2date_zone ($) {
	my ($date) = @_;
	my ($ts, $zone);

	# RFC822 is most likely for email, but we can tolerate an extra comma
	# or punctuation as long as all the data is there.
	# We'll use '\s' since Unicode spaces won't affect our parsing.
	# SpamAssassin ignores commas and redundant spaces, too.
	if ($date =~ /(?:[A-Za-z]+,?\s+)? # day-of-week
			([0-9]+),?\s+  # dd
			([A-Za-z]+)\s+ # mon
			([0-9]{2,4})\s+ # YYYY or YY (or YYY :P)
			([0-9]+)[:\.] # HH:
				((?:[0-9]{2})|(?:\s?[0-9])) # MM
				(?:[:\.]((?:[0-9]{2})|(?:\s?[0-9])))? # :SS
			\s+	# a TZ offset is required:
				([\+\-])? # TZ sign
				[\+\-]* # I've seen extra "-" e.g. "--500"
				([0-9]+|$OBSOLETE_TZ)(?:\s|$) # TZ offset
			/xo) {
		my ($dd, $m, $yyyy, $hh, $mm, $ss, $sign, $tz) =
					($1, $2, $3, $4, $5, $6, $7, $8);
		# don't accept non-English months
		defined(my $mon = $MoY{lc($m)}) or return;

		if (defined(my $off = $OBSOLETE_TZ{$tz})) {
			$sign = substr($off, 0, 1);
			$tz = substr($off, 1);
		}

		# Y2K problems: 3-digit years, follow RFC2822
		if (length($yyyy) <= 3) {
			$yyyy += 1900;

			# and 2-digit years from '09 (2009) (0..49)
			$yyyy += 100 if $yyyy < 1950;
		}

		$ts = timegm($ss // 0, $mm, $hh, $dd, $mon, $yyyy);

		# 4-digit dates in non-spam from 1900s and 1910s exist in
		# lore archives
		return if $ts < 0;

		# Compute the time offset from [+-]HHMM
		$tz //= 0;
		my ($tz_hh, $tz_mm);
		if (length($tz) == 1) {
			$tz_hh = $tz;
			$tz_mm = 0;
		} elsif (length($tz) == 2) {
			$tz_hh = 0;
			$tz_mm = $tz;
		} else {
			$tz_hh = $tz;
			$tz_hh =~ s/([0-9]{2})\z//;
			$tz_mm = $1;
		}
		while ($tz_mm >= 60) {
			$tz_mm -= 60;
			$tz_hh += 1;
		}
		$sign //= '+';
		my $off = $sign . ($tz_mm * 60 + ($tz_hh * 60 * 60));
		$ts -= $off;
		$sign = '+' if $off == 0;
		$zone = sprintf('%s%02d%02d', $sign, $tz_hh, $tz_mm);

	# Time::Zone and Date::Parse are part of the same distribution,
	# and we need Time::Zone to deal with tz names like "EDT"
	} elsif (eval { require Date::Parse }) {
		$ts = Date::Parse::str2time($date);
		return undef unless(defined $ts);

		# off is the time zone offset in seconds from GMT
		my ($ss,$mm,$hh,$day,$month,$year,$off) =
					Date::Parse::strptime($date);
		return unless defined($year);
		$off //= 0;

		# Compute the time zone from offset
		my $sign = ($off < 0) ? '-' : '+';
		my $hour = abs(int($off / 3600));
		my $min  = ($off / 60) % 60;

		# deal with weird offsets like '-0420' properly
		$min = 60 - $min if ($min && $off < 0);

		$zone = sprintf('%s%02d%02d', $sign, $hour, $min);
	} else {
		warn "Date::Parse missing for non-RFC822 date: $date\n";
		return undef;
	}

	# Note: we've already applied the offset to $ts at this point,
	# but we want to keep "git fsck" happy.
	# "-1200" is the furthest westermost zone offset,
	# but git fast-import is liberal so we use "-1400"
	if ($zone >= 1400 || $zone <= -1400) {
		warn "bogus TZ offset: $zone, ignoring and assuming +0000\n";
		$zone = '+0000';
	}
	[$ts, $zone];
}

sub time_response ($) {
	my ($ret) = @_;
	wantarray ? @$ret : $ret->[0];
}

sub msg_received_at ($) {
	my ($hdr) = @_; # PublicInbox::Eml
	my @recvd = $hdr->header_raw('Received');
	my ($ts);
	foreach my $r (@recvd) {
		$r =~ /\s*([0-9]+\s+[a-zA-Z]+\s+[0-9]{2,4}\s+
			[0-9]+[^0-9][0-9]+(?:[^0-9][0-9]+)
			\s+([\+\-][0-9]+))/sx or next;
		$ts = eval { str2date_zone($1) } and return $ts;
		my $mid = $hdr->header_raw('Message-ID');
		warn "no date in $mid Received: $r\n";
	}
	undef;
}

sub msg_date_only ($) {
	my ($hdr) = @_; # PublicInbox::Eml
	my @date = $hdr->header_raw('Date');
	my ($ts);
	foreach my $d (@date) {
		$ts = eval { str2date_zone($d) } and return $ts;
		if ($@) {
			my $mid = $hdr->header_raw('Message-ID');
			warn "bad Date: $d in $mid: $@\n";
		}
	}
	undef;
}

# Favors Received header for sorting globally
sub msg_timestamp ($;$) {
	my ($hdr, $fallback) = @_; # PublicInbox::Eml
	my $ret;
	$ret = msg_received_at($hdr) and return time_response($ret);
	$ret = msg_date_only($hdr) and return time_response($ret);
	time_response([ $fallback // time, '+0000' ]);
}

# Favors the Date: header for display and sorting within a thread
sub msg_datestamp ($;$) {
	my ($hdr, $fallback) = @_; # PublicInbox::Eml
	my $ret;
	$ret = msg_date_only($hdr) and return time_response($ret);
	$ret = msg_received_at($hdr) and return time_response($ret);
	time_response([ $fallback // time, '+0000' ]);
}

1;
public-inbox-1.6.1/lib/PublicInbox/Msgmap.pm000066400000000000000000000171311377346120300206710ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors 
# License: AGPL-3.0+ 

# bidirectional Message-ID <-> Article Number mapping for the NNTP
# and web interfaces.  This is required for implementing stable article
# numbers for NNTP and allows prefix lookups for partial Message-IDs
# in case URLs get truncated from copy-n-paste errors by users.
#
# This is maintained by ::SearchIdx (v1) and ::V2Writable (v2)
package PublicInbox::Msgmap;
use strict;
use v5.10.1;
use DBI;
use DBD::SQLite;
use PublicInbox::Over;
use PublicInbox::Spawn;

sub new {
	my ($class, $git_dir, $writable) = @_;
	my $d = "$git_dir/public-inbox";
	if ($writable && !-d $d && !mkdir $d) {
		my $err = $!;
		-d $d or die "$d not created: $err";
	}
	new_file($class, "$d/msgmap.sqlite3", $writable);
}

sub new_file {
	my ($class, $f, $rw) = @_;
	return if !$rw && !-r $f;

	my $self = bless { filename => $f }, $class;
	my $dbh = $self->{dbh} = PublicInbox::Over::dbh_new($self, $rw);
	if ($rw) {
		$dbh->begin_work;
		create_tables($dbh);
		$self->created_at(time) unless $self->created_at;

		my $max = $self->max // 0;
		$self->num_highwater($max);
		$dbh->commit;
	}
	$self;
}

# used to keep track of used numeric mappings for v2 reindex
sub tmp_clone {
	my ($self, $dir) = @_;
	require File::Temp;
	my $tmp = "mm_tmp-$$-XXXXXX";
	my ($fh, $fn) = File::Temp::tempfile($tmp, EXLOCK => 0, DIR => $dir);
	PublicInbox::Spawn::nodatacow_fd(fileno($fh));
	$self->{dbh}->sqlite_backup_to_file($fn);
	$tmp = ref($self)->new_file($fn, 2);
	$tmp->{dbh}->do('PRAGMA journal_mode = MEMORY');
	$tmp->{pid} = $$;
	$tmp;
}

# n.b. invoked directly by scripts/xhdr-num2mid
sub meta_accessor {
	my ($self, $key, $value) = @_;

	my $sql = 'SELECT val FROM meta WHERE key = ? LIMIT 1';
	my $dbh = $self->{dbh};
	my $prev;
	defined $value or return $dbh->selectrow_array($sql, undef, $key);

	$prev = $dbh->selectrow_array($sql, undef, $key);

	if (defined $prev) {
		$sql = 'UPDATE meta SET val = ? WHERE key = ?';
		$dbh->do($sql, undef, $value, $key);
	} else {
		$sql = 'INSERT INTO meta (key,val) VALUES (?,?)';
		$dbh->do($sql, undef, $key, $value);
	}
	$prev;
}

sub last_commit {
	my ($self, $commit) = @_;
	$self->meta_accessor('last_commit', $commit);
}

# v2 uses this to keep track of how up-to-date Xapian is
# old versions may be automatically GC'ed away in the future,
# but it's a trivial amount of storage.
sub last_commit_xap {
	my ($self, $version, $i, $commit) = @_;
	$self->meta_accessor("last_xap$version-$i", $commit);
}

# this is the UIDVALIDITY for IMAP (cf. RFC 3501 sec 2.3.1.1. item 3)
sub created_at {
	my ($self, $second) = @_;
	$self->meta_accessor('created_at', $second);
}

sub num_highwater {
	my ($self, $num) = @_;
	my $high = $self->{num_highwater} ||=
	    $self->meta_accessor('num_highwater');
	if (defined($num) && (!defined($high) || ($num > $high))) {
		$self->{num_highwater} = $num;
		$self->meta_accessor('num_highwater', $num);
	}
	$self->{num_highwater};
}

sub mid_insert {
	my ($self, $mid) = @_;
	my $dbh = $self->{dbh};
	my $sth = $dbh->prepare_cached(<<'');
INSERT INTO msgmap (mid) VALUES (?)

	return unless eval { $sth->execute($mid) };
	my $num = $dbh->last_insert_id(undef, undef, 'msgmap', 'num');
	$self->num_highwater($num) if defined($num);
	$num;
}

sub mid_for {
	my ($self, $num) = @_;
	my $dbh = $self->{dbh};
	my $sth = $self->{mid_for} ||=
		$dbh->prepare('SELECT mid FROM msgmap WHERE num = ? LIMIT 1');
	$sth->bind_param(1, $num);
	$sth->execute;
	$sth->fetchrow_array;
}

sub num_for {
	my ($self, $mid) = @_;
	my $dbh = $self->{dbh};
	my $sth = $self->{num_for} ||=
		$dbh->prepare('SELECT num FROM msgmap WHERE mid = ? LIMIT 1');
	$sth->bind_param(1, $mid);
	$sth->execute;
	$sth->fetchrow_array;
}

sub max {
	my $sth = $_[0]->{dbh}->prepare_cached('SELECT MAX(num) FROM msgmap',
						undef, 1);
	$sth->execute;
	$sth->fetchrow_array;
}

sub minmax {
	# breaking MIN and MAX into separate queries speeds up from 250ms
	# to around 700us with 2.7million messages.
	my $sth = $_[0]->{dbh}->prepare_cached('SELECT MIN(num) FROM msgmap',
						undef, 1);
	$sth->execute;
	($sth->fetchrow_array, max($_[0]));
}

sub mid_delete {
	my ($self, $mid) = @_;
	my $dbh = $self->{dbh};
	my $sth = $dbh->prepare('DELETE FROM msgmap WHERE mid = ?');
	$sth->bind_param(1, $mid);
	$sth->execute;
}

sub num_delete {
	my ($self, $num) = @_;
	my $dbh = $self->{dbh};
	my $sth = $dbh->prepare('DELETE FROM msgmap WHERE num = ?');
	$sth->bind_param(1, $num);
	$sth->execute;
}

sub create_tables {
	my ($dbh) = @_;

	$dbh->do(<<'');
CREATE TABLE IF NOT EXISTS msgmap (
	num INTEGER PRIMARY KEY AUTOINCREMENT,
	mid VARCHAR(1000) NOT NULL,
	UNIQUE (mid)
)

	$dbh->do(<<'');
CREATE TABLE IF NOT EXISTS meta (
	key VARCHAR(32) PRIMARY KEY,
	val VARCHAR(255) NOT NULL
)

}

# used by NNTP.pm
sub ids_after {
	my ($self, $num) = @_;
	my $ids = $self->{dbh}->selectcol_arrayref(<<'', undef, $$num);
SELECT num FROM msgmap WHERE num > ?
ORDER BY num ASC LIMIT 1000

	$$num = $ids->[-1] if @$ids;
	$ids;
}

sub msg_range {
	my ($self, $beg, $end, $cols) = @_;
	$cols //= 'num,mid';
	my $dbh = $self->{dbh};
	my $attr = { Columns => [] };
	my $mids = $dbh->selectall_arrayref(<<"", $attr, $$beg, $end);
SELECT $cols FROM msgmap WHERE num >= ? AND num <= ?
ORDER BY num ASC LIMIT 1000

	$$beg = $mids->[-1]->[0] + 1 if @$mids;
	$mids
}

# only used for mapping external serial numbers (e.g. articles from gmane)
# see scripts/xhdr-num2mid or PublicInbox::Filter::RubyLang for usage
sub mid_set {
	my ($self, $num, $mid) = @_;
	my $sth = $self->{mid_set} ||= do {
		$self->{dbh}->prepare(
			'INSERT OR IGNORE INTO msgmap (num,mid) VALUES (?,?)');
	};
	my $result = $sth->execute($num, $mid);
	$self->num_highwater($num) if (defined($result) && $result == 1);
	$result;
}

sub DESTROY {
	my ($self) = @_;
	my $dbh = $self->{dbh} or return;
	if (($self->{pid} // 0) == $$) {
		my $f = $dbh->sqlite_db_filename;
		unlink $f or warn "failed to unlink $f: $!\n";
	}
}

sub atfork_parent {
	my ($self) = @_;
	$self->{pid} or die 'BUG: not a temporary clone';
	$self->{dbh} and die 'BUG: tmp_clone dbh not prepared for parent';
	defined($self->{filename}) or die 'BUG: {filename} not defined';
	$self->{dbh} = PublicInbox::Over::dbh_new($self, 2);
	$self->{dbh}->do('PRAGMA journal_mode = MEMORY');
}

sub atfork_prepare {
	my ($self) = @_;
	my $pid = $self->{pid} or die 'BUG: not a temporary clone';
	$pid == $$ or die "BUG: atfork_prepare not called by $pid";
	my $dbh = $self->{dbh} or die 'BUG: temporary clone not open';

	# must clobber prepared statements
	%$self = (filename => $dbh->sqlite_db_filename, pid => $pid);
}

sub skip_artnum {
	my ($self, $skip_artnum) = @_;
	return meta_accessor($self, 'skip_artnum') if !defined($skip_artnum);

	my $cur = num_highwater($self) // 0;
	if ($skip_artnum < $cur) {
		die "E: current article number $cur ",
			"exceeds --skip-artnum=$skip_artnum\n";
	} else {
		my $ok;
		for (1..10) {
			my $mid = 'skip'.rand.'@'.rand.'.example.com';
			$ok = mid_set($self, $skip_artnum, $mid);
			if ($ok) {
				mid_delete($self, $mid);
				last;
			}
		}
		$ok or die '--skip-artnum failed';

		# in the future, the indexer may use this value for
		# new messages in old epochs
		meta_accessor($self, 'skip_artnum', $skip_artnum);
	}
}

sub check_inodes {
	my ($self) = @_;
	# no filename if in-:memory:
	my $f = $self->{dbh}->sqlite_db_filename // return;
	if (my @st = stat($f)) { # did st_dev, st_ino change?
		my $st = pack('dd', $st[0], $st[1]);
		if ($st ne ($self->{st} // $st)) {
			my $tmp = eval { ref($self)->new_file($f) };
			if ($@) {
				warn "E: DBI->connect($f): $@\n";
			} else {
				%$self = %$tmp;
			}
		}
	} else {
		warn "W: stat $f: $!\n";
	}
}

1;
public-inbox-1.6.1/lib/PublicInbox/NDC_PP.pm000066400000000000000000000016311377346120300204460ustar00rootroot00000000000000# Copyright (C) 2020 all contributors 
# License: AGPL-3.0+ 

# Pure-perl class for Linux non-Inline::C users to disable COW for btrfs
package PublicInbox::NDC_PP;
use strict;
use v5.10.1;

sub nodatacow_dir ($) {
	my ($path) = @_;
	open my $mh, '<', '/proc/self/mounts' or return;
	for (grep(/ btrfs /, <$mh>)) {
		my (undef, $mnt_path, $type) = split(/ /);
		next if $type ne 'btrfs'; # in case of false-positive from grep

		# weird chars are escaped as octal
		$mnt_path =~ s/\\(0[0-9]{2})/chr(oct($1))/egs;
		$mnt_path .= '/' unless $mnt_path =~ m!/\z!;
		if (index($path, $mnt_path) == 0) {
			# error goes to stderr, but non-fatal for us
			system('chattr', '+C', $path);
			last;
		}
	}
}

sub nodatacow_fd ($) {
	my ($fd) = @_;
	return if $^O ne 'linux';
	defined(my $path = readlink("/proc/self/fd/$fd")) or return;
	nodatacow_dir($path);
}

1;
public-inbox-1.6.1/lib/PublicInbox/NNTP.pm000066400000000000000000000647201377346120300202320ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors 
# License: AGPL-3.0+ 
#
# Each instance of this represents a NNTP client socket
# fields:
# nntpd: PublicInbox::NNTPD ref
# article: per-session current article number
# ng: PublicInbox::Inbox ref
# long_cb: long_response private data
package PublicInbox::NNTP;
use strict;
use parent qw(PublicInbox::DS);
use PublicInbox::MID qw(mid_escape $MID_EXTRACT);
use PublicInbox::Eml;
use POSIX qw(strftime);
use PublicInbox::DS qw(now);
use Digest::SHA qw(sha1_hex);
use Time::Local qw(timegm timelocal);
use PublicInbox::GitAsyncCat;
use constant {
	LINE_MAX => 512, # RFC 977 section 2.3
	r501 => '501 command syntax error',
	r502 => '502 Command unavailable',
	r221 => '221 Header follows',
	r224 => '224 Overview information follows (multi-line)',
	r225 =>	'225 Headers follow (multi-line)',
	r430 => '430 No article with that message-id',
};
use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
use Errno qw(EAGAIN);
my $ONE_MSGID = qr/\A$MID_EXTRACT\z/;
my @OVERVIEW = qw(Subject From Date Message-ID References);
my $OVERVIEW_FMT = join(":\r\n", @OVERVIEW, qw(Bytes Lines), '') .
		"Xref:full\r\n";
my $LIST_HEADERS = join("\r\n", @OVERVIEW,
			qw(:bytes :lines Xref To Cc)) . "\r\n";
my $CAPABILITIES = <<"";
101 Capability list:\r
VERSION 2\r
READER\r
NEWNEWS\r
LIST ACTIVE ACTIVE.TIMES NEWSGROUPS OVERVIEW.FMT\r
HDR\r
OVER\r
COMPRESS DEFLATE\r

sub greet ($) { $_[0]->write($_[0]->{nntpd}->{greet}) };

sub new ($$$) {
	my ($class, $sock, $nntpd) = @_;
	my $self = bless { nntpd => $nntpd }, $class;
	my $ev = EPOLLIN;
	my $wbuf;
	if ($sock->can('accept_SSL') && !$sock->accept_SSL) {
		return CORE::close($sock) if $! != EAGAIN;
		$ev = PublicInbox::TLS::epollbit() or return CORE::close($sock);
		$wbuf = [ \&PublicInbox::DS::accept_tls_step, \&greet ];
	}
	$self->SUPER::new($sock, $ev | EPOLLONESHOT);
	if ($wbuf) {
		$self->{wbuf} = $wbuf;
	} else {
		greet($self);
	}
	$self->update_idle_time;
	$self;
}

sub args_ok ($$) {
	my ($cb, $argc) = @_;
	my $tot = prototype $cb;
	my ($nreq, undef) = split(';', $tot);
	$nreq = ($nreq =~ tr/$//) - 1;
	$tot = ($tot =~ tr/$//) - 1;
	($argc <= $tot && $argc >= $nreq);
}

# returns 1 if we can continue, 0 if not due to buffered writes or disconnect
sub process_line ($$) {
	my ($self, $l) = @_;
	my ($req, @args) = split(/[ \t]+/, $l);
	return 1 unless defined($req); # skip blank line
	$req = $self->can('cmd_'.lc($req));
	return res($self, '500 command not recognized') unless $req;
	return res($self, r501) unless args_ok($req, scalar @args);

	my $res = eval { $req->($self, @args) };
	my $err = $@;
	if ($err && $self->{sock}) {
		local $/ = "\n";
		chomp($l);
		err($self, 'error from: %s (%s)', $l, $err);
		$res = '503 program fault - command not performed';
	}
	return 0 unless defined $res;
	res($self, $res);
}

# The keyword argument is not used (rfc3977 5.2.2)
sub cmd_capabilities ($;$) {
	my ($self, undef) = @_;
	my $res = $CAPABILITIES;
	if (!$self->{sock}->can('accept_SSL') &&
			$self->{nntpd}->{accept_tls}) {
		$res .= "STARTTLS\r\n";
	}
	$res .= '.';
}

sub cmd_mode ($$) {
	my ($self, $arg) = @_;
	$arg = uc $arg;
	return r501 unless $arg eq 'READER';
	'201 Posting prohibited';
}

sub cmd_slave ($) { '202 slave status noted' }

sub cmd_xgtitle ($;$) {
	my ($self, $wildmat) = @_;
	more($self, '282 list of groups and descriptions follows');
	list_newsgroups($self, $wildmat);
	'.'
}

sub list_overview_fmt ($) {
	my ($self) = @_;
	$self->msg_more($OVERVIEW_FMT);
}

sub list_headers ($;$) {
	my ($self) = @_;
	$self->msg_more($LIST_HEADERS);
}

sub list_active ($;$) {
	my ($self, $wildmat) = @_;
	wildmat2re($wildmat);
	foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
		$ng->{newsgroup} =~ $wildmat or next;
		group_line($self, $ng);
	}
}

sub list_active_times ($;$) {
	my ($self, $wildmat) = @_;
	wildmat2re($wildmat);
	foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
		$ng->{newsgroup} =~ $wildmat or next;
		my $c = eval { $ng->mm->created_at } || time;
		more($self, "$ng->{newsgroup} $c $ng->{-primary_address}");
	}
}

sub list_newsgroups ($;$) {
	my ($self, $wildmat) = @_;
	wildmat2re($wildmat);
	foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
		$ng->{newsgroup} =~ $wildmat or next;
		my $d = $ng->description;
		more($self, "$ng->{newsgroup} $d");
	}
}

# LIST SUBSCRIPTIONS, DISTRIB.PATS are not supported
sub cmd_list ($;$$) {
	my ($self, @args) = @_;
	if (scalar @args) {
		my $arg = shift @args;
		$arg =~ tr/A-Z./a-z_/;
		$arg = "list_$arg";
		$arg = $self->can($arg);
		return r501 unless $arg && args_ok($arg, scalar @args);
		more($self, '215 information follows');
		$arg->($self, @args);
	} else {
		more($self, '215 list of newsgroups follows');
		foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
			group_line($self, $ng);
		}
	}
	'.'
}

sub listgroup_range_i {
	my ($self, $beg, $end) = @_;
	my $r = $self->{ng}->mm->msg_range($beg, $end, 'num');
	scalar(@$r) or return;
	more($self, join("\r\n", map { $_->[0] } @$r));
	1;
}

sub listgroup_all_i {
	my ($self, $num) = @_;
	my $ary = $self->{ng}->mm->ids_after($num);
	scalar(@$ary) or return;
	more($self, join("\r\n", @$ary));
	1;
}

sub cmd_listgroup ($;$$) {
	my ($self, $group, $range) = @_;
	if (defined $group) {
		my $res = cmd_group($self, $group);
		return $res if ($res !~ /\A211 /);
		more($self, $res);
	}
	$self->{ng} or return '412 no newsgroup selected';
	if (defined $range) {
		my $r = get_range($self, $range);
		return $r unless ref $r;
		long_response($self, \&listgroup_range_i, @$r);
	} else { # grab every article number
		long_response($self, \&listgroup_all_i, \(my $num = 0));
	}
}

sub parse_time ($$;$) {
	my ($date, $time, $gmt) = @_;
	my ($hh, $mm, $ss) = unpack('A2A2A2', $time);
	if (defined $gmt) {
		$gmt =~ /\A(?:UTC|GMT)\z/i or die "GM invalid: $gmt";
		$gmt = 1;
	}
	my ($YYYY, $MM, $DD);
	if (bytes::length($date) == 8) { # RFC 3977 allows YYYYMMDD
		($YYYY, $MM, $DD) = unpack('A4A2A2', $date);
	} else { # legacy clients send YYMMDD
		my $YY;
		($YY, $MM, $DD) = unpack('A2A2A2', $date);
		my @now = $gmt ? gmtime : localtime;
		my $cur_year = $now[5] + 1900;
		my $cur_cent = int($cur_year / 100) * 100;
		$YYYY = (($YY + $cur_cent) > $cur_year) ?
			($YY + 1900) : ($YY + $cur_cent);
	}
	if ($gmt) {
		timegm($ss, $mm, $hh, $DD, $MM - 1, $YYYY);
	} else {
		timelocal($ss, $mm, $hh, $DD, $MM - 1, $YYYY);
	}
}

sub group_line ($$) {
	my ($self, $ng) = @_;
	my ($min, $max) = $ng->mm->minmax;
	more($self, "$ng->{newsgroup} $max $min n") if defined $min && defined $max;
}

sub cmd_newgroups ($$$;$$) {
	my ($self, $date, $time, $gmt, $dists) = @_;
	my $ts = eval { parse_time($date, $time, $gmt) };
	return r501 if $@;

	# TODO dists
	more($self, '231 list of new newsgroups follows');
	foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
		my $c = eval { $ng->mm->created_at } || 0;
		next unless $c > $ts;
		group_line($self, $ng);
	}
	'.'
}

sub wildmat2re (;$) {
	return $_[0] = qr/.*/ if (!defined $_[0] || $_[0] eq '*');
	my %keep;
	my $salt = rand;
	my $tmp = $_[0];

	$tmp =~ s#(? '.*', '?' => '.' );
	$tmp =~ s#(? '.*', ',' => '|');
	$_[0] =~ s!(.)!$map{$1} || "\Q$1"!ge;
	$_[0] = qr/\A(?:$_[0])\z/;
}

sub newnews_i {
	my ($self, $overs, $ts, $prev) = @_;
	my $over = $overs->[0];
	my $msgs = $over->query_ts($ts, $$prev);
	if (scalar @$msgs) {
		more($self, '<' .
			join(">\r\n<", map { $_->{mid} } @$msgs ).
			'>');
		$$prev = $msgs->[-1]->{num};
	} else {
		shift @$overs;
		if (@$overs) { # continue onto next newsgroup
			$$prev = 0;
			return 1;
		} else { # break out of the long response.
			return;
		}
	}
}

sub cmd_newnews ($$$$;$$) {
	my ($self, $newsgroups, $date, $time, $gmt, $dists) = @_;
	my $ts = eval { parse_time($date, $time, $gmt) };
	return r501 if $@;
	more($self, '230 list of new articles by message-id follows');
	my ($keep, $skip) = split('!', $newsgroups, 2);
	ngpat2re($keep);
	ngpat2re($skip);
	my @overs;
	foreach my $ng (@{$self->{nntpd}->{grouplist}}) {
		$ng->{newsgroup} =~ $keep or next;
		$ng->{newsgroup} =~ $skip and next;
		my $over = $ng->over or next;
		push @overs, $over;
	};
	return '.' unless @overs;

	my $prev = 0;
	long_response($self, \&newnews_i, \@overs, $ts, \$prev);
}

sub cmd_group ($$) {
	my ($self, $group) = @_;
	my $no_such = '411 no such news group';
	my $nntpd = $self->{nntpd};
	my $ng = $nntpd->{groups}->{$group} or return $no_such;
	$nntpd->idler_start;

	$self->{ng} = $ng;
	my ($min, $max) = $ng->mm->minmax;
	$min ||= 0;
	$max ||= 0;
	$self->{article} = $min;
	my $est_size = $max - $min;
	"211 $est_size $min $max $group";
}

sub article_adj ($$) {
	my ($self, $off) = @_;
	my $ng = $self->{ng} or return '412 no newsgroup selected';

	my $n = $self->{article};
	defined $n or return '420 no current article has been selected';

	$n += $off;
	my $mid = $ng->mm->mid_for($n);
	unless ($mid) {
		$n = $off > 0 ? 'next' : 'previous';
		return "421 no $n article in this group";
	}
	$self->{article} = $n;
	"223 $n <$mid> article retrieved - request text separately";
}

sub cmd_next ($) { article_adj($_[0], 1) }
sub cmd_last ($) { article_adj($_[0], -1) }

# We want to encourage using email and CC-ing everybody involved to avoid
# the single-point-of-failure a single server provides.
sub cmd_post ($) {
	my ($self) = @_;
	my $ng = $self->{ng};
	$ng ? "440 mailto:$ng->{-primary_address} to post"
		: '440 posting not allowed'
}

sub cmd_quit ($) {
	my ($self) = @_;
	res($self, '205 closing connection - goodbye!');
	$self->shutdn;
	undef;
}

sub header_append ($$$) {
	my ($hdr, $k, $v) = @_;
	my @v = $hdr->header_raw($k);
	foreach (@v) {
		return if $v eq $_;
	}
	$hdr->header_set($k, @v, $v);
}

sub xref ($$$$) {
	my ($self, $ng, $n, $mid) = @_;
	my $ret = $self->{nntpd}->{servername} . " $ng->{newsgroup}:$n";

	# num_for is pretty cheap and sometimes we'll lookup the existence
	# of an article without getting even the OVER info.  In other words,
	# I'm not sure if its worth optimizing by scanning To:/Cc: and
	# PublicInbox::ExtMsg on the PSGI end is just as expensive
	foreach my $other (@{$self->{nntpd}->{grouplist}}) {
		next if $ng eq $other;
		my $num = eval { $other->mm->num_for($mid) } or next;
		$ret .= " $other->{newsgroup}:$num";
	}
	$ret;
}

sub set_nntp_headers ($$) {
	my ($hdr, $smsg) = @_;
	my ($mid) = $smsg->{mid};

	# leafnode (and maybe other NNTP clients) have trouble dealing
	# with v2 messages which have multiple Message-IDs (either due
	# to our own content-based dedupe or buggy git-send-email versions).
	my @mids = $hdr->header_raw('Message-ID');
	if (scalar(@mids) > 1) {
		my $mid0 = "<$mid>";
		$hdr->header_set('Message-ID', $mid0);
		my @alt = $hdr->header_raw('X-Alt-Message-ID');
		my %seen = map { $_ => 1 } (@alt, $mid0);
		push(@alt, grep { !$seen{$_}++ } @mids);
		$hdr->header_set('X-Alt-Message-ID', @alt);
	}

	# clobber some existing headers
	my $ibx = $smsg->{-ibx};
	my $xref = xref($smsg->{nntp}, $ibx, $smsg->{num}, $mid);
	$hdr->header_set('Xref', $xref);

	# RFC 5536 3.1.4
	my ($server_name, $newsgroups) = split(/ /, $xref, 2);
	$newsgroups =~ s/:[0-9]+\b//g; # drop NNTP article numbers
	$newsgroups =~ tr/ /,/;
	$hdr->header_set('Newsgroups', $newsgroups);

	# *something* here is required for leafnode, try to follow
	# RFC 5536 3.1.5...
	$hdr->header_set('Path', $server_name . '!not-for-mail');

	header_append($hdr, 'List-Post', "{-primary_address}>");
	if (my $url = $ibx->base_url) {
		$mid = mid_escape($mid);
		header_append($hdr, 'Archived-At', "<$url$mid/>");
		header_append($hdr, 'List-Archive', "<$url>");
	}
}

sub art_lookup ($$$) {
	my ($self, $art, $code) = @_;
	my $ng = $self->{ng};
	my ($n, $mid);
	my $err;
	if (defined $art) {
		if ($art =~ /\A[0-9]+\z/) {
			$err = '423 no such article number in this group';
			$n = int($art);
			goto find_mid;
		} elsif ($art =~ $ONE_MSGID) {
			$mid = $1;
			$err = r430;
			$n = $ng->mm->num_for($mid) if $ng;
			goto found if defined $n;
			foreach my $g (values %{$self->{nntpd}->{groups}}) {
				$n = $g->mm->num_for($mid);
				if (defined $n) {
					$ng = $g;
					goto found;
				}
			}
			return $err;
		} else {
			return r501;
		}
	} else {
		$err = '420 no current article has been selected';
		$n = $self->{article};
		defined $n or return $err;
find_mid:
		$ng or return '412 no newsgroup has been selected';
		$mid = $ng->mm->mid_for($n);
		defined $mid or return $err;
	}
found:
	my $smsg = $ng->over->get_art($n) or return $err;
	$smsg->{-ibx} = $ng;
	if ($code == 223) { # STAT
		set_art($self, $n);
		"223 $n <$smsg->{mid}> article retrieved - " .
			"request text separately";
	} else { # HEAD | BODY | ARTICLE
		$smsg->{nntp} = $self;
		$smsg->{nntp_code} = $code;
		set_art($self, $art);
		# this dereferences to `undef'
		${git_async_cat($ng->git, $smsg->{blob}, \&blob_cb, $smsg)};
	}
}

sub msg_body_write ($$) {
	my ($self, $msg) = @_;

	# these can momentarily double the memory consumption :<
	$$msg =~ s/^\./../smg;
	$$msg =~ s/(?msg_more($$msg);
}

sub set_art {
	my ($self, $art) = @_;
	$self->{article} = $art if defined $art && $art =~ /\A[0-9]+\z/;
}

sub msg_hdr_write ($$) {
	my ($eml, $smsg) = @_;
	set_nntp_headers($eml, $smsg);

	my $hdr = $eml->{hdr} // \(my $x = '');
	# fixup old bug from import (pre-a0c07cba0e5d8b6a)
	$$hdr =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
	$$hdr =~ s/(?{nntp}->msg_more($$hdr);
}

sub blob_cb { # called by git->cat_async via git_async_cat
	my ($bref, $oid, $type, $size, $smsg) = @_;
	my $self = $smsg->{nntp};
	my $code = $smsg->{nntp_code};
	if (!defined($oid)) {
		# it's possible to have TOCTOU if an admin runs
		# public-inbox-(edit|purge), just move onto the next message
		warn "E: $smsg->{blob} missing in $smsg->{-ibx}->{inboxdir}\n";
		return $self->requeue;
	} elsif ($smsg->{blob} ne $oid) {
		$self->close;
		die "BUG: $smsg->{blob} != $oid";
	}
	my $r = "$code $smsg->{num} <$smsg->{mid}> article retrieved - ";
	my $eml = PublicInbox::Eml->new($bref);
	if ($code == 220) {
		more($self, $r .= 'head and body follow');
		msg_hdr_write($eml, $smsg);
		$self->msg_more("\r\n");
		msg_body_write($self, $bref);
	} elsif ($code == 221) {
		more($self, $r .= 'head follows');
		msg_hdr_write($eml, $smsg);
	} elsif ($code == 222) {
		more($self, $r .= 'body follows');
		msg_body_write($self, $bref);
	} else {
		$self->close;
		die "BUG: bad code: $r";
	}
	$self->write(\".\r\n"); # flushes (includes ->zflush)
	$self->requeue;
}

sub cmd_article ($;$) {
	my ($self, $art) = @_;
	art_lookup($self, $art, 220);
}

sub cmd_head ($;$) {
	my ($self, $art) = @_;
	art_lookup($self, $art, 221);
}

sub cmd_body ($;$) {
	my ($self, $art) = @_;
	art_lookup($self, $art, 222);
}

sub cmd_stat ($;$) {
	my ($self, $art) = @_;
	art_lookup($self, $art, 223); # art may be msgid
}

sub cmd_ihave ($) { '435 article not wanted - do not send it' }

sub cmd_date ($) { '111 '.strftime('%Y%m%d%H%M%S', gmtime(time)) }

sub cmd_help ($) {
	my ($self) = @_;
	more($self, '100 help text follows');
	'.'
}

sub get_range ($$) {
	my ($self, $range) = @_;
	my $ng = $self->{ng} or return '412 no news group has been selected';
	defined $range or return '420 No article(s) selected';
	my ($beg, $end);
	my ($min, $max) = $ng->mm->minmax;
	if ($range =~ /\A([0-9]+)\z/) {
		$beg = $end = $1;
	} elsif ($range =~ /\A([0-9]+)-\z/) {
		($beg, $end) = ($1, $max);
	} elsif ($range =~ /\A([0-9]+)-([0-9]+)\z/) {
		($beg, $end) = ($1, $2);
	} else {
		return r501;
	}
	$beg = $min if ($beg < $min);
	$end = $max if ($end > $max);
	return '420 No article(s) selected' if ($beg > $end);
	[ \$beg, $end ];
}

sub long_step {
	my ($self) = @_;
	# wbuf is unset or empty, here; {long} may add to it
	my ($fd, $cb, $t0, @args) = @{$self->{long_cb}};
	my $more = eval { $cb->($self, @args) };
	if ($@ || !$self->{sock}) { # something bad happened...
		delete $self->{long_cb};
		my $elapsed = now() - $t0;
		if ($@) {
			err($self,
			    "%s during long response[$fd] - %0.6f",
			    $@, $elapsed);
		}
		out($self, " deferred[$fd] aborted - %0.6f", $elapsed);
		$self->close;
	} elsif ($more) { # $self->{wbuf}:
		$self->update_idle_time;

		# COMPRESS users all share the same DEFLATE context.
		# Flush it here to ensure clients don't see
		# each other's data
		$self->zflush;

		# no recursion, schedule another call ASAP, but only after
		# all pending writes are done.  autovivify wbuf:
		my $new_size = push(@{$self->{wbuf}}, \&long_step);

		# wbuf may be populated by $cb, no need to rearm if so:
		$self->requeue if $new_size == 1;
	} else { # all done!
		delete $self->{long_cb};
		res($self, '.');
		my $elapsed = now() - $t0;
		my $fd = fileno($self->{sock});
		out($self, " deferred[$fd] done - %0.6f", $elapsed);
		my $wbuf = $self->{wbuf}; # do NOT autovivify
		$self->requeue unless $wbuf && @$wbuf;
	}
}

sub long_response ($$;@) {
	my ($self, $cb, @args) = @_; # cb returns true if more, false if done

	my $sock = $self->{sock} or return;
	# make sure we disable reading during a long response,
	# clients should not be sending us stuff and making us do more
	# work while we are stream a response to them
	$self->{long_cb} = [ fileno($sock), $cb, now(), @args ];
	long_step($self); # kick off!
	undef;
}

sub hdr_msgid_range_i {
	my ($self, $beg, $end) = @_;
	my $r = $self->{ng}->mm->msg_range($beg, $end);
	@$r or return;
	more($self, join("\r\n", map { "$_->[0] <$_->[1]>" } @$r));
	1;
}

sub hdr_message_id ($$$) { # optimize XHDR Message-ID [range] for slrnpull.
	my ($self, $xhdr, $range) = @_;

	if (defined $range && $range =~ $ONE_MSGID) {
		my ($ng, $n) = mid_lookup($self, $1);
		return r430 unless $n;
		hdr_mid_response($self, $xhdr, $ng, $n, $range, $range);
	} else { # numeric range
		$range = $self->{article} unless defined $range;
		my $r = get_range($self, $range);
		return $r unless ref $r;
		more($self, $xhdr ? r221 : r225);
		long_response($self, \&hdr_msgid_range_i, @$r);
	}
}

sub mid_lookup ($$) {
	my ($self, $mid) = @_;
	my $self_ng = $self->{ng};
	if ($self_ng) {
		my $n = $self_ng->mm->num_for($mid);
		return ($self_ng, $n) if defined $n;
	}
	foreach my $ng (values %{$self->{nntpd}->{groups}}) {
		next if defined $self_ng && $ng eq $self_ng;
		my $n = $ng->mm->num_for($mid);
		return ($ng, $n) if defined $n;
	}
	(undef, undef);
}

sub xref_range_i {
	my ($self, $beg, $end) = @_;
	my $ng = $self->{ng};
	my $r = $ng->mm->msg_range($beg, $end);
	@$r or return;
	more($self, join("\r\n", map {
		my $num = $_->[0];
		"$num ".xref($self, $ng, $num, $_->[1]);
	} @$r));
	1;
}

sub hdr_xref ($$$) { # optimize XHDR Xref [range] for rtin
	my ($self, $xhdr, $range) = @_;

	if (defined $range && $range =~ $ONE_MSGID) {
		my $mid = $1;
		my ($ng, $n) = mid_lookup($self, $mid);
		return r430 unless $n;
		hdr_mid_response($self, $xhdr, $ng, $n, $range,
				xref($self, $ng, $n, $mid));
	} else { # numeric range
		$range = $self->{article} unless defined $range;
		my $r = get_range($self, $range);
		return $r unless ref $r;
		more($self, $xhdr ? r221 : r225);
		long_response($self, \&xref_range_i, @$r);
	}
}

sub over_header_for {
	my ($over, $num, $field) = @_;
	my $smsg = $over->get_art($num) or return;
	return PublicInbox::Smsg::date($smsg) if $field eq 'date';
	$smsg->{$field};
}

sub smsg_range_i {
	my ($self, $beg, $end, $field) = @_;
	my $over = $self->{ng}->over;
	my $msgs = $over->query_xover($$beg, $end);
	scalar(@$msgs) or return;
	my $tmp = '';

	# ->{$field} is faster than ->$field invocations, so favor that.
	if ($field eq 'date') {
		for my $s (@$msgs) {
			$tmp .= "$s->{num} ".PublicInbox::Smsg::date($s)."\r\n"
		}
	} else {
		for my $s (@$msgs) {
			$tmp .= "$s->{num} $s->{$field}\r\n";
		}
	}
	utf8::encode($tmp);
	$self->msg_more($tmp);
	$$beg = $msgs->[-1]->{num} + 1;
}

sub hdr_smsg ($$$$) {
	my ($self, $xhdr, $field, $range) = @_;
	if (defined $range && $range =~ $ONE_MSGID) {
		my ($ng, $n) = mid_lookup($self, $1);
		return r430 unless defined $n;
		my $v = over_header_for($ng->over, $n, $field);
		hdr_mid_response($self, $xhdr, $ng, $n, $range, $v);
	} else { # numeric range
		$range = $self->{article} unless defined $range;
		my $r = get_range($self, $range);
		return $r unless ref $r;
		more($self, $xhdr ? r221 : r225);
		long_response($self, \&smsg_range_i, @$r, $field);
	}
}

sub do_hdr ($$$;$) {
	my ($self, $xhdr, $header, $range) = @_;
	my $sub = lc $header;
	if ($sub eq 'message-id') {
		hdr_message_id($self, $xhdr, $range);
	} elsif ($sub eq 'xref') {
		hdr_xref($self, $xhdr, $range);
	} elsif ($sub =~ /\A(?:subject|references|date|from|to|cc|
				bytes|lines)\z/x) {
		hdr_smsg($self, $xhdr, $sub, $range);
	} elsif ($sub =~ /\A:(bytes|lines)\z/) {
		hdr_smsg($self, $xhdr, $1, $range);
	} else {
		$xhdr ? (r221 . "\r\n.") : "503 HDR not permitted on $header";
	}
}

# RFC 3977
sub cmd_hdr ($$;$) {
	my ($self, $header, $range) = @_;
	do_hdr($self, 0, $header, $range);
}

# RFC 2980
sub cmd_xhdr ($$;$) {
	my ($self, $header, $range) = @_;
	do_hdr($self, 1, $header, $range);
}

sub hdr_mid_prefix ($$$$$) {
	my ($self, $xhdr, $ng, $n, $mid) = @_;
	return $mid if $xhdr;

	# HDR for RFC 3977 users
	if (my $self_ng = $self->{ng}) {
		($self_ng eq $ng) ? $n : '0';
	} else {
		'0';
	}
}

sub hdr_mid_response ($$$$$$) {
	my ($self, $xhdr, $ng, $n, $mid, $v) = @_;
	my $res = '';
	if ($xhdr) {
		$res .= r221 . "\r\n";
		$res .= "$mid $v\r\n";
	} else {
		$res .= r225 . "\r\n";
		my $pfx = hdr_mid_prefix($self, $xhdr, $ng, $n, $mid);
		$res .= "$pfx $v\r\n";
	}
	res($self, $res .= '.');
	undef;
}

sub xrover_i {
	my ($self, $beg, $end) = @_;
	my $h = over_header_for($self->{ng}->over, $$beg, 'references');
	more($self, "$$beg $h") if defined($h);
	$$beg++ < $end;
}

sub cmd_xrover ($;$) {
	my ($self, $range) = @_;
	my $ng = $self->{ng} or return '412 no newsgroup selected';
	(defined $range && $range =~ /[<>]/) and
		return '420 No article(s) selected'; # no message IDs

	$range = $self->{article} unless defined $range;
	my $r = get_range($self, $range);
	return $r unless ref $r;
	more($self, '224 Overview information follows');
	long_response($self, \&xrover_i, @$r);
}

sub over_line ($$$$) {
	my ($self, $ng, $num, $smsg) = @_;
	# n.b. field access and procedural calls can be
	# 10%-15% faster than OO method calls:
	my $s = join("\t", $num,
		$smsg->{subject},
		$smsg->{from},
		PublicInbox::Smsg::date($smsg),
		"<$smsg->{mid}>",
		$smsg->{references},
		$smsg->{bytes},
		$smsg->{lines},
		"Xref: " . xref($self, $ng, $num, $smsg->{mid}));
	utf8::encode($s);
	$s
}

sub cmd_over ($;$) {
	my ($self, $range) = @_;
	if ($range && $range =~ $ONE_MSGID) {
		my ($ng, $n) = mid_lookup($self, $1);
		defined $n or return r430;
		my $smsg = $ng->over->get_art($n) or return r430;
		more($self, '224 Overview information follows (multi-line)');

		# Only set article number column if it's the current group
		my $self_ng = $self->{ng};
		$n = 0 if (!$self_ng || $self_ng ne $ng);
		more($self, over_line($self, $ng, $n, $smsg));
		'.';
	} else {
		cmd_xover($self, $range);
	}
}

sub xover_i {
	my ($self, $beg, $end) = @_;
	my $ng = $self->{ng};
	my $msgs = $ng->over->query_xover($$beg, $end);
	my $nr = scalar @$msgs or return;

	# OVERVIEW.FMT
	more($self, join("\r\n", map {
		over_line($self, $ng, $_->{num}, $_);
		} @$msgs));
	$$beg = $msgs->[-1]->{num} + 1;
}

sub cmd_xover ($;$) {
	my ($self, $range) = @_;
	$range = $self->{article} unless defined $range;
	my $r = get_range($self, $range);
	return $r unless ref $r;
	my ($beg, $end) = @$r;
	more($self, "224 Overview information follows for $$beg to $end");
	long_response($self, \&xover_i, @$r);
}

sub compressed { undef }

sub cmd_starttls ($) {
	my ($self) = @_;
	my $sock = $self->{sock} or return;
	# RFC 4642 2.2.1
	return r502 if ($sock->can('accept_SSL') || $self->compressed);
	my $opt = $self->{nntpd}->{accept_tls} or
		return '580 can not initiate TLS negotiation';
	res($self, '382 Continue with TLS negotiation');
	$self->{sock} = IO::Socket::SSL->start_SSL($sock, %$opt);
	$self->requeue if PublicInbox::DS::accept_tls_step($self);
	undef;
}

# RFC 8054
sub cmd_compress ($$) {
	my ($self, $alg) = @_;
	return '503 Only DEFLATE is supported' if uc($alg) ne 'DEFLATE';
	return r502 if $self->compressed;
	PublicInbox::NNTPdeflate->enable($self);
	$self->requeue;
	undef
}

sub zflush {} # overridden by NNTPdeflate

sub cmd_xpath ($$) {
	my ($self, $mid) = @_;
	return r501 unless $mid =~ $ONE_MSGID;
	$mid = $1;
	my @paths;
	foreach my $ng (values %{$self->{nntpd}->{groups}}) {
		my $n = $ng->mm->num_for($mid);
		push @paths, "$ng->{newsgroup}/$n" if defined $n;
	}
	return '430 no such article on server' unless @paths;
	'223 '.join(' ', @paths);
}

sub res ($$) { do_write($_[0], $_[1] . "\r\n") }

sub more ($$) { $_[0]->msg_more($_[1] . "\r\n") }

sub do_write ($$) {
	my $self = $_[0];
	my $done = $self->write(\($_[1]));
	return 0 unless $self->{sock};

	$done;
}

sub err ($$;@) {
	my ($self, $fmt, @args) = @_;
	printf { $self->{nntpd}->{err} } $fmt."\n", @args;
}

sub out ($$;@) {
	my ($self, $fmt, @args) = @_;
	printf { $self->{nntpd}->{out} } $fmt."\n", @args;
}

# callback used by PublicInbox::DS for any (e)poll (in/out/hup/err)
sub event_step {
	my ($self) = @_;

	return unless $self->flush_write && $self->{sock} && !$self->{long_cb};

	$self->update_idle_time;
	# only read more requests if we've drained the write buffer,
	# otherwise we can be buffering infinitely w/o backpressure

	my $rbuf = $self->{rbuf} // \(my $x = '');
	my $line = index($$rbuf, "\n");
	while ($line < 0) {
		return $self->close if length($$rbuf) >= LINE_MAX;
		$self->do_read($rbuf, LINE_MAX, length($$rbuf)) or return;
		$line = index($$rbuf, "\n");
	}
	$line = substr($$rbuf, 0, $line + 1, '');
	$line =~ s/\r?\n\z//s;
	return $self->close if $line =~ /[[:cntrl:]]/s;

	my $t0 = now();
	my $fd = fileno($self->{sock});
	my $r = eval { process_line($self, $line) };
	my $pending = $self->{wbuf} ? ' pending' : '';
	out($self, "[$fd] %s - %0.6f$pending", $line, now() - $t0);
	return $self->close if $r < 0;
	$self->rbuf_idle($rbuf);
	$self->update_idle_time;

	# maybe there's more pipelined data, or we'll have
	# to register it for socket-readiness notifications
	$self->requeue unless $pending;
}

# for graceful shutdown in PublicInbox::Daemon:
sub busy {
	my ($self, $now) = @_;
	($self->{rbuf} || $self->{wbuf} || $self->not_idle_long($now));
}

1;
public-inbox-1.6.1/lib/PublicInbox/NNTPD.pm000066400000000000000000000040031377346120300203220ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 

# represents an NNTPD (currently a singleton),
# see script/public-inbox-nntpd for how it is used
package PublicInbox::NNTPD;
use strict;
use warnings;
use Sys::Hostname;
use PublicInbox::Config;
use PublicInbox::InboxIdle;

sub new {
	my ($class) = @_;
	my $pi_config = PublicInbox::Config->new;
	my $name = $pi_config->{'publicinbox.nntpserver'};
	if (!defined($name) or $name eq '') {
		$name = hostname;
	} elsif (ref($name) eq 'ARRAY') {
		$name = $name->[0];
	}

	bless {
		groups => {},
		err => \*STDERR,
		out => \*STDOUT,
		grouplist => [],
		pi_config => $pi_config,
		servername => $name,
		greet => \"201 $name ready - post via email\r\n",
		# accept_tls => { SSL_server => 1, ..., SSL_reuse_ctx => ... }
		# idler => PublicInbox::InboxIdle
	}, $class;
}

sub refresh_groups {
	my ($self, $sig) = @_;
	my $pi_config = $sig ? PublicInbox::Config->new : $self->{pi_config};
	my $new = {};
	my @list;
	$pi_config->each_inbox(sub {
		my ($ng) = @_;
		my $ngname = $ng->{newsgroup} or return;
		if (ref $ngname) {
			warn 'multiple newsgroups not supported: '.
				join(', ', @$ngname). "\n";
		# Newsgroup name needs to be compatible with RFC 3977
		# wildmat-exact and RFC 3501 (IMAP) ATOM-CHAR.
		# Leave out a few chars likely to cause problems or conflicts:
		# '|', '<', '>', ';', '#', '$', '&',
		} elsif ($ngname =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]!) {
			warn "newsgroup name invalid: `$ngname'\n";
		} elsif ($ng->nntp_usable) {
			# Only valid if msgmap and search works
			$new->{$ngname} = $ng;
			push @list, $ng;

			# preload to avoid fragmentation:
			$ng->description;
			$ng->base_url;
		}
	});
	@list =	sort { $a->{newsgroup} cmp $b->{newsgroup} } @list;
	$self->{grouplist} = \@list;
	$self->{pi_config} = $pi_config;
	# this will destroy old groups that got deleted
	%{$self->{groups}} = %$new;
}

sub idler_start {
	$_[0]->{idler} //= PublicInbox::InboxIdle->new($_[0]->{pi_config});
}

1;
public-inbox-1.6.1/lib/PublicInbox/NNTPdeflate.pm000066400000000000000000000072651377346120300215600ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors 
# License: AGPL-3.0+ 

# RFC 8054 NNTP COMPRESS DEFLATE implementation
#
# RSS usage for 10K idle-but-did-something NNTP clients on 64-bit:
#   TLS + DEFLATE[a] :  1.8 GB  (MemLevel=9, 1.2 GB with MemLevel=8)
#   TLS + DEFLATE[b] :  ~300MB
#   TLS only         :  <200MB
#   plain            :   <50MB
#
# [a] - initial implementation using per-client Deflate contexts and buffer
#
# [b] - memory-optimized implementation using a global deflate context.
#       It's less efficient in terms of compression, but way more
#       efficient in terms of server memory usage.
package PublicInbox::NNTPdeflate;
use strict;
use 5.010_001;
use parent qw(PublicInbox::NNTP);
use Compress::Raw::Zlib;

my %IN_OPT = (
	-Bufsize => PublicInbox::NNTP::LINE_MAX,
	-WindowBits => -15, # RFC 1951
	-AppendOutput => 1,
);

# global deflate context and buffer
my $zbuf = \(my $buf = '');
my $zout;
{
	my $err;
	($zout, $err) = Compress::Raw::Zlib::Deflate->new(
		# nnrpd (INN) and Compress::Raw::Zlib favor MemLevel=9,
		# the zlib C library and git use MemLevel=8 as the default
		# -MemLevel => 9,
		-Bufsize => 65536, # same as nnrpd
		-WindowBits => -15, # RFC 1951
		-AppendOutput => 1,
	);
	$err == Z_OK or die "Failed to initialize zlib deflate stream: $err";
}


sub enable {
	my ($class, $self) = @_;
	my ($in, $err) = Compress::Raw::Zlib::Inflate->new(%IN_OPT);
	if ($err != Z_OK) {
		$self->err("Inflate->new failed: $err");
		$self->res('403 Unable to activate compression');
		return;
	}
	$self->res('206 Compression active');
	bless $self, $class;
	$self->{zin} = $in;
}

# overrides PublicInbox::NNTP::compressed
sub compressed { 1 }

sub do_read ($$$$) {
	my ($self, $rbuf, $len, $off) = @_;

	my $zin = $self->{zin} or return; # closed
	my $doff;
	my $dbuf = delete($self->{dbuf}) // '';
	$doff = length($dbuf);
	my $r = PublicInbox::DS::do_read($self, \$dbuf, $len, $doff) or return;

	# Workaround inflate bug appending to OOK scalars:
	# 
	# We only have $off if the client is pipelining, and pipelining
	# is where our substr() OOK optimization in event_step makes sense.
	if ($off) {
		my $copy = $$rbuf;
		undef $$rbuf;
		$$rbuf = $copy;
	}

	# assert(length($$rbuf) == $off) as far as NNTP.pm is concerned
	# -ConsumeInput is true, so $dbuf is automatically emptied
	my $err = $zin->inflate($dbuf, $rbuf);
	if ($err == Z_OK) {
		$self->{dbuf} = $dbuf if $dbuf ne '';
		$r = length($$rbuf) and return $r;
		# nothing ready, yet, get more, later
		$self->requeue;
	} else {
		delete $self->{zin};
		$self->close;
	}
	0;
}

# override PublicInbox::DS::msg_more
sub msg_more ($$) {
	my $self = $_[0];

	# $_[1] may be a reference or not for ->deflate
	my $err = $zout->deflate($_[1], $zbuf);
	$err == Z_OK or die "->deflate failed $err";
	1;
}

sub zflush ($) {
	my ($self) = @_;

	my $deflated = $zbuf;
	$zbuf = \(my $next = '');

	my $err = $zout->flush($deflated, Z_FULL_FLUSH);
	$err == Z_OK or die "->flush failed $err";

	# We can still let the lower socket layer do buffering:
	PublicInbox::DS::msg_more($self, $$deflated);
}

# compatible with PublicInbox::DS::write, so $_[1] may be a reference or not
sub write ($$) {
	my $self = $_[0];
	return PublicInbox::DS::write($self, $_[1]) if ref($_[1]) eq 'CODE';

	my $deflated = $zbuf;
	$zbuf = \(my $next = '');

	# $_[1] may be a reference or not for ->deflate
	my $err = $zout->deflate($_[1], $deflated);
	$err == Z_OK or die "->deflate failed $err";
	$err = $zout->flush($deflated, Z_FULL_FLUSH);
	$err == Z_OK or die "->flush failed $err";

	# We can still let the socket layer do buffering:
	PublicInbox::DS::write($self, $deflated);
}

1;
public-inbox-1.6.1/lib/PublicInbox/NewsWWW.pm000066400000000000000000000043111377346120300207620ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 
#
# Plack app redirector for mapping /$NEWSGROUP requests to
# the appropriate /$INBOX in PublicInbox::WWW because some
# auto-linkifiers cannot handle nntp:// redirects properly.
# This is also used directly by PublicInbox::WWW
package PublicInbox::NewsWWW;
use strict;
use warnings;
use PublicInbox::Config;
use PublicInbox::MID qw(mid_escape);
use PublicInbox::Hval qw(prurl);

sub new {
	my ($class, $pi_config) = @_;
	$pi_config ||= PublicInbox::Config->new;
	bless { pi_config => $pi_config }, $class;
}

sub redirect ($$) {
	my ($code, $url) = @_;
	[ $code,
	  [ Location => $url, 'Content-Type' => 'text/plain' ],
	  [ "Redirecting to $url\n" ] ]
}

sub try_inbox {
	my ($ibx, $arg) = @_;
	return if scalar(@$arg) > 1;

	# do not pass $env since HTTP_HOST may differ
	my $url = $ibx->base_url or return;

	my ($mid) = @$arg;
	eval { $ibx->mm->num_for($mid) } or return;

	# 302 since the same message may show up on
	# multiple inboxes and inboxes can be added/reordered
	$arg->[1] = redirect(302, $url .= mid_escape($mid) . '/');
}

sub call {
	my ($self, $env) = @_;

	# some links may have the article number in them:
	# /inbox.foo.bar/123456
	my (undef, @parts) = split(m!/!, $env->{PATH_INFO});
	my ($ng, $article) = @parts;
	my $pi_config = $self->{pi_config};
	if (my $ibx = $pi_config->lookup_newsgroup($ng)) {
		my $url = prurl($env, $ibx->{url});
		my $code = 301;
		if (defined $article && $article =~ /\A[0-9]+\z/) {
			my $mid = eval { $ibx->mm->mid_for($article) };
			if (defined $mid) {
				# article IDs are not stable across clones,
				# do not encourage caching/bookmarking them
				$code = 302;
				$url .= mid_escape($mid) . '/';
			}
		}
		return redirect($code, $url);
	}

	my $res;
	my @try = (join('/', @parts));

	# trailing slash is in the rest of our WWW, so maybe some users
	# will assume it:
	if ($parts[-1] eq '') {
		pop @parts;
		push @try, join('/', @parts);
	}

	foreach my $mid (@try) {
		my $arg = [ $mid ];
		$pi_config->each_inbox(\&try_inbox, $arg);
		defined($res = $arg->[1]) and last;
	}
	$res || [ 404, [qw(Content-Type text/plain)], ["404 Not Found\n"] ];
}

1;
public-inbox-1.6.1/lib/PublicInbox/Over.pm000066400000000000000000000211741377346120300203620ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors 
# License: AGPL-3.0+ 

# for XOVER, OVER in NNTP, and feeds/homepage/threads in PSGI
# Unlike Msgmap, this is an _UNSTABLE_ database which can be
# tweaked/updated over time and rebuilt.
package PublicInbox::Over;
use strict;
use v5.10.1;
use DBI;
use DBD::SQLite;
use PublicInbox::Smsg;
use Compress::Zlib qw(uncompress);
use constant DEFAULT_LIMIT => 1000;

sub dbh_new {
	my ($self, $rw) = @_;
	my $f = delete $self->{filename};
	if (!-f $f) { # SQLite defaults mode to 0644, we want 0666
		if ($rw) {
			require PublicInbox::Spawn;
			open my $fh, '+>>', $f or die "failed to open $f: $!";
			PublicInbox::Spawn::nodatacow_fd(fileno($fh));
		} else {
			$self->{filename} = $f; # die on stat() below:
		}
	}
	my (@st, $st, $dbh);
	my $tries = 0;
	do {
		@st = stat($f) or die "failed to stat $f: $!";
		$st = pack('dd', $st[0], $st[1]); # 0: dev, 1: inode
		$dbh = DBI->connect("dbi:SQLite:dbname=$f",'','', {
			AutoCommit => 1,
			RaiseError => 1,
			PrintError => 0,
			ReadOnly => !$rw,
			sqlite_use_immediate_transaction => 1,
		});
		$self->{st} = $st;
		@st = stat($f) or die "failed to stat $f: $!";
		$st = pack('dd', $st[0], $st[1]);
	} while ($st ne $self->{st} && $tries++ < 3);
	warn "W: $f: .st_dev, .st_ino unstable\n" if $st ne $self->{st};

	if ($rw) {
		# TRUNCATE reduces I/O compared to the default (DELETE).
		#
		# Do not use WAL by default since we expect the case
		# where any users may read via read-only daemons
		# (-httpd/-imapd/-nntpd); but only a single user has
		# write permissions for -watch/-mda.
		#
		# Read-only WAL support in SQLite 3.22.0 (2018-01-22)
		# doesn't do what we need: it is only intended for
		# immutable read-only media (e.g. CD-ROM) and not
		# usable for our use case described above.
		#
		# If an admin is willing to give read-only daemons R/W
		# permissions; they can enable WAL manually and we will
		# respect that by not clobbering it.
		my $jm = $dbh->selectrow_array('PRAGMA journal_mode');
		$dbh->do('PRAGMA journal_mode = TRUNCATE') if $jm ne 'wal';

		$dbh->do('PRAGMA synchronous = OFF') if $rw > 1;
	}
	$dbh;
}

sub new {
	my ($class, $f) = @_;
	bless { filename => $f }, $class;
}

sub dbh_close {
	my ($self) = @_;
	if (my $dbh = delete $self->{dbh}) {
		delete $self->{-get_art};
		$self->{filename} = $dbh->sqlite_db_filename;
	}
}

sub dbh ($) { $_[0]->{dbh} //= $_[0]->dbh_new } # dbh_new may be subclassed

sub load_from_row ($;$) {
	my ($smsg, $cull) = @_;
	bless $smsg, 'PublicInbox::Smsg';
	if (defined(my $data = delete $smsg->{ddd})) {
		$data = uncompress($data);
		PublicInbox::Smsg::load_from_data($smsg, $data);

		# saves over 600K for 1000+ message threads
		PublicInbox::Smsg::psgi_cull($smsg) if $cull;
	}
	$smsg
}

sub do_get {
	my ($self, $sql, $opts, @args) = @_;
	my $lim = (($opts->{limit} || 0) + 0) || DEFAULT_LIMIT;
	$sql .= "LIMIT $lim";
	my $msgs = dbh($self)->selectall_arrayref($sql, { Slice => {} }, @args);
	my $cull = $opts->{cull};
	load_from_row($_, $cull) for @$msgs;
	$msgs
}

sub query_xover {
	my ($self, $beg, $end) = @_;
	do_get($self, <<'', {}, $beg, $end);
SELECT num,ts,ds,ddd FROM over WHERE num >= ? AND num <= ?
ORDER BY num ASC

}

sub query_ts {
	my ($self, $ts, $prev) = @_;
	do_get($self, <<'', {}, $ts, $prev);
SELECT num,ddd FROM over WHERE ts >= ? AND num > ?
ORDER BY num ASC

}

sub get_all {
	my $self = shift;
	my $nr = scalar(@_) or return [];
	my $in = '?' . (',?' x ($nr - 1));
	do_get($self, <<"", { cull => 1, limit => $nr }, @_);
SELECT num,ts,ds,ddd FROM over WHERE num IN ($in)

}

sub nothing () { wantarray ? (0, []) : [] };

sub get_thread {
	my ($self, $mid, $prev) = @_;
	my $dbh = dbh($self);
	my $opts = { cull => 1 };

	my $id = $dbh->selectrow_array(<<'', undef, $mid);
SELECT id FROM msgid WHERE mid = ? LIMIT 1

	defined $id or return nothing;

	my $num = $dbh->selectrow_array(<<'', undef, $id);
SELECT num FROM id2num WHERE id = ? AND num > 0
ORDER BY num ASC LIMIT 1

	defined $num or return nothing;

	my ($tid, $sid) = $dbh->selectrow_array(<<'', undef, $num);
SELECT tid,sid FROM over WHERE num = ? LIMIT 1

	defined $tid or return nothing; # $sid may be undef

	my $cond_all = '(tid = ? OR sid = ?) AND num > ?';
	my $sort_col = 'ds';
	$num = 0;
	if ($prev) { # mboxrd stream, only
		$num = $prev->{num} || 0;
		$sort_col = 'num';
	}

	my $cols = 'num,ts,ds,ddd';
	unless (wantarray) {
		return do_get($self, <<"", $opts, $tid, $sid, $num);
SELECT $cols FROM over WHERE $cond_all
ORDER BY $sort_col ASC

	}

	# HTML view always wants an array and never uses $prev,
	# but the mbox stream never wants an array and always has $prev
	die '$prev not supported with wantarray' if $prev;
	my $nr = $dbh->selectrow_array(<<"", undef, $tid, $sid, $num);
SELECT COUNT(num) FROM over WHERE $cond_all

	# giant thread, prioritize strict (tid) matches and throw
	# in the loose (sid) matches at the end
	my $msgs = do_get($self, <<"", $opts, $tid, $num);
SELECT $cols FROM over WHERE tid = ? AND num > ?
ORDER BY $sort_col ASC

	# do we have room for loose matches? get the most recent ones, first:
	my $lim = DEFAULT_LIMIT - scalar(@$msgs);
	if ($lim > 0) {
		$opts->{limit} = $lim;
		my $loose = do_get($self, <<"", $opts, $tid, $sid, $num);
SELECT $cols FROM over WHERE tid != ? AND sid = ? AND num > ?
ORDER BY $sort_col DESC

		# TODO separate strict and loose matches here once --reindex
		# is fixed to preserve `tid' properly
		push @$msgs, @$loose;
	}
	($nr, $msgs);
}

# strict `tid' matches, only, for thread-expanded mbox.gz search results
# and future CLI interface
# returns true if we have IDs, undef if not
sub expand_thread {
	my ($self, $ctx) = @_;
	my $dbh = dbh($self);
	do {
		defined(my $num = $ctx->{ids}->[0]) or return;
		my ($tid) = $dbh->selectrow_array(<<'', undef, $num);
SELECT tid FROM over WHERE num = ?

		if (defined($tid)) {
			my $sql = <<'';
SELECT num FROM over WHERE tid = ? AND num > ?
ORDER BY num ASC LIMIT 1000

			my $xids = $dbh->selectcol_arrayref($sql, undef, $tid,
							$ctx->{prev} // 0);
			if (scalar(@$xids)) {
				$ctx->{prev} = $xids->[-1];
				$ctx->{xids} = $xids;
				return 1; # success
			}
		}
		$ctx->{prev} = 0;
		shift @{$ctx->{ids}};
	} while (1);
}

sub recent {
	my ($self, $opts, $after, $before) = @_;
	my ($s, @v);
	if (defined($before)) {
		if (defined($after)) {
			$s = '+num > 0 AND ts >= ? AND ts <= ? ORDER BY ts DESC';
			@v = ($after, $before);
		} else {
			$s = '+num > 0 AND ts <= ? ORDER BY ts DESC';
			@v = ($before);
		}
	} else {
		if (defined($after)) {
			$s = '+num > 0 AND ts >= ? ORDER BY ts ASC';
			@v = ($after);
		} else {
			$s = '+num > 0 ORDER BY ts DESC';
		}
	}
	do_get($self, <<"", $opts, @v);
SELECT ts,ds,ddd FROM over WHERE $s

}

sub get_art {
	my ($self, $num) = @_;
	# caching $sth ourselves is faster than prepare_cached
	my $sth = $self->{-get_art} //= dbh($self)->prepare(<<'');
SELECT num,tid,ds,ts,ddd FROM over WHERE num = ? LIMIT 1

	$sth->execute($num);
	my $smsg = $sth->fetchrow_hashref;
	$smsg ? load_from_row($smsg) : undef;
}

sub next_by_mid {
	my ($self, $mid, $id, $prev) = @_;
	my $dbh = dbh($self);

	unless (defined $$id) {
		my $sth = $dbh->prepare_cached(<<'', undef, 1);
	SELECT id FROM msgid WHERE mid = ? LIMIT 1

		$sth->execute($mid);
		$$id = $sth->fetchrow_array;
		defined $$id or return;
	}
	my $sth = $dbh->prepare_cached(<<"", undef, 1);
SELECT num FROM id2num WHERE id = ? AND num > ?
ORDER BY num ASC LIMIT 1

	$$prev ||= 0;
	$sth->execute($$id, $$prev);
	my $num = $sth->fetchrow_array or return;
	$$prev = $num;
	get_art($self, $num);
}

# IMAP search, this is limited by callers to UID_SLICE size (50K)
sub uid_range {
	my ($self, $beg, $end, $sql) = @_;
	my $dbh = dbh($self);
	my $q = 'SELECT num FROM over WHERE num >= ? AND num <= ?';

	# This is read-only, anyways; but caller should verify it's
	# only sending \A[0-9]+\z for ds and ts column ranges
	$q .= $$sql if $sql;
	$q .= ' ORDER BY num ASC';
	$dbh->selectcol_arrayref($q, undef, $beg, $end);
}

sub max {
	my ($self) = @_;
	my $sth = dbh($self)->prepare_cached(<<'', undef, 1);
SELECT MAX(num) FROM over WHERE num > 0

	$sth->execute;
	$sth->fetchrow_array // 0;
}

sub imap_exists {
	my ($self, $uid_base, $uid_end) = @_;
	my $sth = dbh($self)->prepare_cached(<<'', undef, 1);
SELECT COUNT(num) FROM over WHERE num > ? AND num <= ?

	$sth->execute($uid_base, $uid_end);
	$sth->fetchrow_array;
}

sub check_inodes {
	my ($self) = @_;
	my $dbh = $self->{dbh} or return;
	my $f = $dbh->sqlite_db_filename;
	if (my @st = stat($f)) { # did st_dev, st_ino change?
		my $st = pack('dd', $st[0], $st[1]);

		# don't actually reopen, just let {dbh} be recreated later
		dbh_close($self) if $st ne ($self->{st} // $st);
	} else {
		warn "W: stat $f: $!\n";
	}
}

1;
public-inbox-1.6.1/lib/PublicInbox/OverIdx.pm000066400000000000000000000321521377346120300210250ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors 
# License: AGPL-3.0+ 

# for XOVER, OVER in NNTP, and feeds/homepage/threads in PSGI
# Unlike Msgmap, this is an _UNSTABLE_ cache which can be
# tweaked/updated over time and rebuilt.
#
# Ghost messages (messages which are only referenced in References/In-Reply-To)
# are denoted by a negative NNTP article number.
package PublicInbox::OverIdx;
use strict;
use v5.10.1;
use parent qw(PublicInbox::Over);
use IO::Handle;
use DBI qw(:sql_types); # SQL_BLOB
use PublicInbox::MID qw/id_compress mids_for_index references/;
use PublicInbox::Smsg qw(subject_normalized);
use Compress::Zlib qw(compress);
use Carp qw(croak);

sub dbh_new {
	my ($self) = @_;
	my $dbh = $self->SUPER::dbh_new($self->{-no_fsync} ? 2 : 1);

	# 80000 pages (80MiB on SQLite <3.12.0, 320MiB on 3.12.0+)
	# was found to be good in 2018 during the large LKML import
	# at the time.  This ought to be configurable based on HW
	# and inbox size; I suspect it's overkill for many inboxes.
	$dbh->do('PRAGMA cache_size = 80000');

	create_tables($dbh);
	$dbh;
}

sub new {
	my ($class, $f) = @_;
	my $self = $class->SUPER::new($f);
	$self->{min_tid} = 0;
	$self;
}

sub get_counter ($$) {
	my ($dbh, $key) = @_;
	my $sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT val FROM counter WHERE key = ? LIMIT 1

	$sth->execute($key);
	$sth->fetchrow_array;
}

sub adj_counter ($$$) {
	my ($self, $key, $op) = @_;
	my $dbh = $self->{dbh};
	my $sth = $dbh->prepare_cached(<<"");
UPDATE counter SET val = val $op 1 WHERE key = ?

	$sth->execute($key);

	get_counter($dbh, $key);
}

sub next_tid { adj_counter($_[0], 'thread', '+') }
sub next_ghost_num { adj_counter($_[0], 'ghost', '-') }

sub id_for ($$$$$) {
	my ($self, $tbl, $id_col, $val_col, $val) = @_;
	my $dbh = $self->{dbh};
	my $in = $dbh->prepare_cached(<<"")->execute($val);
INSERT OR IGNORE INTO $tbl ($val_col) VALUES (?)

	if ($in == 0) {
		my $sth = $dbh->prepare_cached(<<"", undef, 1);
SELECT $id_col FROM $tbl WHERE $val_col = ? LIMIT 1

		$sth->execute($val);
		$sth->fetchrow_array;
	} else {
		$dbh->last_insert_id(undef, undef, $tbl, $id_col);
	}
}

sub sid {
	my ($self, $path) = @_;
	return unless defined $path && $path ne '';
	id_for($self, 'subject', 'sid', 'path' => $path);
}

sub mid2id {
	my ($self, $mid) = @_;
	id_for($self, 'msgid', 'id', 'mid' => $mid);
}

sub delete_by_num {
	my ($self, $num, $tid_ref) = @_;
	my $dbh = $self->{dbh};
	if ($tid_ref) {
		my $sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT tid FROM over WHERE num = ? LIMIT 1

		$sth->execute($num);
		$$tid_ref = $sth->fetchrow_array; # may be undef
	}
	foreach (qw(over id2num)) {
		$dbh->prepare_cached(<<"")->execute($num);
DELETE FROM $_ WHERE num = ?

	}
}

# this includes ghosts
sub each_by_mid {
	my ($self, $mid, $cols, $cb, @arg) = @_;
	my $dbh = $self->{dbh};

=over
	I originally wanted to stuff everything into a single query:

	SELECT over.* FROM over
	LEFT JOIN id2num ON over.num = id2num.num
	LEFT JOIN msgid ON msgid.id = id2num.id
	WHERE msgid.mid = ? AND over.num >= ?
	ORDER BY over.num ASC
	LIMIT 1000

	But it's faster broken out (and we're always in a
	transaction for subroutines in this file)
=cut

	my $sth = $dbh->prepare_cached(<<'', undef, 1);
SELECT id FROM msgid WHERE mid = ? LIMIT 1

	$sth->execute($mid);
	my $id = $sth->fetchrow_array;
	defined $id or return;

	push(@$cols, 'num');
	$cols = join(',', map { $_ } @$cols);
	my $lim = 10;
	my $prev = get_counter($dbh, 'ghost');
	while (1) {
		$sth = $dbh->prepare_cached(<<"", undef, 1);
SELECT num FROM id2num WHERE id = ? AND num >= ?
ORDER BY num ASC
LIMIT $lim

		$sth->execute($id, $prev);
		my $nums = $sth->fetchall_arrayref;
		my $nr = scalar(@$nums) or return;
		$prev = $nums->[-1]->[0];

		$sth = $dbh->prepare_cached(<<"", undef, 1);
SELECT $cols FROM over WHERE over.num = ? LIMIT 1

		foreach (@$nums) {
			$sth->execute($_->[0]);
			my $smsg = $sth->fetchrow_hashref;
			$smsg = PublicInbox::Over::load_from_row($smsg);
			$cb->($self, $smsg, @arg) or return;
		}
		return if $nr != $lim;
	}
}

sub _resolve_mid_to_tid {
	my ($self, $smsg, $tid) = @_;
	my $cur_tid = $smsg->{tid};
	if (defined $$tid) {
		merge_threads($self, $$tid, $cur_tid);
	} elsif ($cur_tid > $self->{min_tid}) {
		$$tid = $cur_tid;
	} else { # rethreading, queue up dead ghosts
		$$tid = next_tid($self);
		my $n = $smsg->{num};
		if ($n > 0) {
			$self->{dbh}->prepare_cached(<<'')->execute($$tid, $n);
UPDATE over SET tid = ? WHERE num = ?

		} elsif ($n < 0) {
			push(@{$self->{-ghosts_to_delete}}, $n);
		}
	}
	1;
}

# this will create a ghost as necessary
sub resolve_mid_to_tid {
	my ($self, $mid) = @_;
	my $tid;
	each_by_mid($self, $mid, ['tid'], \&_resolve_mid_to_tid, \$tid);
	if (my $del = delete $self->{-ghosts_to_delete}) {
		delete_by_num($self, $_) for @$del;
	}
	$tid // do { # create a new ghost
		my $id = mid2id($self, $mid);
		my $num = next_ghost_num($self);
		$num < 0 or die "ghost num is non-negative: $num\n";
		$tid = next_tid($self);
		my $dbh = $self->{dbh};
		$dbh->prepare_cached(<<'')->execute($num, $tid);
INSERT INTO over (num, tid) VALUES (?,?)

		$dbh->prepare_cached(<<'')->execute($id, $num);
INSERT INTO id2num (id, num) VALUES (?,?)

		$tid;
	};
}

sub merge_threads {
	my ($self, $winner_tid, $loser_tid) = @_;
	return if $winner_tid == $loser_tid;
	my $dbh = $self->{dbh};
	$dbh->prepare_cached(<<'')->execute($winner_tid, $loser_tid);
UPDATE over SET tid = ? WHERE tid = ?

}

sub link_refs {
	my ($self, $refs, $old_tid) = @_;
	my $tid;

	if (@$refs) {
		# first ref *should* be the thread root,
		# but we can never trust clients to do the right thing
		my $ref = $refs->[0];
		$tid = resolve_mid_to_tid($self, $ref);
		merge_threads($self, $tid, $old_tid) if defined $old_tid;

		# the rest of the refs should point to this tid:
		foreach my $i (1..$#$refs) {
			$ref = $refs->[$i];
			my $ptid = resolve_mid_to_tid($self, $ref);
			merge_threads($self, $tid, $ptid);
		}
	} else {
		$tid = $old_tid // next_tid($self);
	}
	$tid;
}

sub parse_references ($$$) {
	my ($smsg, $hdr, $mids) = @_;
	my $refs = references($hdr);
	push(@$refs, @$mids) if scalar(@$mids) > 1;
	return $refs if scalar(@$refs) == 0;

	# prevent circular references here:
	my %seen = ( $smsg->{mid} => 1 );
	my @keep;
	foreach my $ref (@$refs) {
		if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) {
			warn "References: <$ref> too long, ignoring\n";
			next;
		}
		push(@keep, $ref) unless $seen{$ref}++;
	}
	$smsg->{references} = '<'.join('> <', @keep).'>' if @keep;
	\@keep;
}

# normalize subjects so they are suitable as pathnames for URLs
# XXX: consider for removal
sub subject_path ($) {
	my ($subj) = @_;
	$subj = subject_normalized($subj);
	$subj =~ s![^a-zA-Z0-9_\.~/\-]+!_!g;
	lc($subj);
}

sub add_overview {
	my ($self, $eml, $smsg) = @_;
	$smsg->{lines} = $eml->body_raw =~ tr!\n!\n!;
	my $mids = mids_for_index($eml);
	my $refs = parse_references($smsg, $eml, $mids);
	my $subj = $smsg->{subject};
	my $xpath;
	if ($subj ne '') {
		$xpath = subject_path($subj);
		$xpath = id_compress($xpath);
	}
	my $dd = $smsg->to_doc_data;
	utf8::encode($dd);
	$dd = compress($dd);
	add_over($self, $smsg, $mids, $refs, $xpath, $dd);
}

sub _add_over {
	my ($self, $smsg, $mid, $refs, $old_tid, $v) = @_;
	my $cur_tid = $smsg->{tid};
	my $n = $smsg->{num};
	die "num must not be zero for $mid" if !$n;
	my $cur_valid = $cur_tid > $self->{min_tid};

	if ($n > 0) { # regular mail
		if ($cur_valid) {
			$$old_tid //= $cur_tid;
			merge_threads($self, $$old_tid, $cur_tid);
		} else {
			$$old_tid //= next_tid($self);
		}
	} elsif ($n < 0) { # ghost
		$$old_tid //= $cur_valid ? $cur_tid : next_tid($self);
		$$old_tid = link_refs($self, $refs, $$old_tid);
		delete_by_num($self, $n);
		$$v++;
	}
	1;
}

sub add_over {
	my ($self, $smsg, $mids, $refs, $xpath, $ddd) = @_;
	my $old_tid;
	my $vivified = 0;
	my $num = $smsg->{num};

	begin_lazy($self);
	delete_by_num($self, $num, \$old_tid);
	$old_tid = undef if ($old_tid // 0) <= $self->{min_tid};
	foreach my $mid (@$mids) {
		my $v = 0;
		each_by_mid($self, $mid, ['tid'], \&_add_over,
				$mid, $refs, \$old_tid, \$v);
		$v > 1 and warn "BUG: vivified multiple ($v) ghosts for $mid\n";
		$vivified += $v;
	}
	$smsg->{tid} = $vivified ? $old_tid : link_refs($self, $refs, $old_tid);
	$smsg->{sid} = sid($self, $xpath);
	my $dbh = $self->{dbh};
	my $sth = $dbh->prepare_cached(<<'');
INSERT INTO over (num, tid, sid, ts, ds, ddd)
VALUES (?,?,?,?,?,?)

	my $nc = 1;
	$sth->bind_param($nc, $num);
	$sth->bind_param(++$nc, $smsg->{$_}) for (qw(tid sid ts ds));
	$sth->bind_param(++$nc, $ddd, SQL_BLOB);
	$sth->execute;
	$sth = $dbh->prepare_cached(<<'');
INSERT INTO id2num (id, num) VALUES (?,?)

	foreach my $mid (@$mids) {
		my $id = mid2id($self, $mid);
		$sth->execute($id, $num);
	}
}

sub _remove_oid {
	my ($self, $smsg, $oid, $removed) = @_;
	if (!defined($oid) || $smsg->{blob} eq $oid) {
		delete_by_num($self, $smsg->{num});
		push @$removed, $smsg->{num};
	}
	1;
}

# returns number of removed messages in scalar context,
# array of removed article numbers in array context.
# $oid may be undef to match only on $mid
sub remove_oid {
	my ($self, $oid, $mid) = @_;
	my $removed = [];
	begin_lazy($self);
	each_by_mid($self, $mid, ['ddd'], \&_remove_oid, $oid, $removed);
	@$removed;
}

sub _num_mid0_for_oid {
	my ($self, $smsg, $oid, $res) = @_;
	my $blob = $smsg->{blob};
	return 1 if (!defined($blob) || $blob ne $oid); # continue;
	@$res = ($smsg->{num}, $smsg->{mid});
	0; # done
}

sub num_mid0_for_oid {
	my ($self, $oid, $mid) = @_;
	my $res = [];
	begin_lazy($self);
	each_by_mid($self, $mid, ['ddd'], \&_num_mid0_for_oid, $oid, $res);
	@$res, # ($num, $mid0);
}

sub create_tables {
	my ($dbh) = @_;

	$dbh->do(<<'');
CREATE TABLE IF NOT EXISTS over (
	num INTEGER NOT NULL, /* NNTP article number == IMAP UID */
	tid INTEGER NOT NULL, /* THREADID (IMAP REFERENCES threading, JMAP) */
	sid INTEGER, /* Subject ID (IMAP ORDEREDSUBJECT "threading") */
	ts INTEGER, /* IMAP INTERNALDATE (Received: header, git commit time) */
	ds INTEGER, /* RFC-2822 sent Date: header, git author time */
	ddd VARBINARY, /* doc-data-deflated (->to_doc_data, ->load_from_data) */
	UNIQUE (num)
)

	$dbh->do('CREATE INDEX IF NOT EXISTS idx_tid ON over (tid)');
	$dbh->do('CREATE INDEX IF NOT EXISTS idx_sid ON over (sid)');
	$dbh->do('CREATE INDEX IF NOT EXISTS idx_ts ON over (ts)');
	$dbh->do('CREATE INDEX IF NOT EXISTS idx_ds ON over (ds)');

	$dbh->do(<<'');
CREATE TABLE IF NOT EXISTS counter (
	key VARCHAR(8) PRIMARY KEY NOT NULL,
	val INTEGER DEFAULT 0,
	UNIQUE (key)
)

	$dbh->do("INSERT OR IGNORE INTO counter (key) VALUES ('thread')");
	$dbh->do("INSERT OR IGNORE INTO counter (key) VALUES ('ghost')");

	$dbh->do(<<'');
CREATE TABLE IF NOT EXISTS subject (
	sid INTEGER PRIMARY KEY AUTOINCREMENT,
	path VARCHAR(40) NOT NULL, /* SHA-1 of normalized subject */
	UNIQUE (path)
)

	$dbh->do(<<'');
CREATE TABLE IF NOT EXISTS id2num (
	id INTEGER NOT NULL, /* <=> msgid.id */
	num INTEGER NOT NULL,
	UNIQUE (id, num)
)

	# performance critical:
	$dbh->do('CREATE INDEX IF NOT EXISTS idx_inum ON id2num (num)');
	$dbh->do('CREATE INDEX IF NOT EXISTS idx_id ON id2num (id)');

	$dbh->do(<<'');
CREATE TABLE IF NOT EXISTS msgid (
	id INTEGER PRIMARY KEY AUTOINCREMENT, /* <=> id2num.id */
	mid VARCHAR(244) NOT NULL,
	UNIQUE (mid)
)

}

sub commit_lazy {
	my ($self) = @_;
	delete $self->{txn} or return;
	$self->{dbh}->commit;
}

sub begin_lazy {
	my ($self) = @_;
	return if $self->{txn};
	my $dbh = $self->dbh or return;
	$dbh->begin_work;
	# $dbh->{Profile} = 2;
	$self->{txn} = 1;
}

sub rollback_lazy {
	my ($self) = @_;
	delete $self->{txn} or return;
	$self->{dbh}->rollback;
}

sub dbh_close {
	my ($self) = @_;
	die "in transaction" if $self->{txn};
	$self->SUPER::dbh_close;
}

sub create {
	my ($self) = @_;
	unless (-r $self->{filename}) {
		require File::Path;
		require File::Basename;
		File::Path::mkpath(File::Basename::dirname($self->{filename}));
	}
	# create the DB:
	PublicInbox::Over::dbh($self);
	$self->dbh_close;
}

sub rethread_prepare {
	my ($self, $opt) = @_;
	return unless $opt->{rethread};
	begin_lazy($self);
	my $min = $self->{min_tid} = get_counter($self->{dbh}, 'thread') // 0;
	my $pr = $opt->{-progress};
	$pr->("rethread min THREADID ".($min + 1)."\n") if $pr && $min;
}

sub rethread_done {
	my ($self, $opt) = @_;
	return unless $opt->{rethread} && $self->{txn};
	defined(my $min = $self->{min_tid}) or croak('BUG: no min_tid');
	my $dbh = $self->{dbh} or croak('BUG: no dbh');
	my $rows = $dbh->selectall_arrayref(<<'', { Slice => {} }, $min);
SELECT num,tid FROM over WHERE num < 0 AND tid < ?

	my $show_id = $dbh->prepare('SELECT id FROM id2num WHERE num = ?');
	my $show_mid = $dbh->prepare('SELECT mid FROM msgid WHERE id = ?');
	my $pr = $opt->{-progress};
	my $total = 0;
	for my $r (@$rows) {
		my $exp = 0;
		$show_id->execute($r->{num});
		while (defined(my $id = $show_id->fetchrow_array)) {
			++$exp;
			$show_mid->execute($id);
			my $mid = $show_mid->fetchrow_array;
			if (!defined($mid)) {
				warn <{num} ID=$id THREADID=$r->{tid} has no Message-ID
EOF
				next;
			}
			$pr->(<{num} <$mid> THREADID=$r->{tid} culled
EOM
		}
		delete_by_num($self, $r->{num});
	}
	$pr->("I: rethread culled $total ghosts\n") if $pr && $total;
}

1;
public-inbox-1.6.1/lib/PublicInbox/ProcessPipe.pm000066400000000000000000000016231377346120300217000ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 

# a tied handle for auto reaping of children tied to a pipe, see perltie(1)
package PublicInbox::ProcessPipe;
use strict;
use warnings;

sub TIEHANDLE {
	my ($class, $pid, $fh) = @_;
	bless { pid => $pid, fh => $fh }, $class;
}

sub READ { read($_[0]->{fh}, $_[1], $_[2], $_[3] || 0) }

sub READLINE { readline($_[0]->{fh}) }

sub CLOSE {
	my $fh = delete($_[0]->{fh});
	my $ret = defined $fh ? close($fh) : '';
	my $pid = delete $_[0]->{pid};
	if (defined $pid) {
		# PublicInbox::DS may not be loaded
		eval { PublicInbox::DS::dwaitpid($pid, undef, undef) };

		if ($@) { # ok, not in the event loop, work synchronously
			waitpid($pid, 0);
			$ret = '' if $?;
		}
	}
	$ret;
}

sub FILENO { fileno($_[0]->{fh}) }

sub DESTROY {
	CLOSE(@_);
	undef;
}

sub pid { $_[0]->{pid} }

1;
public-inbox-1.6.1/lib/PublicInbox/Qspawn.pm000066400000000000000000000265301377346120300207210ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 

# Like most Perl modules in public-inbox, this is internal and
# NOT subject to any stability guarantees!  It is only documented
# for other hackers.
#
# This is used to limit the number of processes spawned by the
# PSGI server, so it acts like a semaphore and queues up extra
# commands to be run if currently at the limit.  Multiple "limiters"
# may be configured which give inboxes different channels to
# operate in.  This can be useful to ensure smaller inboxes can
# be cloned while cloning of large inboxes is maxed out.
#
# This does not depend on PublicInbox::DS or any other external
# scheduling mechanism, you just need to call start() and finish()
# appropriately. However, public-inbox-httpd (which uses PublicInbox::DS)
# will be able to schedule this based on readability of stdout from
# the spawned process.  See GitHTTPBackend.pm and SolverGit.pm for
# usage examples.  It does not depend on any form of threading.
#
# This is useful for scheduling CGI execution of both long-lived
# git-http-backend(1) process (for "git clone") as well as short-lived
# processes such as git-apply(1).

package PublicInbox::Qspawn;
use strict;
use PublicInbox::Spawn qw(popen_rd);
use PublicInbox::GzipFilter;

# n.b.: we get EAGAIN with public-inbox-httpd, and EINTR on other PSGI servers
use Errno qw(EAGAIN EINTR);

my $def_limiter;

# declares a command to spawn (but does not spawn it).
# $cmd is the command to spawn
# $cmd_env is the environ for the child process (not PSGI env)
# $opt can include redirects and perhaps other process spawning options
sub new ($$$;) {
	my ($class, $cmd, $cmd_env, $opt) = @_;
	bless { args => [ $cmd, $cmd_env, $opt ] }, $class;
}

sub _do_spawn {
	my ($self, $start_cb, $limiter) = @_;
	my $err;
	my ($cmd, $cmd_env, $opt) = @{delete $self->{args}};
	my %o = %{$opt || {}};
	$self->{limiter} = $limiter;
	foreach my $k (@PublicInbox::Spawn::RLIMITS) {
		if (defined(my $rlimit = $limiter->{$k})) {
			$o{$k} = $rlimit;
		}
	}
	$self->{cmd} = $o{quiet} ? undef : $cmd;
	eval {
		# popen_rd may die on EMFILE, ENFILE
		($self->{rpipe}, $self->{pid}) = popen_rd($cmd, $cmd_env, \%o);

		die "E: $!" unless defined($self->{pid});

		$limiter->{running}++;
		$start_cb->($self); # EPOLL_CTL_ADD may ENOSPC/ENOMEM
	};
	finish($self, $@) if $@;
}

sub child_err ($) {
	my ($child_error) = @_; # typically $?
	my $exitstatus = ($child_error >> 8) or return;
	my $sig = $child_error & 127;
	my $msg = "exit status=$exitstatus";
	$msg .= " signal=$sig" if $sig;
	$msg;
}

sub log_err ($$) {
	my ($env, $msg) = @_;
	$env->{'psgi.errors'}->print($msg, "\n");
}

sub finalize ($$) {
	my ($self, $err) = @_;

	my ($env, $qx_cb, $qx_arg, $qx_buf) =
		delete @$self{qw(psgi_env qx_cb qx_arg qx_buf)};

	# done, spawn whatever's in the queue
	my $limiter = $self->{limiter};
	my $running = --$limiter->{running};

	if ($running < $limiter->{max}) {
		if (my $next = shift(@{$limiter->{run_queue}})) {
			_do_spawn(@$next, $limiter);
		}
	}

	if ($err) {
		if (defined $self->{err}) {
			$self->{err} .= "; $err";
		} else {
			$self->{err} = $err;
		}
		if ($env && $self->{cmd}) {
			log_err($env, join(' ', @{$self->{cmd}}) . ": $err");
		}
	}
	if ($qx_cb) {
		eval { $qx_cb->($qx_buf, $qx_arg) };
	} elsif (my $wcb = delete $env->{'qspawn.wcb'}) {
		# have we started writing, yet?
		require PublicInbox::WwwStatic;
		$wcb->(PublicInbox::WwwStatic::r(500));
	}
}

# callback for dwaitpid
sub waitpid_err ($$) {
	my ($self, $pid) = @_;
	my $xpid = delete $self->{pid};
	my $err;
	if (defined $pid) {
		if ($pid > 0) { # success!
			$err = child_err($?);
		} elsif ($pid < 0) { # ??? does this happen in our case?
			$err = "W: waitpid($xpid, 0) => $pid: $!";
		} # else should not be called with pid == 0
	}
	finalize($self, $err);
}

sub do_waitpid ($) {
	my ($self) = @_;
	my $pid = $self->{pid};
	# PublicInbox::DS may not be loaded
	eval { PublicInbox::DS::dwaitpid($pid, \&waitpid_err, $self) };
	# done if we're running in PublicInbox::DS::EventLoop
	if ($@) {
		# non public-inbox-{httpd,nntpd} callers may block:
		my $ret = waitpid($pid, 0);
		waitpid_err($self, $ret);
	}
}

sub finish ($;$) {
	my ($self, $err) = @_;
	if (delete $self->{rpipe}) {
		do_waitpid($self);
	} else {
		finalize($self, $err);
	}
}

sub start ($$$) {
	my ($self, $limiter, $start_cb) = @_;
	if ($limiter->{running} < $limiter->{max}) {
		_do_spawn($self, $start_cb, $limiter);
	} else {
		push @{$limiter->{run_queue}}, [ $self, $start_cb ];
	}
}

sub psgi_qx_init_cb {
	my ($self) = @_;
	my $async = delete $self->{async};
	my ($r, $buf);
	my $qx_fh = $self->{qx_fh};
reread:
	$r = sysread($self->{rpipe}, $buf, 65536);
	if ($async) {
		$async->async_pass($self->{psgi_env}->{'psgix.io'},
					$qx_fh, \$buf);
	} elsif (defined $r) {
		$r ? (print $qx_fh $buf) : event_step($self, undef);
	} else {
		return if $! == EAGAIN; # try again when notified
		goto reread if $! == EINTR;
		event_step($self, $!);
	}
}

sub psgi_qx_start {
	my ($self) = @_;
	if (my $async = $self->{psgi_env}->{'pi-httpd.async'}) {
		# PublicInbox::HTTPD::Async->new(rpipe, $cb, cb_arg, $end_obj)
		$self->{async} = $async->($self->{rpipe},
					\&psgi_qx_init_cb, $self, $self);
		# init_cb will call ->async_pass or ->close
	} else { # generic PSGI
		psgi_qx_init_cb($self) while $self->{qx_fh};
	}
}

# Similar to `backtick` or "qx" ("perldoc -f qx"), it calls $qx_cb with
# the stdout of the given command when done; but respects the given limiter
# $env is the PSGI env.  As with ``/qx; only use this when output is small
# and safe to slurp.
sub psgi_qx {
	my ($self, $env, $limiter, $qx_cb, $qx_arg) = @_;
	$self->{psgi_env} = $env;
	my $qx_buf = '';
	open(my $qx_fh, '+>', \$qx_buf) or die; # PerlIO::scalar
	$self->{qx_cb} = $qx_cb;
	$self->{qx_arg} = $qx_arg;
	$self->{qx_fh} = $qx_fh;
	$self->{qx_buf} = \$qx_buf;
	$limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32);
	start($self, $limiter, \&psgi_qx_start);
}

# this is called on pipe EOF to reap the process, may be called
# via PublicInbox::DS event loop OR via GetlineBody for generic
# PSGI servers.
sub event_step {
	my ($self, $err) = @_; # $err: $!
	log_err($self->{psgi_env}, "psgi_{return,qx} $err") if defined($err);
	finish($self);
	my ($fh, $qx_fh) = delete(@$self{qw(fh qx_fh)});
	$fh->close if $fh; # async-only (psgi_return)
}

sub rd_hdr ($) {
	my ($self) = @_;
	# typically used for reading CGI headers
	# we must loop until EAGAIN for EPOLLET in HTTPD/Async.pm
	# We also need to check EINTR for generic PSGI servers.
	my $ret;
	my $total_rd = 0;
	my $hdr_buf = $self->{hdr_buf};
	my ($ph_cb, $ph_arg) = @{$self->{parse_hdr}};
	do {
		my $r = sysread($self->{rpipe}, $$hdr_buf, 4096,
				length($$hdr_buf));
		if (defined($r)) {
			$total_rd += $r;
			eval { $ret = $ph_cb->($total_rd, $hdr_buf, $ph_arg) };
			if ($@) {
				log_err($self->{psgi_env}, "parse_hdr: $@");
				$ret = [ 500, [], [ "Internal error\n" ] ];
			}
		} else {
			# caller should notify us when it's ready:
			return if $! == EAGAIN;
			next if $! == EINTR; # immediate retry
			log_err($self->{psgi_env}, "error reading header: $!");
			$ret = [ 500, [], [ "Internal error\n" ] ];
		}
	} until (defined $ret);
	delete $self->{parse_hdr}; # done parsing headers
	$ret;
}

sub psgi_return_init_cb {
	my ($self) = @_;
	my $r = rd_hdr($self) or return;
	my $env = $self->{psgi_env};
	my $filter = delete $env->{'qspawn.filter'} //
		PublicInbox::GzipFilter::qsp_maybe($r->[1], $env);

	my $wcb = delete $env->{'qspawn.wcb'};
	my $async = delete $self->{async};
	if (scalar(@$r) == 3) { # error
		if ($async) {
			# calls rpipe->close && ->event_step
			$async->close;
		} else {
			$self->{rpipe}->close;
			event_step($self);
		}
		$wcb->($r);
	} elsif ($async) {
		# done reading headers, handoff to read body
		my $fh = $wcb->($r); # scalar @$r == 2
		$fh = $filter->attach($fh) if $filter;
		$self->{fh} = $fh;
		$async->async_pass($env->{'psgix.io'}, $fh,
					delete($self->{hdr_buf}));
	} else { # for synchronous PSGI servers
		require PublicInbox::GetlineBody;
		$r->[2] = PublicInbox::GetlineBody->new($self->{rpipe},
					\&event_step, $self,
					${$self->{hdr_buf}}, $filter);
		$wcb->($r);
	}
}

sub psgi_return_start { # may run later, much later...
	my ($self) = @_;
	if (my $async = $self->{psgi_env}->{'pi-httpd.async'}) {
		# PublicInbox::HTTPD::Async->new(rpipe, $cb, $cb_arg, $end_obj)
		$self->{async} = $async->($self->{rpipe},
					\&psgi_return_init_cb, $self, $self);
	} else { # generic PSGI
		psgi_return_init_cb($self) while $self->{parse_hdr};
	}
}

# Used for streaming the stdout of one process as a PSGI response.
#
# $env is the PSGI env.
# optional keys in $env:
#   $env->{'qspawn.wcb'} - the write callback from the PSGI server
#                          optional, use this if you've already
#                          captured it elsewhere.  If not given,
#                          psgi_return will return an anonymous
#                          sub for the PSGI server to call
#
#   $env->{'qspawn.filter'} - filter object, responds to ->attach for
#                             pi-httpd.async and ->translate for generic
#                             PSGI servers
#
# $limiter - the Limiter object to use (uses the def_limiter if not given)
#
# $parse_hdr - Initial read function; often for parsing CGI header output.
#              It will be given the return value of sysread from the pipe
#              and a string ref of the current buffer.  Returns an arrayref
#              for PSGI responses.  2-element arrays in PSGI mean the
#              body will be streamed, later, via writes (push-based) to
#              psgix.io.  3-element arrays means the body is available
#              immediately (or streamed via ->getline (pull-based)).
sub psgi_return {
	my ($self, $env, $limiter, $parse_hdr, $hdr_arg) = @_;
	$self->{psgi_env} = $env;
	$self->{hdr_buf} = \(my $hdr_buf = '');
	$self->{parse_hdr} = [ $parse_hdr, $hdr_arg ];
	$limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32);

	# the caller already captured the PSGI write callback from
	# the PSGI server, so we can call ->start, here:
	$env->{'qspawn.wcb'} and
		return start($self, $limiter, \&psgi_return_start);

	# the caller will return this sub to the PSGI server, so
	# it can set the response callback (that is, for
	# PublicInbox::HTTP, the chunked_wcb or identity_wcb callback),
	# but other HTTP servers are supported:
	sub {
		$env->{'qspawn.wcb'} = $_[0];
		start($self, $limiter, \&psgi_return_start);
	}
}

package PublicInbox::Qspawn::Limiter;
use strict;
use warnings;

sub new {
	my ($class, $max) = @_;
	bless {
		# 32 is same as the git-daemon connection limit
		max => $max || 32,
		running => 0,
		run_queue => [],
		# RLIMIT_CPU => undef,
		# RLIMIT_DATA => undef,
		# RLIMIT_CORE => undef,
	}, $class;
}

sub setup_rlimit {
	my ($self, $name, $config) = @_;
	foreach my $rlim (@PublicInbox::Spawn::RLIMITS) {
		my $k = lc($rlim);
		$k =~ tr/_//d;
		$k = "publicinboxlimiter.$name.$k";
		defined(my $v = $config->{$k}) or next;
		my @rlimit = split(/\s*,\s*/, $v);
		if (scalar(@rlimit) == 1) {
			push @rlimit, $rlimit[0];
		} elsif (scalar(@rlimit) != 2) {
			warn "could not parse $k: $v\n";
		}
		eval { require BSD::Resource };
		if ($@) {
			warn "BSD::Resource missing for $rlim";
			next;
		}
		foreach my $i (0..$#rlimit) {
			next if $rlimit[$i] ne 'INFINITY';
			$rlimit[$i] = BSD::Resource::RLIM_INFINITY();
		}
		$self->{$rlim} = \@rlimit;
	}
}

1;
public-inbox-1.6.1/lib/PublicInbox/Reply.pm000066400000000000000000000061051377346120300205370ustar00rootroot00000000000000# Copyright (C) 2014-2020 all contributors 
# License: AGPL-3.0+ 

# For reply instructions and address generation in WWW UI
package PublicInbox::Reply;
use strict;
use warnings;
use URI::Escape qw/uri_escape_utf8/;
use PublicInbox::Hval qw(ascii_html obfuscate_addrs mid_href);
use PublicInbox::Address;
use PublicInbox::MID qw(mid_clean);

sub squote_maybe ($) {
	my ($val) = @_;
	if ($val =~ m{([^\w@\./,\%\+\-])}) {
		$val =~ s/(['!])/'\\$1'/g; # '!' for csh
		return "'$val'";
	}
	$val;
}

sub add_addrs {
	my ($to, $cc, @addrs) = @_;
	foreach my $address (@addrs) {
		my $dst = lc($address);
		$cc->{$dst} ||= $address;
		$$to ||= $dst;
	}
}

my @reply_headers = qw(From To Cc Reply-To);
my $reply_headers = join('|', @reply_headers);

sub mailto_arg_link {
	my ($ibx, $hdr) = @_;
	my $cc = {}; # everyone else
	my $to; # this is the From address by defaultq
	my $reply_to_all = 'reply-to-all'; # the only good default :P
	my $reply_to_cfg = $ibx->{replyto};

	$reply_to_cfg ||= ':all';
	if ($reply_to_cfg =~ /\A:none=(.*)/) {
		my $msg = $1;
		$msg = 'replies disabled' if $msg eq '';
		return \$msg;
	}

	foreach my $rt (split(/\s*,\s*/, $reply_to_cfg)) {
		if ($rt eq ':all') {
			foreach my $h (@reply_headers) {
				my $v = $hdr->header($h);
				defined($v) && ($v ne '') or next;
				my @addrs = PublicInbox::Address::emails($v);
				add_addrs(\$to, $cc, @addrs);
			}
		} elsif ($rt eq ':list') {
			$reply_to_all = 'reply-to-list';
			add_addrs(\$to, $cc, $ibx->{-primary_address});
		} elsif ($rt =~ /\A(?:$reply_headers)\z/io) {
			# ugh, this is weird...
			my $v = $hdr->header($rt);
			if (defined($v) && ($v ne '')) {
				my @addrs = PublicInbox::Address::emails($v);
				add_addrs(\$to, $cc, @addrs);
			}
		} elsif ($rt =~ /@/) {
			add_addrs(\$to, $cc, $rt);
		} else {
			warn "Unrecognized replyto = '$rt' in config\n";
		}
	}

	my @arg;
	my $obfs = $ibx->{obfuscate};
	my $subj = $hdr->header('Subject') || '';
	$subj = "Re: $subj" unless $subj =~ /\bRe:/i;
	my $mid = $hdr->header_raw('Message-ID');
	push @arg, '--in-reply-to='.squote_maybe(mid_clean($mid));
	my $irt = mid_href($mid);
	delete $cc->{$to};
	if ($obfs) {
		my $arg_to = $to;
		obfuscate_addrs($ibx, $arg_to, '$(echo .)');
		push @arg, "--to=$arg_to";
		# no $subj for $href below
	} else {
		push @arg, "--to=$to";
		$to = uri_escape_utf8($to);
		$subj = uri_escape_utf8($subj);
	}
	my @cc = sort values %$cc;
	$cc = '';
	if (@cc) {
		if ($obfs) {
			push(@arg, map {
				my $addr = $_;
				obfuscate_addrs($ibx, $addr, '$(echo .)');
				"--cc=$addr";
			} @cc);
		} else {
			$cc = '&Cc=' . uri_escape_utf8(join(',', @cc));
			push(@arg, map { "--cc=$_" } @cc);
		}
	}

	# I'm not sure if address obfuscation and mailto: links can
	# be made compatible; and address obfuscation is misguided,
	# anyways.
	return (\@arg, '', $reply_to_all) if $obfs;

	# order matters, Subject is the least important header,
	# so it is last in case it's lost/truncated in a copy+paste
	my $href = "mailto:$to?In-Reply-To=$irt${cc}&Subject=$subj";

	(\@arg, ascii_html($href), $reply_to_all);
}

1;
public-inbox-1.6.1/lib/PublicInbox/SaPlugin/000077500000000000000000000000001377346120300206265ustar00rootroot00000000000000public-inbox-1.6.1/lib/PublicInbox/SaPlugin/ListMirror.pm000066400000000000000000000057201377346120300232760ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 

# SpamAssassin rules useful for running a mailing list mirror.  We want to:
# * ensure Received: headers are really from the list mail server
#   users expect.  This is to prevent malicious users from
#   injecting spam into mirrors without going through the expected
#   server
# * flag messages where the mailing list is Bcc:-ed since it is
#   common for spam to have wrong or non-existent To:/Cc: headers.

package PublicInbox::SaPlugin::ListMirror;
use strict;
use warnings;
use base qw(Mail::SpamAssassin::Plugin);

# constructor: register the eval rules
sub new {
	my ($class, $mail) = @_;

	# some boilerplate...
	$class = ref($class) || $class;
	my $self = $class->SUPER::new($mail);
	bless $self, $class;
	$mail->{conf}->{list_mirror_check} = [];
	$self->register_eval_rule('check_list_mirror_received');
	$self->register_eval_rule('check_list_mirror_bcc');
	$self->set_config($mail->{conf});
	$self;
}

sub check_list_mirror_received {
	my ($self, $pms) = @_;
	my $recvd = $pms->get('Received') || '';
	$recvd =~ s/\n.*\z//s;

	foreach my $cfg (@{$pms->{conf}->{list_mirror_check}}) {
		my ($hdr, $hval, $host_re, $addr_re) = @$cfg;
		my $v = $pms->get($hdr) or next;
		local $/ = "\n";
		chomp $v;
		next if $v ne $hval;
		return 1 if $recvd !~ $host_re;
	}

	0;
}

sub check_list_mirror_bcc {
	my ($self, $pms) = @_;
	my $tocc = $pms->get('ToCc');

	foreach my $cfg (@{$pms->{conf}->{list_mirror_check}}) {
		my ($hdr, $hval, $host_re, $addr_re) = @$cfg;
		defined $addr_re or next;
		my $v = $pms->get($hdr) or next;
		local $/ = "\n";
		chomp $v;
		next if $v ne $hval;
		return 1 if !$tocc || $tocc !~ $addr_re;
	}

	0;
}

# list_mirror HEADER HEADER_VALUE HOSTNAME_GLOB [LIST_ADDRESS]
# list_mirror X-Mailing-List git@vger.kernel.org *.kernel.org
# list_mirror List-Id  *.example.org foo@example.org
sub config_list_mirror {
	my ($self, $key, $value, $line) = @_;

	defined $value or
		return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;

	my ($hdr, $hval, $host_glob, @extra) = split(/\s+/, $value);
	my $addr = shift @extra;

	if (defined $addr) {
		$addr !~ /\@/ and
			return $Mail::SpamAssassin::Conf::INVALID_VALUE;
		$addr = join('|', map { quotemeta } split(/,/, $addr));
		$addr = qr/\b$addr\b/i;
	}

	@extra and return $Mail::SpamAssassin::Conf::INVALID_VALUE;

	defined $host_glob or
		return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;

	my %patmap = ('*' => '\S+', '?' => '.', '[' => '[', ']' => ']');
	$host_glob =~ s!(.)!$patmap{$1} || "\Q$1"!ge;
	my $host_re = qr/\A\s*from\s+$host_glob(?:\s|$)/si;

	push @{$self->{list_mirror_check}}, [ $hdr, $hval, $host_re, $addr ];
}

sub set_config {
	my ($self, $conf) = @_;
	my @cmds;
	push @cmds, {
		setting => 'list_mirror',
		default => '',
		type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
		code => *config_list_mirror,
	};
	$conf->{parser}->register_commands(\@cmds);
}

1;
public-inbox-1.6.1/lib/PublicInbox/SaPlugin/ListMirror.pod000066400000000000000000000065701377346120300234500ustar00rootroot00000000000000=head1 NAME

PublicInbox::SaPlugin::ListMirror - SpamAssassin plugin for mailing list mirrors

=head1 SYNOPSIS

  loadplugin PublicInbox::SaPlugin::ListMirror

Declare some mailing lists based on the expected List-Id value,
expected servers, and mailing list address:

  list_mirror List-Id  *.example.com foo@example.com
  list_mirror List-Id  *.example.com bar@example.com

Bump the score for messages which come from unexpected servers:

  header LIST_MIRROR_RECEIVED eval:check_list_mirror_received()
  describe LIST_MIRROR_RECEIVED Received does not match expected
  score LIST_MIRROR_RECEIVED 10

Bump the score for messages which Bcc the list:

  header LIST_MIRROR_BCC eval:check_list_mirror_bcc()
  describe LIST_MIRROR_BCC Mailing list was Bcc-ed
  score LIST_MIRROR_BCC 5

=head1 DESCRIPTION

This plugin contains common functions to provide accurate, ongoing
mirrors of existing mailing lists.  It may be used independently of the
rest of public-inbox, it does not depend on any public-inbox code, only
SpamAssassin.

=head1 ADMINISTRATOR SETTINGS

This plugin has no administrator settings, aside from the need
to load it via C and enabling user rules
C

=head1 USER SETTINGS

=over 4

=item list_mirror HEADER HEADER_VALUE HOSTNAME_GLOB [LIST_ADDRESS]

Declare a list based on an expected C
matching C exactly coming from C. C is optional, but may specify the address of the mailing list being mirrored. C or C are common values of C
An example of C is Cfoo.example.orgE> if C
is C. C may be a wildcard match for machines where mail can come from or an exact match. C is only required if using the L eval rule C may be specified multiple times. =back =head1 EVAL FUNCTIONS =over 4 =item header LIST_MIRROR_RECEIVED eval:check_list_mirror_received() The C function implements C header checking based on L configuration values. This rule can be used to score and prevent messages from being injected directly into your mirror without going through the expected mailing list servers: ifplugin PublicInbox::SaPlugin::ListMirror header LIST_MIRROR_RECEIVED eval:check_list_mirror_received() describe LIST_MIRROR_RECEIVED Received does not match expected endif =item header LIST_MIRROR_BCC eval:check_list_mirror_bcc() The C function checks for Bcc to mailing lists declared with a C via L Spammers will often Bcc mailing lists; while it's uncommon and strange for valid messages to be Bcc-ed to any public mailing list. This rule allows users to assign a score to Bcc-ed messages ifplugin PublicInbox::SaPlugin::ListMirror header LIST_MIRROR_BCC eval:check_list_mirror_bcc() describe LIST_MIRROR_BCC Mailing list was Bcc-ed endif =back =head1 CONTACT Feedback welcome via plain-text mail to L The mail archives are hosted at L and L =head1 COPYRIGHT Copyright (C) 2016-2020 all contributors L License: AGPL-3.0+ L =head1 SEE ALSO L public-inbox-1.6.1/lib/PublicInbox/Search.pm000066400000000000000000000307531377346120300206570ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ # based on notmuch, but with no concept of folders, files or flags # # Read-only search interface for use by the web and NNTP interfaces package PublicInbox::Search; use strict; use parent qw(Exporter); our @EXPORT_OK = qw(mdocid); use List::Util qw(max); # values for searching, changing the numeric value breaks # compatibility with old indices (so don't change them it) use constant { TS => 0, # Received: header in Unix time (IMAP INTERNALDATE) YYYYMMDD => 1, # Date: header for searching in the WWW UI DT => 2, # Date: YYYYMMDDHHMMSS # added for public-inbox 1.6.0+ BYTES => 3, # IMAP RFC822.SIZE UID => 4, # IMAP UID == NNTP article number == Xapian docid THREADID => 5, # RFC 8474, RFC 8621 # TODO # REPLYCNT => ?, # IMAP ANSWERED # SCHEMA_VERSION history # 0 - initial # 1 - subject_path is lower-cased # 2 - subject_path is id_compress in the index, only # 3 - message-ID is compressed if it includes '%' (hack!) # 4 - change "Re: " normalization, avoid circular Reference ghosts # 5 - subject_path drops trailing '.' # 6 - preserve References: order in document data # 7 - remove references and inreplyto terms # 8 - remove redundant/unneeded document data # 9 - disable Message-ID compression (SHA-1) # 10 - optimize doc for NNTP overviews # 11 - merge threads when vivifying ghosts # 12 - change YYYYMMDD value column to numeric # 13 - fix threading for empty References/In-Reply-To # (commit 83425ef12e4b65cdcecd11ddcb38175d4a91d5a0) # 14 - fix ghost root vivification # 15 - see public-inbox-v2-format(5) # further bumps likely unnecessary, we'll suggest in-place # "--reindex" use for further fixes and tweaks: # # public-inbox v1.5.0 adds (still SCHEMA_VERSION=15): # * "lid:" and "l:" for List-Id searches # # v1.6.0 adds BYTES, UID and THREADID values SCHEMA_VERSION => 15, }; use PublicInbox::Smsg; use PublicInbox::Over; my $QP_FLAGS; our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem); our $Xap; # 'Search::Xapian' or 'Xapian' my $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor') my $ENQ_ASCENDING; sub load_xapian () { return 1 if defined $Xap; # n.b. PI_XAPIAN is intended for development use only. We still # favor Search::Xapian since that's what's available in current # Debian stable (10.x) and derived distros. for my $x (($ENV{PI_XAPIAN} // 'Search::Xapian'), 'Xapian') { eval "require $x"; next if $@; $x->import(qw(:standard)); $Xap = $x; # `version_string' was added in Xapian 1.1 my $xver = eval('v'.eval($x.'::version_string()')) // eval('v'.eval($x.'::xapian_version_string()')); # NumberRangeProcessor was added in Xapian 1.3.6, # NumberValueRangeProcessor was removed for 1.5.0+, # favor the older /Value/ variant since that's what our # (currently) preferred Search::Xapian supports $NVRP = $x.'::'.($x eq 'Xapian' && $xver ge v1.5 ? 'NumberRangeProcessor' : 'NumberValueRangeProcessor'); $X{$_} = $Xap.'::'.$_ for (keys %X); # ENQ_ASCENDING doesn't seem exported by SWIG Xapian.pm, # so lets hope this part of the ABI is stable because it's # just an integer: $ENQ_ASCENDING = $x eq 'Xapian' ? 1 : Search::Xapian::ENQ_ASCENDING(); # for Smsg: *PublicInbox::Smsg::sortable_unserialise = $Xap.'::sortable_unserialise'; # n.b. FLAG_PURE_NOT is expensive not suitable for a public # website as it could become a denial-of-service vector # FLAG_PHRASE also seems to cause performance problems chert # (and probably earlier Xapian DBs). glass seems fine... # TODO: make this an option, maybe? # or make indexlevel=medium as default $QP_FLAGS = FLAG_PHRASE() | FLAG_BOOLEAN() | FLAG_LOVEHATE() | FLAG_WILDCARD(); return 1; } undef; } # This is English-only, everything else is non-standard and may be confused as # a prefix common in patch emails our $LANG = 'english'; # note: the non-X term prefix allocations are shared with # Xapian omega, see xapian-applications/omega/docs/termprefixes.rst my %bool_pfx_external = ( mid => 'Q', # Message-ID (full/exact), this is mostly uniQue lid => 'G', # newsGroup (or similar entity), just inside <> dfpre => 'XDFPRE', dfpost => 'XDFPOST', dfblob => 'XDFPRE XDFPOST', ); my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST'; my %prob_prefix = ( # for mairix compatibility s => 'S', m => 'XM', # 'mid:' (bool) is exact, 'm:' (prob) can do partial l => 'XL', # 'lid:' (bool) is exact, 'l:' (prob) can do partial f => 'A', t => 'XTO', tc => 'XTO XCC', c => 'XCC', tcf => 'XTO XCC A', a => 'XTO XCC A', b => $non_quoted_body . ' XQUOT', bs => $non_quoted_body . ' XQUOT S', n => 'XFN', q => 'XQUOT', nq => $non_quoted_body, dfn => 'XDFN', dfa => 'XDFA', dfb => 'XDFB', dfhh => 'XDFHH', dfctx => 'XDFCTX', # default: '' => 'XM S A XQUOT XFN ' . $non_quoted_body, ); # not documenting m: and mid: for now, the using the URLs works w/o Xapian # not documenting lid: for now, either, it is probably redundant with l:, # especially since we don't offer boolean searches for To/Cc/From # headers, either our @HELP = ( 's:' => 'match within Subject e.g. s:"a quick brown fox"', 'd:' => < < 'match within message body, including text attachments', 'nq:' => 'match non-quoted text within message body', 'q:' => 'match quoted text within message body', 'n:' => 'match filename of attachment(s)', 't:' => 'match within the To header', 'c:' => 'match within the Cc header', 'f:' => 'match within the From header', 'a:' => 'match within the To, Cc, and From headers', 'tc:' => 'match within the To and Cc headers', 'l:' => 'match contents of the List-Id header', 'bs:' => 'match within the Subject and body', 'dfn:' => 'match filename from diff', 'dfa:' => 'match diff removed (-) lines', 'dfb:' => 'match diff added (+) lines', 'dfhh:' => 'match diff hunk header context (usually a function name)', 'dfctx:' => 'match diff context lines', 'dfpre:' => 'match pre-image git blob ID', 'dfpost:' => 'match post-image git blob ID', 'dfblob:' => 'match either pre or post-image git blob ID', ); chomp @HELP; sub xdir ($;$) { my ($self, $rdonly) = @_; if ($rdonly || !defined($self->{shard})) { $self->{xpfx}; } else { # v2 only: "$self->{xpfx}/$self->{shard}"; } } sub _xdb ($) { my ($self) = @_; my $dir = xdir($self, 1); my ($xdb, $slow_phrase); my $qpf = \($self->{qp_flags} ||= $QP_FLAGS); if ($self->{ibx_ver} >= 2) { my @xdb; opendir(my $dh, $dir) or return; # not initialized yet # We need numeric sorting so shard[0] is first for reading # Xapian metadata, if needed my $last = max(grep(/\A[0-9]+\z/, readdir($dh))); return if !defined($last); for (0..$last) { my $shard_dir = "$dir/$_"; if (-d $shard_dir && -r _) { push @xdb, $X{Database}->new($shard_dir); $slow_phrase ||= -f "$shard_dir/iamchert"; } else { # gaps from missing epochs throw off mdocid() warn "E: $shard_dir missing or unreadable\n"; return; } } $self->{nshard} = scalar(@xdb); $xdb = shift @xdb; $xdb->add_database($_) for @xdb; } else { $slow_phrase = -f "$dir/iamchert"; $xdb = $X{Database}->new($dir); } $$qpf |= FLAG_PHRASE() unless $slow_phrase; $xdb; } # v2 Xapian docids don't conflict, so they're identical to # NNTP article numbers and IMAP UIDs. # https://trac.xapian.org/wiki/FAQ/MultiDatabaseDocumentID sub mdocid { my ($nshard, $mitem) = @_; my $docid = $mitem->get_docid; int(($docid - 1) / $nshard) + 1; } sub mset_to_artnums { my ($self, $mset) = @_; my $nshard = $self->{nshard} // 1; [ map { mdocid($nshard, $_) } $mset->items ]; } sub xdb ($) { my ($self) = @_; $self->{xdb} ||= do { load_xapian(); _xdb($self); }; } sub xpfx_init ($) { my ($self) = @_; if ($self->{ibx_ver} == 1) { $self->{xpfx} .= '/public-inbox/xapian' . SCHEMA_VERSION; } else { $self->{xpfx} .= '/xap'.SCHEMA_VERSION; } } sub new { my ($class, $ibx) = @_; ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx"; my $self = bless { xpfx => $ibx->{inboxdir}, # for xpfx_init altid => $ibx->{altid}, ibx_ver => $ibx->version, }, $class; xpfx_init($self); $self; } sub reopen { my ($self) = @_; if (my $xdb = $self->{xdb}) { $xdb->reopen; } $self; # make chaining easier } # read-only sub mset { my ($self, $query_string, $opts) = @_; $opts ||= {}; my $qp = $self->{qp} //= qparse_new($self); my $query = $qp->parse_query($query_string, $self->{qp_flags}); $opts->{relevance} = 1 unless exists $opts->{relevance}; _do_enquire($self, $query, $opts); } sub retry_reopen { my ($self, $cb, $arg) = @_; for my $i (1..10) { if (wantarray) { my @ret; eval { @ret = $cb->($arg) }; return @ret unless $@; } else { my $ret; eval { $ret = $cb->($arg) }; return $ret unless $@; } # Exception: The revision being read has been discarded - # you should call Xapian::Database::reopen() if (ref($@) =~ /\bDatabaseModifiedError\b/) { warn "reopen try #$i on $@\n"; reopen($self); } else { # let caller decide how to spew, because ExtMsg queries # get wonky and trigger: # "something terrible happened at .../Xapian/Enquire.pm" die; } } die "Too many Xapian database modifications in progress\n"; } sub _do_enquire { my ($self, $query, $opts) = @_; retry_reopen($self, \&_enquire_once, [ $self, $query, $opts ]); } # returns true if all docs have the THREADID value sub has_threadid ($) { my ($self) = @_; (xdb($self)->get_metadata('has_threadid') // '') eq '1'; } sub _enquire_once { # retry_reopen callback my ($self, $query, $opts) = @{$_[0]}; my $xdb = xdb($self); my $enquire = $X{Enquire}->new($xdb); $enquire->set_query($query); $opts ||= {}; my $desc = !$opts->{asc}; if (($opts->{mset} || 0) == 2) { # mset == 2: ORDER BY docid/UID $enquire->set_docid_order($ENQ_ASCENDING); $enquire->set_weighting_scheme($X{BoolWeight}->new); } elsif ($opts->{relevance}) { $enquire->set_sort_by_relevance_then_value(TS, $desc); } else { $enquire->set_sort_by_value_then_relevance(TS, $desc); } # `mairix -t / --threads' or JMAP collapseThreads if ($opts->{thread} && has_threadid($self)) { $enquire->set_collapse_key(THREADID); } $enquire->get_mset($opts->{offset} || 0, $opts->{limit} || 50); } sub mset_to_smsg { my ($self, $ibx, $mset) = @_; my $nshard = $self->{nshard} // 1; my $i = 0; my %order = map { mdocid($nshard, $_) => ++$i } $mset->items; my @msgs = sort { $order{$a->{num}} <=> $order{$b->{num}} } @{$ibx->over->get_all(keys %order)}; wantarray ? ($mset->get_matches_estimated, \@msgs) : \@msgs; } # read-write sub stemmer { $X{Stem}->new($LANG) } # read-only sub qparse_new ($) { my ($self) = @_; my $xdb = xdb($self); my $qp = $X{QueryParser}->new; $qp->set_default_op(OP_AND()); $qp->set_database($xdb); $qp->set_stemmer(stemmer($self)); $qp->set_stemming_strategy(STEM_SOME()); my $cb = $qp->can('set_max_wildcard_expansion') // $qp->can('set_max_expansion'); # Xapian 1.5.0+ $cb->($qp, 100); $cb = $qp->can('add_valuerangeprocessor') // $qp->can('add_rangeprocessor'); # Xapian 1.5.0+ $cb->($qp, $NVRP->new(YYYYMMDD, 'd:')); $cb->($qp, $NVRP->new(DT, 'dt:')); # for IMAP, undocumented for WWW and may be split off go away $cb->($qp, $NVRP->new(BYTES, 'bytes:')); $cb->($qp, $NVRP->new(TS, 'ts:')); $cb->($qp, $NVRP->new(UID, 'uid:')); while (my ($name, $prefix) = each %bool_pfx_external) { $qp->add_boolean_prefix($name, $_) foreach split(/ /, $prefix); } # we do not actually create AltId objects, # just parse the spec to avoid the extra DB handles for now. if (my $altid = $self->{altid}) { my $user_pfx = $self->{-user_pfx} = []; for (@$altid) { # $_ = 'serial:gmane:/path/to/gmane.msgmap.sqlite3' # note: Xapian supports multibyte UTF-8, /^[0-9]+$/, # and '_' with prefixes matching \w+ /\Aserial:(\w+):/ or next; my $pfx = $1; push @$user_pfx, "$pfx:", < XGMANE $qp->add_boolean_prefix($pfx, 'X'.uc($pfx)); } chomp @$user_pfx; } while (my ($name, $prefix) = each %prob_prefix) { $qp->add_prefix($name, $_) foreach split(/ /, $prefix); } $qp; } sub help { my ($self) = @_; $self->{qp} //= qparse_new($self); # parse altids my @ret = @HELP; if (my $user_pfx = $self->{-user_pfx}) { push @ret, @$user_pfx; } \@ret; } 1; public-inbox-1.6.1/lib/PublicInbox/SearchIdx.pm000066400000000000000000000572431377346120300213270ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ # based on notmuch, but with no concept of folders, files or flags # # Indexes mail with Xapian and our (SQLite-based) ::Msgmap for use # with the web and NNTP interfaces. This index maintains thread # relationships for use by PublicInbox::SearchThread. # This writes to the search index. package PublicInbox::SearchIdx; use strict; use v5.10.1; use parent qw(PublicInbox::Search PublicInbox::Lock Exporter); use PublicInbox::Eml; use PublicInbox::InboxWritable; use PublicInbox::MID qw(mids_for_index mids); use PublicInbox::MsgIter; use PublicInbox::IdxStack; use Carp qw(croak); use POSIX qw(strftime); use PublicInbox::OverIdx; use PublicInbox::Spawn qw(spawn nodatacow_dir); use PublicInbox::Git qw(git_unquote); use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp); our @EXPORT_OK = qw(crlf_adjust log2stack is_ancestor check_size); my $X = \%PublicInbox::Search::X; my ($DB_CREATE_OR_OPEN, $DB_OPEN); our $DB_NO_SYNC = 0; our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff : 1_000_000; use constant DEBUG => !!$ENV{DEBUG}; my $xapianlevels = qr/\A(?:full|medium)\z/; my $hex = '[a-f0-9]'; my $OID = $hex .'{40,}'; sub new { my ($class, $ibx, $creat, $shard) = @_; ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx"; my $levels = qr/\A(?:full|medium|basic)\z/; my $inboxdir = $ibx->{inboxdir}; my $version = $ibx->version; my $indexlevel = 'full'; my $altid = $ibx->{altid}; if ($altid) { require PublicInbox::AltId; $altid = [ map { PublicInbox::AltId->new($ibx, $_); } @$altid ]; } if ($ibx->{indexlevel}) { if ($ibx->{indexlevel} =~ $levels) { $indexlevel = $ibx->{indexlevel}; } else { die("Invalid indexlevel $ibx->{indexlevel}\n"); } } $ibx = PublicInbox::InboxWritable->new($ibx); my $self = bless { ibx => $ibx, xpfx => $inboxdir, # for xpfx_init -altid => $altid, ibx_ver => $version, indexlevel => $indexlevel, }, $class; $self->xpfx_init; $self->{-set_indexlevel_once} = 1 if $indexlevel eq 'medium'; if ($ibx->{-skip_docdata}) { $self->{-set_skip_docdata_once} = 1; $self->{-skip_docdata} = 1; } $ibx->umask_prepare; if ($version == 1) { $self->{lock_path} = "$inboxdir/ssoma.lock"; my $dir = $self->xdir; $self->{oidx} = PublicInbox::OverIdx->new("$dir/over.sqlite3"); $self->{oidx}->{-no_fsync} = 1 if $ibx->{-no_fsync}; } elsif ($version == 2) { defined $shard or die "shard is required for v2\n"; # shard is a number $self->{shard} = $shard; $self->{lock_path} = undef; } else { die "unsupported inbox version=$version\n"; } $self->{creat} = ($creat || 0) == 1; $self; } sub need_xapian ($) { $_[0]->{indexlevel} =~ $xapianlevels } sub idx_release { my ($self, $wake) = @_; if (need_xapian($self)) { my $xdb = delete $self->{xdb} or croak 'not acquired'; $xdb->close; } $self->lock_release($wake) if $self->{creat}; undef; } sub load_xapian_writable () { return 1 if $X->{WritableDatabase}; PublicInbox::Search::load_xapian() or return; my $xap = $PublicInbox::Search::Xap; for (qw(Document TermGenerator WritableDatabase)) { $X->{$_} = $xap.'::'.$_; } eval 'require '.$X->{WritableDatabase} or die; *sortable_serialise = $xap.'::sortable_serialise'; $DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()'); $DB_OPEN = eval($xap.'::DB_OPEN()'); my $ver = (eval($xap.'::major_version()') << 16) | (eval($xap.'::minor_version()') << 8); $DB_NO_SYNC = 0x4 if $ver >= 0x10400; 1; } sub idx_acquire { my ($self) = @_; my $flag; my $dir = $self->xdir; if (need_xapian($self)) { croak 'already acquired' if $self->{xdb}; load_xapian_writable(); $flag = $self->{creat} ? $DB_CREATE_OR_OPEN : $DB_OPEN; } if ($self->{creat}) { require File::Path; $self->lock_acquire; # don't create empty Xapian directories if we don't need Xapian my $is_shard = defined($self->{shard}); if (!-d $dir && (!$is_shard || ($is_shard && need_xapian($self)))) { File::Path::mkpath($dir); nodatacow_dir($dir); $self->{-set_has_threadid_once} = 1; } } return unless defined $flag; $flag |= $DB_NO_SYNC if $self->{ibx}->{-no_fsync}; my $xdb = eval { ($X->{WritableDatabase})->new($dir, $flag) }; croak "Failed opening $dir: $@" if $@; $self->{xdb} = $xdb; } sub add_val ($$$) { my ($doc, $col, $num) = @_; $num = sortable_serialise($num); $doc->add_value($col, $num); } sub term_generator ($) { # write-only my ($self) = @_; $self->{term_generator} //= do { my $tg = $X->{TermGenerator}->new; $tg->set_stemmer($self->stemmer); $tg; } } sub index_text ($$$$) { my ($self, $text, $wdf_inc, $prefix) = @_; my $tg = term_generator($self); # man Search::Xapian::TermGenerator if ($self->{indexlevel} eq 'full') { $tg->index_text($text, $wdf_inc, $prefix); $tg->increase_termpos; } else { $tg->index_text_without_positions($text, $wdf_inc, $prefix); } } sub index_headers ($$) { my ($self, $smsg) = @_; my @x = (from => 'A', # Author subject => 'S', to => 'XTO', cc => 'XCC'); while (my ($field, $pfx) = splice(@x, 0, 2)) { my $val = $smsg->{$field}; index_text($self, $val, 1, $pfx) if $val ne ''; } } sub index_diff_inc ($$$$) { my ($self, $text, $pfx, $xnq) = @_; if (@$xnq) { index_text($self, join("\n", @$xnq), 1, 'XNQ'); @$xnq = (); } index_text($self, $text, 1, $pfx); } sub index_old_diff_fn { my ($self, $seen, $fa, $fb, $xnq) = @_; # no renames or space support for traditional diffs, # find the number of leading common paths to strip: my @fa = split('/', $fa); my @fb = split('/', $fb); while (scalar(@fa) && scalar(@fb)) { $fa = join('/', @fa); $fb = join('/', @fb); if ($fa eq $fb) { unless ($seen->{$fa}++) { index_diff_inc($self, $fa, 'XDFN', $xnq); } return 1; } shift @fa; shift @fb; } 0; } sub index_diff ($$$) { my ($self, $txt, $doc) = @_; my %seen; my $in_diff; my @xnq; my $xnq = \@xnq; foreach (split(/\n/, $txt)) { if ($in_diff && s/^ //) { # diff context index_diff_inc($self, $_, 'XDFCTX', $xnq); } elsif (/^-- $/) { # email signature begins $in_diff = undef; } elsif (m!^diff --git "?[^/]+/.+ "?[^/]+/.+\z!) { # wait until "---" and "+++" to capture filenames $in_diff = 1; # traditional diff: } elsif (m/^diff -(.+) (\S+) (\S+)$/) { my ($opt, $fa, $fb) = ($1, $2, $3); push @xnq, $_; # only support unified: next unless $opt =~ /[uU]/; $in_diff = index_old_diff_fn($self, \%seen, $fa, $fb, $xnq); } elsif (m!^--- ("?[^/]+/.+)!) { my $fn = $1; $fn = (split('/', git_unquote($fn), 2))[1]; $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq); $in_diff = 1; } elsif (m!^\+\+\+ ("?[^/]+/.+)!) { my $fn = $1; $fn = (split('/', git_unquote($fn), 2))[1]; $seen{$fn}++ or index_diff_inc($self, $fn, 'XDFN', $xnq); $in_diff = 1; } elsif (/^--- (\S+)/) { $in_diff = $1; push @xnq, $_; } elsif (defined $in_diff && /^\+\+\+ (\S+)/) { $in_diff = index_old_diff_fn($self, \%seen, $in_diff, $1, $xnq); } elsif ($in_diff && s/^\+//) { # diff added index_diff_inc($self, $_, 'XDFB', $xnq); } elsif ($in_diff && s/^-//) { # diff removed index_diff_inc($self, $_, 'XDFA', $xnq); } elsif (m!^index ([a-f0-9]+)\.\.([a-f0-9]+)!) { my ($ba, $bb) = ($1, $2); index_git_blob_id($doc, 'XDFPRE', $ba); index_git_blob_id($doc, 'XDFPOST', $bb); $in_diff = 1; } elsif (/^@@ (?:\S+) (?:\S+) @@\s*$/) { # traditional diff w/o -p } elsif (/^@@ (?:\S+) (?:\S+) @@\s*(\S+.*)$/) { # hunk header context index_diff_inc($self, $1, 'XDFHH', $xnq); # ignore the following lines: } elsif (/^(?:dis)similarity index/ || /^(?:old|new) mode/ || /^(?:deleted|new) file mode/ || /^(?:copy|rename) (?:from|to) / || /^(?:dis)?similarity index / || /^\\ No newline at end of file/ || /^Binary files .* differ/) { push @xnq, $_; } elsif ($_ eq '') { # possible to be in diff context, some mail may be # stripped by MUA or even GNU diff(1). "git apply" # treats a bare "\n" as diff context, too } else { push @xnq, $_; warn "non-diff line: $_\n" if DEBUG && $_ ne ''; $in_diff = undef; } } index_text($self, join("\n", @xnq), 1, 'XNQ'); } sub index_xapian { # msg_iter callback my $part = $_[0]->[0]; # ignore $depth and $idx my ($self, $doc) = @{$_[1]}; my $ct = $part->content_type || 'text/plain'; my $fn = $part->filename; if (defined $fn && $fn ne '') { index_text($self, $fn, 1, 'XFN'); } if ($part->{is_submsg}) { my $mids = mids_for_index($part); index_ids($self, $doc, $part, $mids); my $smsg = bless {}, 'PublicInbox::Smsg'; $smsg->populate($part); index_headers($self, $smsg); } my ($s, undef) = msg_part_text($part, $ct); defined $s or return; $_[0]->[0] = $part = undef; # free memory # split off quoted and unquoted blocks: my @sections = PublicInbox::MsgIter::split_quotes($s); undef $s; # free memory for my $txt (@sections) { if ($txt =~ /\A>/) { index_text($self, $txt, 0, 'XQUOT'); } else { # does it look like a diff? if ($txt =~ /^(?:diff|---|\+\+\+) /ms) { index_diff($self, $txt, $doc); } else { index_text($self, $txt, 1, 'XNQ'); } } undef $txt; # free memory } } sub index_ids ($$$$) { my ($self, $doc, $hdr, $mids) = @_; for my $mid (@$mids) { index_text($self, $mid, 1, 'XM'); # because too many Message-IDs are prefixed with # "Pine.LNX."... if ($mid =~ /\w{12,}/) { my @long = ($mid =~ /(\w{3,}+)/g); index_text($self, join(' ', @long), 1, 'XM'); } } $doc->add_boolean_term('Q' . $_) for @$mids; for my $l ($hdr->header_raw('List-Id')) { $l =~ /<([^>]+)>/ or next; my $lid = lc $1; $doc->add_boolean_term('G' . $lid); index_text($self, $lid, 1, 'XL'); # probabilistic } } sub add_xapian ($$$$) { my ($self, $eml, $smsg, $mids) = @_; my $doc = $X->{Document}->new; add_val($doc, PublicInbox::Search::TS(), $smsg->{ts}); my @ds = gmtime($smsg->{ds}); my $yyyymmdd = strftime('%Y%m%d', @ds); add_val($doc, PublicInbox::Search::YYYYMMDD(), $yyyymmdd); my $dt = strftime('%Y%m%d%H%M%S', @ds); add_val($doc, PublicInbox::Search::DT(), $dt); add_val($doc, PublicInbox::Search::BYTES(), $smsg->{bytes}); add_val($doc, PublicInbox::Search::UID(), $smsg->{num}); add_val($doc, PublicInbox::Search::THREADID, $smsg->{tid}); my $tg = term_generator($self); $tg->set_document($doc); index_headers($self, $smsg); msg_iter($eml, \&index_xapian, [ $self, $doc ]); index_ids($self, $doc, $eml, $mids); # by default, we maintain compatibility with v1.5.0 and earlier # by writing to docdata.glass, users who never exect to downgrade can # use --skip-docdata if (!$self->{-skip_docdata}) { # WWW doesn't need {to} or {cc}, only NNTP $smsg->{to} = $smsg->{cc} = ''; PublicInbox::OverIdx::parse_references($smsg, $eml, $mids); my $data = $smsg->to_doc_data; $doc->set_data($data); } if (my $altid = $self->{-altid}) { foreach my $alt (@$altid) { my $pfx = $alt->{xprefix}; foreach my $mid (@$mids) { my $id = $alt->mid2alt($mid); next unless defined $id; $doc->add_boolean_term($pfx . $id); } } } $self->{xdb}->replace_document($smsg->{num}, $doc); } sub _msgmap_init ($) { my ($self) = @_; die "BUG: _msgmap_init is only for v1\n" if $self->{ibx_ver} != 1; $self->{mm} //= eval { require PublicInbox::Msgmap; my $rw = $self->{ibx}->{-no_fsync} ? 2 : 1; PublicInbox::Msgmap->new($self->{ibx}->{inboxdir}, $rw); }; } sub add_message { # mime = PublicInbox::Eml or Email::MIME object my ($self, $mime, $smsg, $sync) = @_; my $mids = mids_for_index($mime); $smsg //= bless { blob => '' }, 'PublicInbox::Smsg'; # test-only compat $smsg->{mid} //= $mids->[0]; # v1 compatibility $smsg->{num} //= do { # v1 _msgmap_init($self); index_mm($self, $mime, $smsg->{blob}, $sync); }; # v1 and tests only: $smsg->populate($mime, $sync); $smsg->{bytes} //= length($mime->as_string); eval { # order matters, overview stores every possible piece of # data in doc_data (deflated). Xapian only stores a subset # of the fields which exist in over.sqlite3. We may stop # storing doc_data in Xapian sometime after we get multi-inbox # search working. if (my $oidx = $self->{oidx}) { # v1 only $oidx->add_overview($mime, $smsg); } if (need_xapian($self)) { add_xapian($self, $mime, $smsg, $mids); } }; if ($@) { warn "failed to index message <".join('> <',@$mids).">: $@\n"; return undef; } $smsg->{num}; } sub xdb_remove { my ($self, $oid, @removed) = @_; my $xdb = $self->{xdb} or return; for my $num (@removed) { my $doc = eval { $xdb->get_document($num) }; unless ($doc) { warn "E: $@\n" if $@; warn "E: #$num $oid missing in Xapian\n"; next; } my $smsg = bless {}, 'PublicInbox::Smsg'; $smsg->load_expand($doc); my $blob = $smsg->{blob} // '(unset)'; if ($blob eq $oid) { $xdb->delete_document($num); } else { warn "E: #$num $oid != $blob in Xapian\n"; } } } sub remove_by_oid { my ($self, $oid, $num) = @_; die "BUG: remove_by_oid is v2-only\n" if $self->{oidx}; $self->begin_txn_lazy; xdb_remove($self, $oid, $num) if need_xapian($self); } sub index_git_blob_id { my ($doc, $pfx, $objid) = @_; my $len = length($objid); for (my $len = length($objid); $len >= 7; ) { $doc->add_term($pfx.$objid); $objid = substr($objid, 0, --$len); } } # v1 only sub unindex_eml { my ($self, $oid, $eml) = @_; my $mids = mids($eml); my $nr = 0; my %tmp; for my $mid (@$mids) { my @removed = $self->{oidx}->remove_oid($oid, $mid); $nr += scalar @removed; $tmp{$_}++ for @removed; } if (!$nr) { $mids = join('> <', @$mids); warn "W: <$mids> missing for removal from overview\n"; } while (my ($num, $nr) = each %tmp) { warn "BUG: $num appears >1 times ($nr) for $oid\n" if $nr != 1; } if ($nr) { $self->{mm}->num_delete($_) for (keys %tmp); } else { # just in case msgmap and over.sqlite3 become desynched: $self->{mm}->mid_delete($mids->[0]); } xdb_remove($self, $oid, keys %tmp) if need_xapian($self); } sub index_mm { my ($self, $mime, $oid, $sync) = @_; my $mids = mids($mime); my $mm = $self->{mm}; if ($sync->{reindex}) { my $oidx = $self->{oidx}; for my $mid (@$mids) { my ($num, undef) = $oidx->num_mid0_for_oid($oid, $mid); return $num if defined $num; } $mm->num_for($mids->[0]) // $mm->mid_insert($mids->[0]); } else { # fallback to num_for since filters like RubyLang set the number $mm->mid_insert($mids->[0]) // $mm->num_for($mids->[0]); } } # returns the number of bytes to add if given a non-CRLF arg sub crlf_adjust ($) { if (index($_[0], "\r\n") < 0) { # common case is LF-only, every \n needs an \r; # so favor a cheap tr// over an expensive m//g $_[0] =~ tr/\n/\n/; } else { # count number of '\n' w/o '\r', expensive: scalar(my @n = ($_[0] =~ m/(?cat_async callback my ($bref, $oid, $type, $size, $sync) = @_; my ($nr, $max) = @$sync{qw(nr max)}; ++$$nr; $$max -= $size; $size += crlf_adjust($$bref); my $smsg = bless { bytes => $size, blob => $oid }, 'PublicInbox::Smsg'; my $self = $sync->{sidx}; my $eml = PublicInbox::Eml->new($bref); $smsg->{num} = index_mm($self, $eml, $oid, $sync) or die "E: could not generate NNTP article number for $oid"; add_message($self, $eml, $smsg, $sync); } sub unindex_both { # git->cat_async callback my ($bref, $oid, $type, $size, $self) = @_; unindex_eml($self, $oid, PublicInbox::Eml->new($bref)); } # called by public-inbox-index sub index_sync { my ($self, $opt) = @_; delete $self->{lock_path} if $opt->{-skip_lock}; $self->{ibx}->with_umask(\&_index_sync, $self, $opt); if ($opt->{reindex}) { my %again = %$opt; delete @again{qw(rethread reindex)}; index_sync($self, \%again); } } sub check_size { # check_async cb for -index --max-size=... my ($oid, $type, $size, $arg, $git) = @_; (($type // '') eq 'blob') or die "E: bad $oid in $git->{git_dir}"; if ($size <= $arg->{max_size}) { $git->cat_async($oid, $arg->{index_oid}, $arg); } else { warn "W: skipping $oid ($size > $arg->{max_size})\n"; } } sub v1_checkpoint ($$;$) { my ($self, $sync, $stk) = @_; $self->{ibx}->git->check_async_wait; $self->{ibx}->git->cat_async_wait; # latest_cmt may be undef my $newest = $stk ? $stk->{latest_cmt} : undef; if ($newest) { my $cur = $self->{mm}->last_commit || ''; if (need_update($self, $cur, $newest)) { $self->{mm}->last_commit($newest); } } else { ${$sync->{max}} = $self->{batch_bytes}; } $self->{mm}->{dbh}->commit; if ($newest && need_xapian($self)) { my $xdb = $self->{xdb}; my $cur = $xdb->get_metadata('last_commit'); if (need_update($self, $cur, $newest)) { $xdb->set_metadata('last_commit', $newest); } # let SearchView know a full --reindex was done so it can # generate ->has_threadid-dependent links if ($sync->{reindex} && !ref($sync->{reindex})) { my $n = $xdb->get_metadata('has_threadid'); $xdb->set_metadata('has_threadid', '1') if $n ne '1'; } } $self->{oidx}->rethread_done($sync->{-opt}) if $newest; # all done commit_txn_lazy($self); $self->{ibx}->git->cleanup; my $nr = ${$sync->{nr}}; idx_release($self, $nr); # let another process do some work... if (my $pr = $sync->{-opt}->{-progress}) { $pr->("indexed $nr/$sync->{ntodo}\n") if $nr; } if (!$stk) { # more to come begin_txn_lazy($self); $self->{mm}->{dbh}->begin_work; } } # only for v1 sub process_stack { my ($self, $sync, $stk) = @_; my $git = $self->{ibx}->git; my $max = $self->{batch_bytes}; my $nr = 0; $sync->{nr} = \$nr; $sync->{max} = \$max; $sync->{sidx} = $self; $self->{mm}->{dbh}->begin_work; if (my @leftovers = keys %{delete($sync->{D}) // {}}) { warn('W: unindexing '.scalar(@leftovers)." leftovers\n"); for my $oid (@leftovers) { $oid = unpack('H*', $oid); $git->cat_async($oid, \&unindex_both, $self); } } if ($sync->{max_size} = $sync->{-opt}->{max_size}) { $sync->{index_oid} = \&index_both; } while (my ($f, $at, $ct, $oid) = $stk->pop_rec) { if ($f eq 'm') { my $arg = { %$sync, autime => $at, cotime => $ct }; if ($sync->{max_size}) { $git->check_async($oid, \&check_size, $arg); } else { $git->cat_async($oid, \&index_both, $arg); } v1_checkpoint($self, $sync) if $max <= 0; } elsif ($f eq 'd') { $git->cat_async($oid, \&unindex_both, $self); } } v1_checkpoint($self, $sync, $stk); } sub log2stack ($$$$) { my ($sync, $git, $range, $ibx) = @_; my $D = $sync->{D}; # OID_BIN => NR (if reindexing, undef otherwise) my ($add, $del); if ($ibx->version == 1) { my $path = $hex.'{2}/'.$hex.'{38}'; $add = qr!\A:000000 100644 \S+ ($OID) A\t$path$!; $del = qr!\A:100644 000000 ($OID) \S+ D\t$path$!; } else { $del = qr!\A:\d{6} 100644 $OID ($OID) [AM]\td$!; $add = qr!\A:\d{6} 100644 $OID ($OID) [AM]\tm$!; } # Count the new files so they can be added newest to oldest # and still have numbers increasing from oldest to newest my $fh = $git->popen(qw(log --raw -r --pretty=tformat:%at-%ct-%H --no-notes --no-color --no-renames --no-abbrev), $range); my ($at, $ct, $stk); while (<$fh>) { if (/\A([0-9]+)-([0-9]+)-($OID)$/o) { ($at, $ct) = ($1 + 0, $2 + 0); $stk //= PublicInbox::IdxStack->new($3); } elsif (/$del/) { my $oid = $1; if ($D) { # reindex case $D->{pack('H*', $oid)}++; } else { # non-reindex case: $stk->push_rec('d', $at, $ct, $oid); } } elsif (/$add/) { my $oid = $1; if ($D) { my $oid_bin = pack('H*', $oid); my $nr = --$D->{$oid_bin}; delete($D->{$oid_bin}) if $nr <= 0; # nr < 0 (-1) means it never existed $stk->push_rec('m', $at, $ct, $oid) if $nr < 0; } else { $stk->push_rec('m', $at, $ct, $oid); } } } close $fh or die "git log failed: \$?=$?"; $stk //= PublicInbox::IdxStack->new; $stk->read_prepare; } sub prepare_stack ($$$) { my ($self, $sync, $range) = @_; my $git = $self->{ibx}->git; if (index($range, '..') < 0) { # don't show annoying git errors to users who run -index # on empty inboxes $git->qx(qw(rev-parse -q --verify), "$range^0"); return PublicInbox::IdxStack->new->read_prepare if $?; } $sync->{D} = $sync->{reindex} ? {} : undef; # OID_BIN => NR log2stack($sync, $git, $range, $self->{ibx}); } # --is-ancestor requires git 1.8.0+ sub is_ancestor ($$$) { my ($git, $cur, $tip) = @_; return 0 unless $git->check($cur); my $cmd = [ 'git', "--git-dir=$git->{git_dir}", qw(merge-base --is-ancestor), $cur, $tip ]; my $pid = spawn($cmd); waitpid($pid, 0) == $pid or die join(' ', @$cmd) .' did not finish'; $? == 0; } sub need_update ($$$) { my ($self, $cur, $new) = @_; my $git = $self->{ibx}->git; return 1 if $cur && !is_ancestor($git, $cur, $new); my $range = $cur eq '' ? $new : "$cur..$new"; chomp(my $n = $git->qx(qw(rev-list --count), $range)); ($n eq '' || $n > 0); } # The last git commit we indexed with Xapian or SQLite (msgmap) # This needs to account for cases where Xapian or SQLite is # out-of-date with respect to the other. sub _last_x_commit { my ($self, $mm) = @_; my $lm = $mm->last_commit || ''; my $lx = ''; if (need_xapian($self)) { $lx = $self->{xdb}->get_metadata('last_commit') || ''; } else { $lx = $lm; } # Use last_commit from msgmap if it is older or unset if (!$lm || ($lx && $lm && is_ancestor($self->{ibx}->git, $lm, $lx))) { $lx = $lm; } $lx; } sub reindex_from ($$) { my ($reindex, $last_commit) = @_; return $last_commit unless $reindex; ref($reindex) eq 'HASH' ? $reindex->{from} : ''; } # indexes all unindexed messages (v1 only) sub _index_sync { my ($self, $opt) = @_; my $tip = $opt->{ref} || 'HEAD'; my $git = $self->{ibx}->git; $self->{batch_bytes} = $opt->{batch_size} // $BATCH_BYTES; $git->batch_prepare; my $pr = $opt->{-progress}; my $sync = { reindex => $opt->{reindex}, -opt => $opt }; my $xdb = $self->begin_txn_lazy; $self->{oidx}->rethread_prepare($opt); my $mm = _msgmap_init($self); if ($sync->{reindex}) { my $last = $mm->last_commit; if ($last) { $tip = $last; } else { # somebody just blindly added --reindex when indexing # for the first time, allow it: undef $sync->{reindex}; } } my $last_commit = _last_x_commit($self, $mm); my $lx = reindex_from($sync->{reindex}, $last_commit); my $range = $lx eq '' ? $tip : "$lx..$tip"; $pr->("counting changes\n\t$range ... ") if $pr; my $stk = prepare_stack($self, $sync, $range); $sync->{ntodo} = $stk ? $stk->num_records : 0; $pr->("$sync->{ntodo}\n") if $pr; # continue previous line process_stack($self, $sync, $stk); } sub DESTROY { # order matters for unlocking $_[0]->{xdb} = undef; $_[0]->{lockfh} = undef; } sub _begin_txn { my ($self) = @_; my $xdb = $self->{xdb} || idx_acquire($self); $self->{oidx}->begin_lazy if $self->{oidx}; $xdb->begin_transaction if $xdb; $self->{txn} = 1; $xdb; } sub begin_txn_lazy { my ($self) = @_; $self->{ibx}->with_umask(\&_begin_txn, $self) if !$self->{txn}; } # store 'indexlevel=medium' in v2 shard=0 and v1 (only one shard) # This metadata is read by Admin::detect_indexlevel: sub set_metadata_once { my ($self) = @_; return if $self->{shard}; # only continue if undef or 0, not >0 my $xdb = $self->{xdb}; if (delete($self->{-set_has_threadid_once})) { $xdb->set_metadata('has_threadid', '1'); } if (delete($self->{-set_indexlevel_once})) { my $level = $xdb->get_metadata('indexlevel'); if (!$level || $level ne 'medium') { $xdb->set_metadata('indexlevel', 'medium'); } } if (delete($self->{-set_skip_docdata_once})) { $xdb->get_metadata('skip_docdata') or $xdb->set_metadata('skip_docdata', '1'); } } sub _commit_txn { my ($self) = @_; if (my $xdb = $self->{xdb}) { set_metadata_once($self); $xdb->commit_transaction; } $self->{oidx}->commit_lazy if $self->{oidx}; } sub commit_txn_lazy { my ($self) = @_; delete($self->{txn}) and $self->{ibx}->with_umask(\&_commit_txn, $self); } sub worker_done { my ($self) = @_; if (need_xapian($self)) { die "$$ $0 xdb not released\n" if $self->{xdb}; } die "$$ $0 still in transaction\n" if $self->{txn}; } 1; public-inbox-1.6.1/lib/PublicInbox/SearchIdxShard.pm000066400000000000000000000103171377346120300223000ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ # Internal interface for a single Xapian shard in V2 inboxes. # See L for more info on how we shard Xapian package PublicInbox::SearchIdxShard; use strict; use v5.10.1; use parent qw(PublicInbox::SearchIdx); use IO::Handle (); # autoflush use PublicInbox::Eml; sub new { my ($class, $v2w, $shard) = @_; my $ibx = $v2w->{ibx}; my $self = $class->SUPER::new($ibx, 1, $shard); # create the DB before forking: $self->idx_acquire; $self->set_metadata_once; $self->idx_release; $self->spawn_worker($v2w, $shard) if $v2w->{parallel}; $self; } sub spawn_worker { my ($self, $v2w, $shard) = @_; my ($r, $w); pipe($r, $w) or die "pipe failed: $!\n"; $w->autoflush(1); my $pid = fork; defined $pid or die "fork failed: $!\n"; if ($pid == 0) { my $bnote = $v2w->atfork_child; close $w or die "failed to close: $!"; # F_SETPIPE_SZ = 1031 on Linux; increasing the pipe size here # speeds V2Writable batch imports across 8 cores by nearly 20% fcntl($r, 1031, 1048576) if $^O eq 'linux'; eval { shard_worker_loop($self, $v2w, $r, $shard, $bnote) }; die "worker $shard died: $@\n" if $@; die "unexpected MM $self->{mm}" if $self->{mm}; exit; } $self->{pid} = $pid; $self->{w} = $w; close $r or die "failed to close: $!"; } # this reads all the writes to $self->{w} from the parent process sub shard_worker_loop ($$$$$) { my ($self, $v2w, $r, $shard, $bnote) = @_; $0 = "pi-v2-shard[$shard]"; $self->begin_txn_lazy; while (my $line = readline($r)) { $v2w->{current_info} = "[$shard] $line"; if ($line eq "commit\n") { $self->commit_txn_lazy; } elsif ($line eq "close\n") { $self->idx_release; } elsif ($line eq "barrier\n") { $self->commit_txn_lazy; # no need to lock < 512 bytes is atomic under POSIX print $bnote "barrier $shard\n" or die "write failed for barrier $!\n"; } elsif ($line =~ /\AD ([a-f0-9]{40,}) ([0-9]+)\n\z/s) { $self->remove_by_oid($1, $2 + 0); } else { chomp $line; # n.b. $mid may contain spaces(!) my ($to_read, $bytes, $num, $blob, $ds, $ts, $tid, $mid) = split(/ /, $line, 8); $self->begin_txn_lazy; my $n = read($r, my $msg, $to_read) or die "read: $!\n"; $n == $to_read or die "short read: $n != $to_read\n"; my $mime = PublicInbox::Eml->new(\$msg); my $smsg = bless { bytes => $bytes, num => $num + 0, blob => $blob, mid => $mid, tid => $tid, ds => $ds, ts => $ts, }, 'PublicInbox::Smsg'; $self->add_message($mime, $smsg); } } $self->worker_done; } sub index_raw { my ($self, $msgref, $eml, $smsg) = @_; if (my $w = $self->{w}) { # mid must be last, it can contain spaces (but not LF) print $w join(' ', @$smsg{qw(raw_bytes bytes num blob ds ts tid mid)}), "\n", $$msgref or die "failed to write shard $!\n"; } else { if ($eml) { undef $$msgref; } else { # --xapian-only + --sequential-shard: $eml = PublicInbox::Eml->new($msgref); } $self->begin_txn_lazy; $self->add_message($eml, $smsg); } } sub atfork_child { close $_[0]->{w} or die "failed to close write pipe: $!\n"; } sub shard_barrier { my ($self) = @_; if (my $w = $self->{w}) { print $w "barrier\n" or die "failed to print: $!"; } else { $self->commit_txn_lazy; } } sub shard_commit { my ($self) = @_; if (my $w = $self->{w}) { print $w "commit\n" or die "failed to write commit: $!"; } else { $self->commit_txn_lazy; } } sub shard_close { my ($self) = @_; if (my $w = delete $self->{w}) { my $pid = delete $self->{pid} or die "no process to wait on\n"; print $w "close\n" or die "failed to write to pid:$pid: $!\n"; close $w or die "failed to close pipe for pid:$pid: $!\n"; waitpid($pid, 0) == $pid or die "remote process did not finish"; $? == 0 or die ref($self)." pid:$pid exited with: $?"; } else { die "transaction in progress $self\n" if $self->{txn}; $self->idx_release if $self->{xdb}; } } sub shard_remove { my ($self, $oid, $num) = @_; if (my $w = $self->{w}) { # triggers remove_by_oid in a shard child print $w "D $oid $num\n" or die "failed to write remove $!"; } else { # same process $self->remove_by_oid($oid, $num); } } 1; public-inbox-1.6.1/lib/PublicInbox/SearchQuery.pm000066400000000000000000000023201377346120300216720ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ # used by PublicInbox::SearchView package PublicInbox::SearchQuery; use strict; use v5.10.1; use URI::Escape qw(uri_escape); use PublicInbox::MID qw(MID_ESC); our $LIM = 200; sub new { my ($class, $qp) = @_; my $r = $qp->{r}; # relevance my $t = $qp->{t}; # collapse threads my ($l) = (($qp->{l} || '') =~ /([0-9]+)/); $l = $LIM if !$l || $l > $LIM; bless { q => $qp->{'q'}, x => $qp->{x} || '', o => (($qp->{o} || '0') =~ /(-?[0-9]+)/), l => $l, r => (defined $r && $r ne '0'), t => (defined $t && $t ne '0'), }, $class; } sub qs_html { my ($self, %override) = @_; if (scalar(keys(%override))) { $self = bless { (%$self, %override) }, ref($self); } my $q = uri_escape($self->{'q'}, MID_ESC); $q =~ s/%20/+/g; # improve URL readability my $qs = "q=$q"; if (my $o = $self->{o}) { # ignore o == 0 $qs .= "&o=$o"; } if (my $l = $self->{l}) { $qs .= "&l=$l" unless $l == $LIM; } for my $bool (qw(r t)) { $qs .= "&$bool" if $self->{$bool}; } if (my $x = $self->{x}) { $qs .= "&x=$x" if ($x eq 't' || $x eq 'A' || $x eq 'm'); } $qs; } 1; public-inbox-1.6.1/lib/PublicInbox/SearchThread.pm000066400000000000000000000116251377346120300220040ustar00rootroot00000000000000# This library is free software; you can redistribute it and/or modify # it under the same terms as Perl itself. # # This license differs from the rest of public-inbox # # Our own jwz-style threading class based on Mail::Thread from CPAN. # Mail::Thread is unmaintained and unavailable on some distros. # We also do not want pruning or subject grouping, since we want # to encourage strict threading and hopefully encourage people # to use proper In-Reply-To/References. # # This includes fixes from several open bugs for Mail::Thread # # Avoid circular references # - https://rt.cpan.org/Public/Bug/Display.html?id=22817 # # And avoid recursion in recurse_down: # - https://rt.cpan.org/Ticket/Display.html?id=116727 # - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=833479 package PublicInbox::SearchThread; use strict; use warnings; use PublicInbox::MID qw($MID_EXTRACT); sub thread { my ($msgs, $ordersub, $ctx) = @_; # A. put all current $msgs (non-ghosts) into %id_table my %id_table = map {; # this delete saves around 4K across 1K messages # TODO: move this to a more appropriate place, breaks tests # if we do it during psgi_cull delete $_->{num}; $_->{mid} => PublicInbox::SearchThread::Msg::cast($_); } @$msgs; # Sadly, we sort here anyways since the fill-in-the-blanks References: # can be shakier if somebody used In-Reply-To with multiple, disparate # messages. So, take the client Date: into account since we can't # always determine ordering when somebody uses multiple In-Reply-To. # We'll trust the client Date: header here instead of the Received: # time since this is for display (and not retrieval) _set_parent(\%id_table, $_) for sort { $a->{ds} <=> $b->{ds} } @$msgs; my $ibx = $ctx->{-inbox}; my $rootset = [ grep { !delete($_->{parent}) && $_->visible($ibx) } values %id_table ]; $rootset = $ordersub->($rootset); $_->order_children($ordersub, $ctx) for @$rootset; $rootset; } sub _set_parent ($$) { my ($id_table, $this) = @_; # B. For each element in the message's References field: defined(my $refs = $this->{references}) or return; # This loop exists to help fill in gaps left from missing # messages. It is not needed in a perfect world where # everything is perfectly referenced, only the last ref # matters. my $prev; foreach my $ref ($refs =~ m/$MID_EXTRACT/go) { # Find a Container object for the given Message-ID my $cont = $id_table->{$ref} //= PublicInbox::SearchThread::Msg::ghost($ref); # Link the References field's Containers together in # the order implied by the References header # # * If they are already linked don't change the # existing links # * Do not add a link if adding that link would # introduce a loop... if ($prev && !$cont->{parent} && # already linked !$cont->has_descendent($prev) # would loop ) { $prev->add_child($cont); } $prev = $cont; } # C. Set the parent of this message to be the last element in # References. if (defined $prev && !$this->has_descendent($prev)) { # would loop $prev->add_child($this); } } package PublicInbox::SearchThread::Msg; use base qw(PublicInbox::Smsg); use strict; use warnings; use Carp qw(croak); # declare a ghost smsg (determined by absence of {blob}) sub ghost { bless { mid => $_[0], children => {}, # becomes an array when sorted by ->order(...) }, __PACKAGE__; } # give a existing smsg the methods of this class sub cast { my ($smsg) = @_; $smsg->{children} = {}; bless $smsg, __PACKAGE__; } sub topmost { my ($self) = @_; my @q = ($self); while (my $cont = shift @q) { return $cont if $cont->{blob}; push @q, values %{$cont->{children}}; } undef; } sub add_child { my ($self, $child) = @_; croak "Cowardly refusing to become my own parent: $self" if $self == $child; my $cid = $child->{mid}; # reparenting: if (defined(my $parent = $child->{parent})) { delete $parent->{children}->{$cid}; } $self->{children}->{$cid} = $child; $child->{parent} = $self; } sub has_descendent { my ($self, $child) = @_; my %seen; # loop prevention while ($child) { return 1 if $self == $child || $seen{$child}++; $child = $child->{parent}; } 0; } # Do not show/keep ghosts iff they have no children. Sometimes # a ghost Message-ID is the result of a long header line # being folded/mangled by a MUA, and not a missing message. sub visible ($$) { my ($self, $ibx) = @_; return 1 if $self->{blob}; if (my $by_mid = $ibx->smsg_by_mid($self->{mid})) { %$self = (%$self, %$by_mid); 1; } else { (scalar values %{$self->{children}}); } } sub order_children { my ($cur, $ordersub, $ctx) = @_; my %seen = ($cur => 1); # self-referential loop prevention my @q = ($cur); my $ibx = $ctx->{-inbox}; while (defined($cur = shift @q)) { my $c = $cur->{children}; # The hashref here... $c = [ grep { !$seen{$_}++ && visible($_, $ibx) } values %$c ]; $c = $ordersub->($c) if scalar @$c > 1; $cur->{children} = $c; # ...becomes an arrayref push @q, @$c; } } 1; public-inbox-1.6.1/lib/PublicInbox/SearchView.pm000066400000000000000000000242631377346120300215110ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ # # Displays search results for the web interface package PublicInbox::SearchView; use strict; use v5.10.1; use List::Util qw(min max); use URI::Escape qw(uri_unescape); use PublicInbox::Smsg; use PublicInbox::Hval qw(ascii_html obfuscate_addrs mid_href fmt_ts); use PublicInbox::View; use PublicInbox::WwwAtomStream; use PublicInbox::WwwStream qw(html_oneshot); use PublicInbox::SearchThread; use PublicInbox::SearchQuery; use PublicInbox::Search qw(mdocid); my %rmap_inc; sub mbox_results { my ($ctx) = @_; my $q = PublicInbox::SearchQuery->new($ctx->{qp}); if ($ctx->{env}->{'psgi.input'}->read(my $buf, 3)) { $q->{t} = 1 if $buf =~ /\Ax=[^0]/; } require PublicInbox::Mbox; $q->{x} eq 'm' ? PublicInbox::Mbox::mbox_all($ctx, $q) : sres_top_html($ctx); } sub sres_top_html { my ($ctx) = @_; my $srch = $ctx->{-inbox}->search or return PublicInbox::WWW::need($ctx, 'Search'); my $q = PublicInbox::SearchQuery->new($ctx->{qp}); my $x = $q->{x}; my $query = $q->{'q'}; my $o = $q->{o}; my $asc; if ($o < 0) { $asc = 1; $o = -($o + 1); # so [-1] is the last element, like Perl lists } my $code = 200; # double the limit for expanded views: my $opts = { limit => $q->{l}, offset => $o, relevance => $q->{r}, thread => $q->{t}, asc => $asc, }; my ($mset, $total, $err, $html); retry: eval { $mset = $srch->mset($query, $opts); $total = $mset->get_matches_estimated; }; $err = $@; ctx_prepare($q, $ctx); if ($err) { $code = 400; $html = '
'.err_txt($ctx, $err).'

'; } elsif ($total == 0) { if (defined($ctx->{-uxs_retried})) { # undo retry damage: $q->{'q'} = $ctx->{-uxs_retried}; } elsif (index($q->{'q'}, '%') >= 0) { $ctx->{-uxs_retried} = $q->{'q'}; $q->{'q'} = uri_unescape($q->{'q'}); goto retry; } $code = 404; $html = "
\n[No results found]

"; } else { return adump($_[0], $mset, $q, $ctx) if $x eq 'A'; $ctx->{-html_tip} = search_nav_top($mset, $q, $ctx); return mset_thread($ctx, $mset, $q) if $x eq 't'; mset_summary($ctx, $mset, $q); # appends to {-html_tip} $html = ''; } html_oneshot($ctx, $code); } # display non-nested search results similar to what users expect from # regular WWW search engines: sub mset_summary { my ($ctx, $mset, $q) = @_; my $total = $mset->get_matches_estimated; my $pad = length("$total"); my $pfx = ' ' x $pad; my $res = \($ctx->{-html_tip}); my $ibx = $ctx->{-inbox}; my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef; my @nums = @{$ibx->search->mset_to_artnums($mset)}; my %num2msg = map { $_->{num} => $_ } @{$ibx->over->get_all(@nums)}; my ($min, $max); foreach my $m ($mset->items) { my $rank = sprintf("%${pad}d", $m->get_rank + 1); my $pct = get_pct($m); my $num = shift @nums; my $smsg = delete($num2msg{$num}) or do { eval { $m = "$m $num expired\n"; $ctx->{env}->{'psgi.errors'}->print($m); }; next; }; $ctx->{-t_max} //= $smsg->{ts}; # only when sorting by relevance, ->items is always # ordered descending: $max //= $pct; $min = $pct; my $s = ascii_html($smsg->{subject}); my $f = ascii_html($smsg->{from_name}); if ($obfs_ibx) { obfuscate_addrs($obfs_ibx, $s); obfuscate_addrs($obfs_ibx, $f); } my $date = fmt_ts($smsg->{ds}); my $mid = mid_href($smsg->{mid}); $s = '(no subject)' if $s eq ''; $$res .= qq{$rank. }. $s . "\n"; $$res .= "$pfx - by $f @ $date UTC [$pct%]\n\n"; } if ($q->{r}) { # for descriptions in search_nav_bot $q->{-min_pct} = $min; $q->{-max_pct} = $max; } $$res .= search_nav_bot($mset, $q); undef; } # shorten "/full/path/to/Foo/Bar.pm" to "Foo/Bar.pm" so error # messages don't reveal FS layout info in case people use non-standard # installation paths sub path2inc ($) { my $full = $_[0]; if (my $short = $rmap_inc{$full}) { return $short; } elsif (!scalar(keys %rmap_inc) && -e $full) { %rmap_inc = map {; "$INC{$_}" => $_ } keys %INC; # fall back to basename as last resort $rmap_inc{$full} // (split('/', $full))[-1]; } else { $full; } } sub err_txt { my ($ctx, $err) = @_; my $u = $ctx->{-inbox}->base_url($ctx->{env}) . '_/text/help/'; $err =~ s/^\s*Exception:\s*//; # bad word to show users :P $err =~ s!(\S+)!path2inc($1)!sge; $err = ascii_html($err); "\nBad query: $err\n" . qq{See $u for help on using search}; } sub search_nav_top { my ($mset, $q, $ctx) = @_; my $m = $q->qs_html(x => 'm', r => undef, t => undef); my $rv = qq{
};
	my $initial_q = $ctx->{-uxs_retried};
	if (defined $initial_q) {
		my $rewritten = $q->{'q'};
		utf8::decode($initial_q);
		utf8::decode($rewritten);
		$initial_q = ascii_html($initial_q);
		$rewritten = ascii_html($rewritten);
		$rv .= " Warning: Initial query:\n $initial_q\n";
		$rv .= " returned no results, used:\n";
		$rv .= " $rewritten\n instead\n\n";
	}

	$rv .= 'Search results ordered by [';
	if ($q->{r}) {
		my $d = $q->qs_html(r => 0);
		$rv .= qq{date|relevance};
	} else {
		my $d = $q->qs_html(r => 1);
		$rv .= qq{date|relevance};
	}

	$rv .= ']  view[';

	my $x = $q->{x};
	if ($x eq '') {
		my $t = $q->qs_html(x => 't');
		$rv .= qq{summary|nested}
	} elsif ($q->{x} eq 't') {
		my $s = $q->qs_html(x => '');
		$rv .= qq{summary|nested};
	}
	my $A = $q->qs_html(x => 'A', r => undef);
	$rv .= qq{|Atom feed]};
	if ($ctx->{-inbox}->search->has_threadid) {
		$rv .= qq{\n\t\t\tdownload mbox.gz: } .
			# we set name=z w/o using it since it seems required for
			# lynx (but works fine for w3m).
			qq{} .
			qq{|};
	} else { # BOFH needs to --reindex
		$rv .= qq{\n\t\t\t\t\t\tdownload: } .
			qq{}
	}
	$rv .= qq{
};
}

sub search_nav_bot {
	my ($mset, $q) = @_;
	my $total = $mset->get_matches_estimated;
	my $l = $q->{l};
	my $rv = '

';
	my $o = $q->{o};
	my $off = $o < 0 ? -($o + 1) : $o;
	my $end = $off + $mset->size;
	my $beg = $off + 1;

	if ($beg <= $end) {
		my $approx = $end == $total ? '' : '~';
		$rv .= "Results $beg-$end of $approx$total";
	} else {
		$rv .= "No more results, only $total";
	}
	my ($next, $join, $prev, $nd, $pd);

	if ($o >= 0) { # sort descending
		my $n = $o + $l;
		if ($n < $total) {
			$next = $q->qs_html(o => $n, l => $l);
			$nd = $q->{r} ? "[<= $q->{-min_pct}%]" : '(older)';
		}
		if ($o > 0) {
			$join = $n < $total ? ' | ' : "\t";
			my $p = $o - $l;
			$prev = $q->qs_html(o => ($p > 0 ? $p : 0));
			$pd = $q->{r} ? "[>= $q->{-max_pct}%]" : '(newer)';
		}
	} else { # o < 0, sort ascending
		my $n = $o - $l;

		if (-$n < $total) {
			$next = $q->qs_html(o => $n, l => $l);
			$nd = $q->{r} ? "[<= $q->{-min_pct}%]" : '(newer)';
		}
		if ($o < -1) {
			$join = -$n < $total ? ' | ' : "\t";
			my $p = $o + $l;
			$prev = $q->qs_html(o => ($p < 0 ? $p : 0));
			$pd = $q->{r} ? "[>= $q->{-max_pct}%]" : '(older)';
		}
	}

	$rv .= qq{  next $nd} if $next;
	$rv .= $join if $join;
	$rv .= qq{prev $pd} if $prev;

	my $rev = $q->qs_html(o => $o < 0 ? 0 : -1);
	$rv .= qq{ | reverse
}; } sub sort_relevance { [ sort { (eval { $b->topmost->{pct} } // 0) <=> (eval { $a->topmost->{pct} } // 0) } @{$_[0]} ] } sub get_pct ($) { # Capped at "99%" since "100%" takes an extra column in the # thread skeleton view. says the value isn't # very meaningful, anyways. my $n = $_[0]->get_percent; $n > 99 ? 99 : $n; } sub mset_thread { my ($ctx, $mset, $q) = @_; my $ibx = $ctx->{-inbox}; my $nshard = $ibx->search->{nshard} // 1; my %pct = map { mdocid($nshard, $_) => get_pct($_) } $mset->items; my $msgs = $ibx->over->get_all(keys %pct); $_->{pct} = $pct{$_->{num}} for @$msgs; my $r = $q->{r}; if ($r) { # for descriptions in search_nav_bot my @pct = values %pct; $q->{-min_pct} = min(@pct); $q->{-max_pct} = max(@pct); } my $rootset = PublicInbox::SearchThread::thread($msgs, $r ? \&sort_relevance : \&PublicInbox::View::sort_ds, $ctx); my $skel = search_nav_bot($mset, $q). "
";
	$ctx->{-upfx} = '';
	$ctx->{anchor_idx} = 1;
	$ctx->{cur_level} = 0;
	$ctx->{skel} = \$skel;
	$ctx->{mapping} = {};
	$ctx->{searchview} = 1;
	$ctx->{prev_attr} = '';
	$ctx->{prev_level} = 0;
	$ctx->{s_nr} = scalar(@$msgs).'+ results';

	# reduce hash lookups in skel_dump
	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
	PublicInbox::View::walk_thread($rootset, $ctx,
		\&PublicInbox::View::pre_thread);

	# link $INBOX_DIR/description text to "recent" view around
	# the newest message in this result set:
	$ctx->{-t_max} = max(map { delete $_->{ts} } @$msgs);

	@$msgs = reverse @$msgs if $r;
	$ctx->{msgs} = $msgs;
	PublicInbox::WwwStream::aresponse($ctx, 200, \&mset_thread_i);
}

# callback for PublicInbox::WwwStream::getline
sub mset_thread_i {
	my ($ctx, $eml) = @_;
	$ctx->zmore($ctx->html_top) if exists $ctx->{-html_tip};
	$eml and return PublicInbox::View::eml_entry($ctx, $eml);
	my $smsg = shift @{$ctx->{msgs}} or
		$ctx->zmore(${delete($ctx->{skel})});
	$smsg;
}

sub ctx_prepare {
	my ($q, $ctx) = @_;
	my $qh = $q->{'q'};
	utf8::decode($qh);
	$qh = ascii_html($qh);
	$ctx->{-q_value_html} = $qh;
	$ctx->{-atom} = '?'.$q->qs_html(x => 'A', r => undef);
	$ctx->{-title_html} = "$qh - search results";
	my $extra = '';
	$extra .= qq{} if $q->{r};
	if (my $x = $q->{x}) {
		$x = ascii_html($x);
		$extra .= qq{};
	}
	$ctx->{-extra_form_html} = $extra;
}

sub adump {
	my ($cb, $mset, $q, $ctx) = @_;
	$ctx->{ids} = $ctx->{-inbox}->search->mset_to_artnums($mset);
	$ctx->{search_query} = $q; # used by WwwAtomStream::atom_header
	PublicInbox::WwwAtomStream->response($ctx, 200, \&adump_i);
}

# callback for PublicInbox::WwwAtomStream::getline
sub adump_i {
	my ($ctx) = @_;
	while (my $num = shift @{$ctx->{ids}}) {
		my $smsg = eval { $ctx->{-inbox}->over->get_art($num) } or next;
		return $smsg;
	}
}

1;
public-inbox-1.6.1/lib/PublicInbox/Sigfd.pm000066400000000000000000000041571377346120300205050ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors 
# License: AGPL-3.0+ 

# Wraps a signalfd (or similar) for PublicInbox::DS
# fields: (sig: hashref similar to %SIG, but signal numbers as keys)
package PublicInbox::Sigfd;
use strict;
use parent qw(PublicInbox::DS);
use PublicInbox::Syscall qw(signalfd EPOLLIN EPOLLET $SFD_NONBLOCK);
use POSIX qw(:signal_h);
use IO::Handle ();

# returns a coderef to unblock signals if neither signalfd or kqueue
# are available.
sub new {
	my ($class, $sig, $flags) = @_;
	my %signo = map {;
		my $cb = $sig->{$_};
		# SIGWINCH is 28 on FreeBSD, NetBSD, OpenBSD
		my $num = ($_ eq 'WINCH' && $^O =~ /linux|bsd/i) ? 28 : do {
			my $m = "SIG$_";
			POSIX->$m;
		};
		$num => $cb;
	} keys %$sig;
	my $self = bless { sig => \%signo }, $class;
	my $io;
	my $fd = signalfd(-1, [keys %signo], $flags);
	if (defined $fd && $fd >= 0) {
		$io = IO::Handle->new_from_fd($fd, 'r+');
	} elsif (eval { require PublicInbox::DSKQXS }) {
		$io = PublicInbox::DSKQXS->signalfd([keys %signo], $flags);
	} else {
		return; # wake up every second to check for signals
	}
	if ($flags & $SFD_NONBLOCK) { # it can go into the event loop
		$self->SUPER::new($io, EPOLLIN | EPOLLET);
	} else { # master main loop
		$self->{sock} = $io;
		$self;
	}
}

# PublicInbox::Daemon in master main loop (blocking)
sub wait_once ($) {
	my ($self) = @_;
	# 128 == sizeof(struct signalfd_siginfo)
	my $r = sysread($self->{sock}, my $buf, 128 * 64);
	if (defined($r)) {
		my $nr = $r / 128 - 1; # $nr may be -1
		for my $off (0..$nr) {
			# the first uint32_t of signalfd_siginfo: ssi_signo
			my $signo = unpack('L', substr($buf, 128 * $off, 4));
			my $cb = $self->{sig}->{$signo};
			$cb->($signo) if $cb ne 'IGNORE';
		}
	}
	$r;
}

# called by PublicInbox::DS in epoll_wait loop
sub event_step {
	while (wait_once($_[0])) {} # non-blocking
}

sub sig_setmask { sigprocmask(SIG_SETMASK, @_) or die "sigprocmask: $!" }

sub block_signals () {
	my $oldset = POSIX::SigSet->new;
	my $newset = POSIX::SigSet->new;
	$newset->fillset or die "fillset: $!";
	sig_setmask($newset, $oldset);
	$oldset;
}

1;
public-inbox-1.6.1/lib/PublicInbox/Smsg.pm000066400000000000000000000103061377346120300203530ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors 
# License: AGPL-3.0+ 
#
# A small/skeleton/slim representation of a message.

# This used to be "SearchMsg", but we split out overview
# indexing into over.sqlite3 so it's not just "search".  There
# may be many of these objects loaded in memory at once for
# large threads in our WWW UI and the NNTP range responses.
package PublicInbox::Smsg;
use strict;
use warnings;
use base qw(Exporter);
our @EXPORT_OK = qw(subject_normalized);
use PublicInbox::MID qw(mids);
use PublicInbox::Address;
use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
use Time::Local qw(timegm);

sub get_val ($$) {
	my ($doc, $col) = @_;
	# sortable_unserialise is defined by PublicInbox::Search::load_xapian()
	sortable_unserialise($doc->get_value($col));
}

sub to_doc_data {
	my ($self) = @_;
	join("\n",
		$self->{subject},
		$self->{from},
		$self->{references} // '',
		$self->{to},
		$self->{cc},
		$self->{blob},
		$self->{mid},
		$self->{bytes} // '',
		$self->{lines} // ''
	);
}

sub load_from_data ($$) {
	my ($self) = $_[0]; # data = $_[1]
	utf8::decode($_[1]);
	(
		$self->{subject},
		$self->{from},
		$self->{references},

		# To: and Cc: are stored to optimize HDR/XHDR in NNTP since
		# some NNTP clients will use that for message displays.
		# NNTP only, and only stored in Over(view), not Xapian
		$self->{to},
		$self->{cc},

		$self->{blob},
		$self->{mid},

		# NNTP only
		$self->{bytes},
		$self->{lines}
	) = split(/\n/, $_[1]);
}

sub load_expand {
	my ($self, $doc) = @_;
	my $data = $doc->get_data or return;
	$self->{ts} = get_val($doc, PublicInbox::Search::TS());
	my $dt = get_val($doc, PublicInbox::Search::DT());
	my ($yyyy, $mon, $dd, $hh, $mm, $ss) = unpack('A4A2A2A2A2A2', $dt);
	$self->{ds} = timegm($ss, $mm, $hh, $dd, $mon - 1, $yyyy);
	load_from_data($self, $data);
	$self;
}

sub psgi_cull ($) {
	my ($self) = @_;

	# ghosts don't have ->{from}
	my $from = delete($self->{from}) // '';
	my @n = PublicInbox::Address::names($from);
	$self->{from_name} = join(', ', @n);

	# drop NNTP-only fields which aren't relevant to PSGI results:
	# saves ~80K on a 200 item search result:
	# TODO: we may need to keep some of these for JMAP...
	delete @$self{qw(tid to cc bytes lines)};
	$self;
}

# for Import and v1 non-SQLite WWW code paths
sub populate {
	my ($self, $hdr, $sync) = @_;
	for my $f (qw(From To Cc Subject)) {
		my @all = $hdr->header($f);
		my $val = join(', ', @all);
		$val =~ tr/\r//d;
		# MIME decoding can create NULs, replace them with spaces
		# to protect git and NNTP clients
		$val =~ tr/\0\t\n/   /;

		# rare: in case headers have wide chars (not RFC2047-encoded)
		utf8::decode($val);

		# lower-case fields for read-only stuff
		$self->{lc($f)} = $val;

		# Capitalized From/Subject for git-fast-import
		next if $f eq 'To' || $f eq 'Cc';
		if (scalar(@all) > 1) {
			$val = $all[0];
			$val =~ tr/\r//d;
			$val =~ tr/\0\t\n/   /;
		}
		$self->{$f} = $val if $val ne '';
	}
	$sync //= {};
	$self->{-ds} = [ my @ds = msg_datestamp($hdr, $sync->{autime}) ];
	$self->{-ts} = [ my @ts = msg_timestamp($hdr, $sync->{cotime}) ];
	$self->{ds} //= $ds[0]; # no zone
	$self->{ts} //= $ts[0];

	# for v1 users w/o SQLite
	$self->{mid} //= eval { mids($hdr)->[0] } // '';
}

# no strftime, that is locale-dependent and not for RFC822
my @DoW = qw(Sun Mon Tue Wed Thu Fri Sat);
my @MoY = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

sub date ($) { # for NNTP
	my ($self) = @_;
	my $ds = $self->{ds};
	return unless defined $ds;
	my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($ds);
	"$DoW[$wday], " . sprintf("%02d $MoY[$mon] %04d %02d:%02d:%02d +0000",
				$mday, $year+1900, $hour, $min, $sec);
}

sub internaldate { # for IMAP
	my ($self) = @_;
	my ($sec, $min, $hour, $mday, $mon, $year) = gmtime($self->{ts} // 0);
	sprintf("%02d-$MoY[$mon]-%04d %02d:%02d:%02d +0000",
				$mday, $year+1900, $hour, $min, $sec);
}

our $REPLY_RE = qr/^re:\s+/i;

sub subject_normalized ($) {
	my ($subj) = @_;
	$subj =~ s/\A\s+//s; # no leading space
	$subj =~ s/\s+\z//s; # no trailing space
	$subj =~ s/\s+/ /gs; # no redundant spaces
	$subj =~ s/\.+\z//; # no trailing '.'
	$subj =~ s/$REPLY_RE//igo; # remove reply prefix
	$subj;
}

1;
public-inbox-1.6.1/lib/PublicInbox/SolverGit.pm000066400000000000000000000477561377346120300214030ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors 
# License: AGPL-3.0+ 

# "Solve" blobs which don't exist in git code repositories by
# searching inboxes for post-image blobs.

# this emits a lot of debugging/tracing information which may be
# publicly viewed over HTTP(S).  Be careful not to expose
# local filesystem layouts in the process.
package PublicInbox::SolverGit;
use strict;
use v5.10.1;
use File::Temp 0.19 (); # 0.19 for ->newdir
use Fcntl qw(SEEK_SET);
use PublicInbox::Git qw(git_unquote git_quote);
use PublicInbox::MsgIter qw(msg_part_text);
use PublicInbox::Qspawn;
use PublicInbox::Tmpfile;
use PublicInbox::GitAsyncCat;
use PublicInbox::Eml;
use URI::Escape qw(uri_escape_utf8);

# POSIX requires _POSIX_ARG_MAX >= 4096, and xargs is required to
# subtract 2048 bytes.  We also don't factor in environment variable
# headroom into this.
use POSIX qw(sysconf _SC_ARG_MAX);
my $ARG_SIZE_MAX = (sysconf(_SC_ARG_MAX) || 4096) - 2048;
my $OID_MIN = 7;

# By default, "git format-patch" generates filenames with a four-digit
# prefix, so that means 9999 patch series are OK, right? :>
# Maybe we can make this configurable, main concern is disk space overhead
# for uncompressed patch fragments.  Aside from space, public-inbox-httpd
# is otherwise unaffected by having many patches, here, as it can share
# work fairly.  Other PSGI servers may have trouble, though.
my $MAX_PATCH = 9999;

my $LF = qr!\r?\n!;
my $ANY = qr![^\r\n]+!;
my $MODE = '100644|120000|100755';
my $FN = qr!(?:("?[^/\n]+/[^\r\n]+)|/dev/null)!;
my %BAD_COMPONENT = ('' => 1, '.' => 1, '..' => 1);

# di = diff info / a hashref with information about a diff ($di):
# {
#	oid_a => abbreviated pre-image oid,
#	oid_b => abbreviated post-image oid,
#	tmp => anonymous file handle with the diff,
#	hdr_lines => string of various header lines for mode information
#	mode_a => original mode of oid_a (string, not integer),
#	ibx => PublicInbox::Inbox object containing the diff
#	smsg => PublicInbox::Smsg object containing diff
#	path_a => pre-image path
#	path_b => post-image path
#	n => numeric path of the patch (relative to worktree)
# }

sub dbg ($$) {
	print { $_[0]->{out} } $_[1], "\n" or ERR($_[0], "print(dbg): $!");
}

sub done ($$) {
	my ($self, $res) = @_;
	my $ucb = delete($self->{user_cb}) or return;
	$ucb->($res, $self->{uarg});
}

sub ERR ($$) {
	my ($self, $err) = @_;
	print { $self->{out} } $err, "\n";
	eval { done($self, $err) };
	die $err;
}

# look for existing objects already in git repos, returns arrayref
# if found, number of remaining git coderepos to try if not.
sub solve_existing ($$) {
	my ($self, $want) = @_;
	my $try = $want->{try_gits} //= [ @{$self->{gits}} ]; # array copy
	my $git = shift @$try or die 'BUG {try_gits} empty';
	my $oid_b = $want->{oid_b};
	my ($oid_full, $type, $size) = $git->check($oid_b);

	# other than {oid_b, try_gits, try_ibxs}
	my $have_hints = scalar keys %$want > 3;
	if (defined($type) && (!$have_hints || $type eq 'blob')) {
		delete $want->{try_gits};
		return [ $git, $oid_full, $type, int($size) ]; # done, success
	}

	# TODO: deal with 40-char "abbreviations" with future SHA-256 git
	return scalar(@$try) if length($oid_b) >= 40;

	# parse stderr of "git cat-file --batch-check"
	my $err = $git->last_check_err;
	my (@oids) = ($err =~ /\b([a-f0-9]{40,})\s+blob\b/g);
	return scalar(@$try) unless scalar(@oids);

	# TODO: do something with the ambiguous array?
	# push @ambiguous, [ $git, @oids ];

	dbg($self, "`$oid_b' ambiguous in " .
			join("\n\t", $git->pub_urls($self->{psgi_env}))
			. "\n" .
			join('', map { "$_ blob\n" } @oids));
	scalar(@$try);
}

sub extract_diff ($$) {
	my ($p, $arg) = @_;
	my ($self, $want, $smsg) = @$arg;
	my ($part) = @$p; # ignore $depth and @idx;
	my $ct = $part->content_type || 'text/plain';
	my ($s, undef) = msg_part_text($part, $ct);
	defined $s or return;
	my $post = $want->{oid_b};
	my $pre = $want->{oid_a};
	if (!defined($pre) || $pre !~ /\A[a-f0-9]+\z/) {
		$pre = '[a-f0-9]{7}'; # for RE below
	}

	# Email::MIME::Encodings forces QP to be CRLF upon decoding,
	# change it back to LF:
	my $cte = $part->header('Content-Transfer-Encoding') || '';
	if ($cte =~ /\bquoted-printable\b/i && $part->crlf eq "\n") {
		$s =~ s/\r\n/\n/sg;
	}


	$s =~ m!( # $1 start header lines we save for debugging:

		# everything before ^index is optional, but we don't
		# want to match ^(old|copy|rename|deleted|...) unless
		# we match /^diff --git/ first:
		(?: # begin optional stuff:

		# try to get the pre-and-post filenames as $2 and $3
		(?:^diff\x20--git\x20$FN\x20$FN$LF)

		(?:^(?: # pass all this to git-apply:
			# old mode $4
			(?:old\x20mode\x20($MODE))
			|
			# new mode (possibly new file) ($5)
			(?:new\x20(?:file\x20)?mode\x20($MODE))
			|
			(?:(?:copy|rename|deleted|
				dissimilarity|similarity)$ANY)
		)$LF)*

		)? # end of optional stuff, everything below is required

		# match the pre and post-image OIDs as $6 $7
		^index\x20(${pre}[a-f0-9]*)\.\.(${post}[a-f0-9]*)
			# mode if unchanged $8
			(?:\x20(100644|120000|100755))?$LF
	) # end of header lines ($1)
	( # $9 is the patch body
		# "--- a/foo.c" sets pre-filename ($10) in case
		# $2 is missing
		(?:^---\x20$FN$LF)

		# "+++ b/foo.c" sets post-filename ($11) in case
		# $3 is missing
		(?:^\+{3}\x20$FN$LF)

		# the meat of the diff, including "^\\No newline ..."
		# We also allow for totally blank lines w/o leading spaces,
		# because git-apply(1) handles that case, too
		(?:^(?:[\@\+\x20\-\\][^\n]*|)$LF)+
	)!smx or return;

	my $di = {
		hdr_lines => $1,
		oid_a => $6,
		oid_b => $7,
		mode_a => $5 // $8 // $4, # new (file) // unchanged // old
	};
	my $path_a = $2 // $10;
	my $path_b = $3 // $11;
	my $patch = $9;

	# don't care for leading 'a/' and 'b/'
	my (undef, @a) = split(m{/}, git_unquote($path_a)) if defined($path_a);
	my (undef, @b) = split(m{/}, git_unquote($path_b));

	# get rid of path-traversal attempts and junk patches:
	# it's junk at best, an attack attempt at worse:
	foreach (@a, @b) { return if $BAD_COMPONENT{$_} }

	$di->{path_a} = join('/', @a) if @a;
	$di->{path_b} = join('/', @b);

	my $path = ++$self->{tot};
	$di->{n} = $path;
	open(my $tmp, '>:utf8', $self->{tmp}->dirname . "/$path") or
		die "open(tmp): $!";
	print $tmp $di->{hdr_lines}, $patch or die "print(tmp): $!";
	close $tmp or die "close(tmp): $!";

	# for debugging/diagnostics:
	$di->{ibx} = $want->{cur_ibx};
	$di->{smsg} = $smsg;

	push @{$self->{tmp_diffs}}, $di;
}

sub path_searchable ($) { defined($_[0]) && $_[0] =~ m!\A[\w/\. \-]+\z! }

# ".." appears in path names, which confuses Xapian into treating
# it as a range query.  So we split on ".." since Xapian breaks
# on punctuation anyways:
sub filename_query ($) {
	join('', map { qq( dfn:"$_") } split(/\.\./, $_[0]));
}

sub find_smsgs ($$$) {
	my ($self, $ibx, $want) = @_;
	my $srch = $ibx->search or return;

	my $post = $want->{oid_b} or die 'BUG: no {oid_b}';
	$post =~ /\A[a-f0-9]+\z/ or die "BUG: oid_b not hex: $post";

	my $q = "dfpost:$post";
	my $pre = $want->{oid_a};
	if (defined $pre && $pre =~ /\A[a-f0-9]+\z/) {
		$q .= " dfpre:$pre";
	}

	my $path_b = $want->{path_b};
	if (path_searchable($path_b)) {
		$q .= filename_query($path_b);

		my $path_a = $want->{path_a};
		if (path_searchable($path_a) && $path_a ne $path_b) {
			$q .= filename_query($path_a);
		}
	}
	my $mset = $srch->mset($q, { relevance => 1 });
	$mset->size ? $srch->mset_to_smsg($ibx, $mset) : undef;
}

sub update_index_result ($$) {
	my ($bref, $self) = @_;
	my ($qsp, $msg) = delete @$self{qw(-qsp -msg)};
	if (my $err = $qsp->{err}) {
		ERR($self, "git update-index error: $err");
	}
	dbg($self, $msg);
	next_step($self); # onto do_git_apply
}

sub prepare_index ($) {
	my ($self) = @_;
	my $patches = $self->{patches};
	$self->{nr} = 0;

	my $di = $patches->[0] or die 'no patches';
	my $oid_a = $di->{oid_a} or die '{oid_a} unset';
	my $existing = $self->{found}->{$oid_a};

	# no index creation for added files
	$oid_a =~ /\A0+\z/ and return next_step($self);

	die "BUG: $oid_a not found" unless $existing;

	my $oid_full = $existing->[1];
	my $path_a = $di->{path_a} or die "BUG: path_a missing for $oid_full";
	my $mode_a = $di->{mode_a} // '100644';

	my $in = tmpfile("update-index.$oid_full") or die "tmpfile: $!";
	print $in "$mode_a $oid_full\t$path_a\0" or die "print: $!";
	$in->flush or die "flush: $!";
	sysseek($in, 0, SEEK_SET) or die "seek: $!";

	dbg($self, 'preparing index');
	my $rdr = { 0 => $in };
	my $cmd = [ qw(git update-index -z --index-info) ];
	my $qsp = PublicInbox::Qspawn->new($cmd, $self->{git_env}, $rdr);
	$path_a = git_quote($path_a);
	$self->{-qsp} = $qsp;
	$self->{-msg} = "index prepared:\n$mode_a $oid_full\t$path_a";
	$qsp->psgi_qx($self->{psgi_env}, undef, \&update_index_result, $self);
}

# pure Perl "git init"
sub do_git_init ($) {
	my ($self) = @_;
	my $dir = $self->{tmp}->dirname;
	my $git_dir = "$dir/git";

	foreach ('', qw(objects refs objects/info refs/heads)) {
		mkdir("$git_dir/$_") or die "mkdir $_: $!";
	}
	open my $fh, '>', "$git_dir/config" or die "open git/config: $!";
	print $fh <<'EOF' or die "print git/config $!";
[core]
	repositoryFormatVersion = 0
	filemode = true
	bare = false
	fsyncObjectfiles = false
	logAllRefUpdates = false
EOF
	close $fh or die "close git/config: $!";

	open $fh, '>', "$git_dir/HEAD" or die "open git/HEAD: $!";
	print $fh "ref: refs/heads/master\n" or die "print git/HEAD: $!";
	close $fh or die "close git/HEAD: $!";

	my $f = 'objects/info/alternates';
	open $fh, '>', "$git_dir/$f" or die "open: $f: $!";
	foreach my $git (@{$self->{gits}}) {
		print $fh $git->git_path('objects'),"\n" or die "print $f: $!";
	}
	close $fh or die "close: $f: $!";
	my $tmp_git = $self->{tmp_git} = PublicInbox::Git->new($git_dir);
	$tmp_git->{-tmp} = $self->{tmp};
	$self->{git_env} = {
		GIT_DIR => $git_dir,
		GIT_INDEX_FILE => "$git_dir/index",
	};
	prepare_index($self);
}

sub do_finish ($) {
	my ($self) = @_;
	my ($found, $oid_want) = @$self{qw(found oid_want)};
	if (my $exists = $found->{$oid_want}) {
		return done($self, $exists);
	}

	# let git disambiguate if oid_want was too short,
	# but long enough to be unambiguous:
	my $tmp_git = $self->{tmp_git};
	if (my @res = $tmp_git->check($oid_want)) {
		return done($self, $found->{$res[0]});
	}
	if (my $err = $tmp_git->last_check_err) {
		dbg($self, $err);
	}
	done($self, undef);
}

sub event_step ($) {
	my ($self) = @_;
	eval {
		# step 1: resolve blobs to patches in the todo queue
		if (my $want = pop @{$self->{todo}}) {
			# this populates {patches} and {todo}
			resolve_patch($self, $want);

		# step 2: then we instantiate a working tree once
		# the todo queue is finally empty:
		} elsif (!defined($self->{tmp_git})) {
			do_git_init($self);

		# step 3: apply each patch in the stack
		} elsif (scalar @{$self->{patches}}) {
			do_git_apply($self);

		# step 4: execute the user-supplied callback with
		# our result: (which may be undef)
		# Other steps may call user_cb to terminate prematurely
		# on error
		} elsif (exists $self->{user_cb}) {
			do_finish($self);
		} else {
			die 'about to call user_cb twice'; # Oops :x
		}
	}; # eval
	my $err = $@;
	if ($err) {
		$err =~ s/^\s*Exception:\s*//; # bad word to show users :P
		dbg($self, "E: $err");
		eval { done($self, $err) };
	}
}

sub next_step ($) {
	my ($self) = @_;
	# if outside of public-inbox-httpd, caller is expected to be
	# looping event_step, anyways
	my $async = $self->{psgi_env}->{'pi-httpd.async'} or return;
	# PublicInbox::HTTPD::Async->new
	$async->(undef, undef, $self);
}

sub mark_found ($$$) {
	my ($self, $oid, $found_info) = @_;
	my $found = $self->{found};
	$found->{$oid} = $found_info;
	my $oid_cur = $found_info->[1];
	while ($oid_cur ne $oid && length($oid_cur) > $OID_MIN) {
		$found->{$oid_cur} = $found_info;
		chop($oid_cur);
	}
}

sub parse_ls_files ($$) {
	my ($self, $bref) = @_;
	my ($qsp, $di) = delete @$self{qw(-qsp -cur_di)};
	if (my $err = $qsp->{err}) {
		die "git ls-files error: $err";
	}

	my ($line, @extra) = split(/\0/, $$bref);
	scalar(@extra) and die "BUG: extra files in index: <",
				join('> <', @extra), ">";

	my ($info, $file) = split(/\t/, $line, 2);
	my ($mode_b, $oid_b_full, $stage) = split(/ /, $info);
	if ($file ne $di->{path_b}) {
		die
"BUG: index mismatch: file=$file != path_b=$di->{path_b}";
	}

	my $tmp_git = $self->{tmp_git} or die 'no git working tree';
	my (undef, undef, $size) = $tmp_git->check($oid_b_full);
	defined($size) or die "check $oid_b_full failed";

	dbg($self, "index at:\n$mode_b $oid_b_full\t$file");
	my $created = [ $tmp_git, $oid_b_full, 'blob', $size, $di ];
	mark_found($self, $di->{oid_b}, $created);
	next_step($self); # onto the next patch
}

sub ls_files_result {
	my ($bref, $self) = @_;
	eval { parse_ls_files($self, $bref) };
	ERR($self, $@) if $@;
}

sub oids_same_ish ($$) {
	(index($_[0], $_[1]) == 0) || (index($_[1], $_[0]) == 0);
}

sub skip_identical ($$$) {
	my ($self, $patches, $cur_oid_b) = @_;
	while (my $nxt = $patches->[0]) {
		if (oids_same_ish($cur_oid_b, $nxt->{oid_b})) {
			dbg($self, 'skipping '.di_url($self, $nxt).
				" for $cur_oid_b");
			shift @$patches;
		} else {
			return;
		}
	}
}

sub apply_result ($$) {
	my ($bref, $self) = @_;
	my ($qsp, $di) = delete @$self{qw(-qsp -cur_di)};
	dbg($self, $$bref);
	my $patches = $self->{patches};
	if (my $err = $qsp->{err}) {
		my $msg = "git apply error: $err";
		my $nxt = $patches->[0];
		if ($nxt && oids_same_ish($nxt->{oid_b}, $di->{oid_b})) {
			dbg($self, $msg);
			dbg($self, 'trying '.di_url($self, $nxt));
			return do_git_apply($self);
		} else {
			ERR($self, $msg);
		}
	} else {
		skip_identical($self, $patches, $di->{oid_b});
	}

	my @cmd = qw(git ls-files -s -z);
	$qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env});
	$self->{-cur_di} = $di;
	$self->{-qsp} = $qsp;
	$qsp->psgi_qx($self->{psgi_env}, undef, \&ls_files_result, $self);
}

sub do_git_apply ($) {
	my ($self) = @_;
	my $dn = $self->{tmp}->dirname;
	my $patches = $self->{patches};

	# we need --ignore-whitespace because some patches are CRLF
	my @cmd = (qw(git apply --cached --ignore-whitespace
			--unidiff-zero --whitespace=warn --verbose));
	my $len = length(join(' ', @cmd));
	my $total = $self->{tot};
	my $di; # keep track of the last one for "git ls-files"
	my $prv_oid_b;

	do {
		my $i = ++$self->{nr};
		$di = shift @$patches;
		dbg($self, "\napplying [$i/$total] " . di_url($self, $di) .
			"\n" . $di->{hdr_lines});
		my $path = $di->{n};
		$len += length($path) + 1;
		push @cmd, $path;
		$prv_oid_b = $di->{oid_b};
	} while (@$patches && $len < $ARG_SIZE_MAX &&
		 !oids_same_ish($patches->[0]->{oid_b}, $prv_oid_b));

	my $opt = { 2 => 1, -C => $dn, quiet => 1 };
	my $qsp = PublicInbox::Qspawn->new(\@cmd, $self->{git_env}, $opt);
	$self->{-cur_di} = $di;
	$self->{-qsp} = $qsp;
	$qsp->psgi_qx($self->{psgi_env}, undef, \&apply_result, $self);
}

sub di_url ($$) {
	my ($self, $di) = @_;
	# note: we don't pass the PSGI env unconditionally, here,
	# different inboxes can have different HTTP_HOST on the same instance.
	my $ibx = $di->{ibx};
	my $env = $self->{psgi_env} if $ibx eq $self->{inboxes}->[0];
	my $url = $ibx->base_url($env);
	my $mid = $di->{smsg}->{mid};
	defined($url) ? "$url$mid/" : "<$mid>";
}

sub retry_current {
	my ($self, $want) = @_;
	push @{$self->{todo}}, $want;
	next_step($self); # retry solve_existing
}

sub try_harder ($$) {
	my ($self, $want) = @_;

	# do we have more inboxes to try?
	return retry_current($self, $want) if scalar @{$want->{try_ibxs}};

	my $cur_want = $want->{oid_b};
	if (length($cur_want) > $OID_MIN) { # maybe a shorter OID will work
		delete $want->{try_ibxs}; # drop empty arrayref
		chop($cur_want);
		dbg($self, "retrying $want->{oid_b} as $cur_want");
		$want->{oid_b} = $cur_want;
		return retry_current($self, $want); # retry with shorter abbrev
	}

	dbg($self, "could not find $cur_want");
	eval { done($self, undef) };
	die "E: $@" if $@;
}

sub extract_diffs_done {
	my ($self, $want) = @_;

	delete $want->{try_smsgs};
	delete $want->{cur_ibx};

	my $diffs = delete $self->{tmp_diffs};
	if (scalar @$diffs) {
		unshift @{$self->{patches}}, @$diffs;
		dbg($self, "found $want->{oid_b} in " .  join(" ||\n\t",
			map { di_url($self, $_) } @$diffs));

		# good, we can find a path to the oid we $want, now
		# lets see if we need to apply more patches:
		my $di = $diffs->[0];
		my $src = $di->{oid_a};

		unless ($src =~ /\A0+\z/) {
			# we have to solve it using another oid, fine:
			my $job = { oid_b => $src, path_b => $di->{path_a} };
			push @{$self->{todo}}, $job;
		}
		return next_step($self); # onto the next todo item
	}
	try_harder($self, $want);
}

sub extract_diff_async {
	my ($bref, $oid, $type, $size, $x) = @_;
	my ($self, $want, $smsg) = @$x;
	if (defined($oid)) {
		$smsg->{blob} eq $oid or
				ERR($self, "BUG: $smsg->{blob} != $oid");
		PublicInbox::Eml->new($bref)->each_part(\&extract_diff, $x, 1);
	}

	scalar(@{$want->{try_smsgs}}) ? retry_current($self, $want)
					: extract_diffs_done($self, $want);
}

sub resolve_patch ($$) {
	my ($self, $want) = @_;

	my $cur_want = $want->{oid_b};
	if (scalar(@{$self->{patches}}) > $MAX_PATCH) {
		die "Aborting, too many steps to $self->{oid_want}";
	}

	if (my $msgs = $want->{try_smsgs}) {
		my $smsg = shift @$msgs;
		if ($self->{psgi_env}->{'pi-httpd.async'}) {
			return git_async_cat($want->{cur_ibx}->git,
						$smsg->{blob},
						\&extract_diff_async,
						[$self, $want, $smsg]);
		} else {
			if (my $eml = $want->{cur_ibx}->smsg_eml($smsg)) {
				$eml->each_part(\&extract_diff,
						[ $self, $want, $smsg ], 1);
			}
		}

		return scalar(@$msgs) ? retry_current($self, $want)
					: extract_diffs_done($self, $want);
	}

	# see if we can find the blob in an existing git repo:
	if (!$want->{try_ibxs} && $self->{seen_oid}->{$cur_want}++) {
		die "Loop detected solving $cur_want\n";
	}
	$want->{try_ibxs} //= [ @{$self->{inboxes}} ]; # array copy
	my $existing = solve_existing($self, $want);
	if (ref $existing) {
		my ($found_git, undef, $type, undef) = @$existing;
		dbg($self, "found $cur_want in " .
			join(" ||\n\t",
				$found_git->pub_urls($self->{psgi_env})));

		if ($cur_want eq $self->{oid_want} || $type ne 'blob') {
			eval { done($self, $existing) };
			die "E: $@" if $@;
			return;
		}
		mark_found($self, $cur_want, $existing);
		return next_step($self); # onto patch application
	} elsif ($existing > 0) {
		return retry_current($self, $want);
	} else { # $existing == 0: we may retry if inbox scan (below) fails
		delete $want->{try_gits};
	}

	# scan through inboxes to look for emails which results in
	# the oid we want:
	my $ibx = shift(@{$want->{try_ibxs}}) or die 'BUG: {try_ibxs} empty';
	if (my $msgs = find_smsgs($self, $ibx, $want)) {
		$want->{try_smsgs} = $msgs;
		$want->{cur_ibx} = $ibx;
		$self->{tmp_diffs} = [];
		return retry_current($self, $want);
	}
	try_harder($self, $want);
}

# this API is designed to avoid creating self-referential structures;
# so user_cb never references the SolverGit object
sub new {
	my ($class, $ibx, $user_cb, $uarg) = @_;

	bless {
		gits => $ibx->{-repo_objs},
		user_cb => $user_cb,
		uarg => $uarg,
		# -cur_di, -qsp, -msg => temporary fields for Qspawn callbacks

		# TODO: config option for searching related inboxes
		inboxes => [ $ibx ],
	}, $class;
}

# recreate $oid_want using $hints
# hints keys: path_a, path_b, oid_a (note: `oid_b' is NOT a hint)
# Calls {user_cb} with: [ ::Git object, oid_full, type, size, di (diff_info) ]
# with found object, or undef if nothing was found
# Calls {user_cb} with a string error on fatal errors
sub solve ($$$$$) {
	my ($self, $env, $out, $oid_want, $hints) = @_;

	# should we even get here? Probably not, but somebody
	# could be manually typing URLs:
	return done($self, undef) if $oid_want =~ /\A0+\z/;

	$self->{oid_want} = $oid_want;
	$self->{out} = $out;
	$self->{seen_oid} = {};
	$self->{tot} = 0;
	$self->{psgi_env} = $env;
	$self->{todo} = [ { %$hints, oid_b => $oid_want } ];
	$self->{patches} = []; # [ $di, $di, ... ]
	$self->{found} = {}; # { abbr => [ ::Git, oid, type, size, $di ] }
	$self->{tmp} = File::Temp->newdir("solver.$oid_want-XXXXXXXX", TMPDIR => 1);

	dbg($self, "solving $oid_want ...");
	if (my $async = $env->{'pi-httpd.async'}) {
		# PublicInbox::HTTPD::Async->new
		$async->(undef, undef, $self);
	} else {
		event_step($self) while $self->{user_cb};
	}
}

1;
public-inbox-1.6.1/lib/PublicInbox/Spamcheck.pm000066400000000000000000000011641377346120300213420ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors 
# License: AGPL-3.0+ 

# Spamchecking used by -watch and -mda tools
package PublicInbox::Spamcheck;
use strict;
use warnings;

sub get {
	my ($config, $key, $default) = @_;
	my $spamcheck = $config->{$key};
	$spamcheck = $default unless $spamcheck;

	return if !$spamcheck || $spamcheck eq 'none';

	if ($spamcheck eq 'spamc') {
		$spamcheck = 'PublicInbox::Spamcheck::Spamc';
	}
	if ($spamcheck =~ /::/) {
		eval "require $spamcheck";
		return $spamcheck->new;
	}
	warn "unsupported $key=$spamcheck\n";
	undef;
}

1;
public-inbox-1.6.1/lib/PublicInbox/Spamcheck/000077500000000000000000000000001377346120300210025ustar00rootroot00000000000000public-inbox-1.6.1/lib/PublicInbox/Spamcheck/Spamc.pm000066400000000000000000000034341377346120300224070ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 

# Default spam filter class for wrapping spamc(1)
package PublicInbox::Spamcheck::Spamc;
use strict;
use warnings;
use PublicInbox::Spawn qw(popen_rd spawn);
use IO::Handle;
use Fcntl qw(SEEK_SET);

sub new {
	my ($class) = @_;
	bless {
		checkcmd => [qw(spamc -E --headers)],
		hamcmd => [qw(spamc -L ham)],
		spamcmd => [qw(spamc -L spam)],
	}, $class;
}

sub spamcheck {
	my ($self, $msg, $out) = @_;

	my $rdr = { 0 => _msg_to_fh($self, $msg) };
	my ($fh, $pid) = popen_rd($self->{checkcmd}, undef, $rdr);
	unless (ref $out) {
		my $buf = '';
		$out = \$buf;
	}
	$$out = do { local $/; <$fh> };
	close $fh or die "close failed: $!";
	waitpid($pid, 0);
	($? || $$out eq '') ? 0 : 1;
}

sub hamlearn {
	my ($self, $msg, $rdr) = @_;
	_learn($self, $msg, $rdr, 'hamcmd');
}

sub spamlearn {
	my ($self, $msg, $rdr) = @_;
	_learn($self, $msg, $rdr, 'spamcmd');
}

sub _learn {
	my ($self, $msg, $rdr, $field) = @_;
	$rdr ||= {};
	$rdr->{0} = _msg_to_fh($self, $msg);
	$rdr->{1} ||= $self->_devnull;
	$rdr->{2} ||= $self->_devnull;
	my $pid = spawn($self->{$field}, undef, $rdr);
	waitpid($pid, 0);
	!$?;
}

sub _devnull {
	my ($self) = @_;
	$self->{-devnull} //= do {
		open my $fh, '+>', '/dev/null' or
				die "failed to open /dev/null: $!";
		$fh
	}
}

sub _msg_to_fh {
	my ($self, $msg) = @_;
	if (my $ref = ref($msg)) {
		my $fd = eval { fileno($msg) };
		return $msg if defined($fd) && $fd >= 0;

		open(my $tmpfh, '+>', undef) or die "failed to open: $!";
		$tmpfh->autoflush(1);
		$msg = \($msg->as_string) if $ref ne 'SCALAR';
		print $tmpfh $$msg or die "failed to print: $!";
		sysseek($tmpfh, 0, SEEK_SET) or
			die "sysseek(fh) failed: $!";

		return $tmpfh;
	}
	$msg;
}

1;
public-inbox-1.6.1/lib/PublicInbox/Spawn.pm000066400000000000000000000206301377346120300205330ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 
#
# This allows vfork to be used for spawning subprocesses if
# ~/.cache/public-inbox/inline-c is writable or if PERL_INLINE_DIRECTORY
# is explicitly defined in the environment (and writable).
# Under Linux, vfork can make a big difference in spawning performance
# as process size increases (fork still needs to mark pages for CoW use).
# Currently, we only use this for code intended for long running
# daemons (inside the PSGI code (-httpd) and -nntpd).  The short-lived
# scripts (-mda, -index, -learn, -init) either use IPC::run or standard
# Perl routines.
#
# There'll probably be more OS-level C stuff here, down the line.
# We don't want too many DSOs: https://udrepper.livejournal.com/8790.html

package PublicInbox::Spawn;
use strict;
use parent qw(Exporter);
use Symbol qw(gensym);
use PublicInbox::ProcessPipe;
our @EXPORT_OK = qw/which spawn popen_rd nodatacow_dir/;
our @RLIMITS = qw(RLIMIT_CPU RLIMIT_CORE RLIMIT_DATA);

my $vfork_spawn = <<'VFORK_SPAWN';
#include 
#include 
#include 
#include 
#include 
#include 

/* some platforms need alloca.h, but some don't */
#if defined(__GNUC__) && !defined(alloca)
#  define alloca(sz) __builtin_alloca(sz)
#endif

#include 
#include 

/*
 * From the av_len apidoc:
 *   Note that, unlike what the name implies, it returns
 *   the highest index in the array, so to get the size of
 *   the array you need to use "av_len(av) + 1".
 *   This is unlike "sv_len", which returns what you would expect.
 */
#define AV2C_COPY(dst, src) do { \
	I32 i; \
	I32 top_index = av_len(src); \
	I32 real_len = top_index + 1; \
	I32 capa = real_len + 1; \
	dst = alloca(capa * sizeof(char *)); \
	for (i = 0; i < real_len; i++) { \
		SV **sv = av_fetch(src, i, 0); \
		dst[i] = SvPV_nolen(*sv); \
	} \
	dst[real_len] = 0; \
} while (0)

/* needs to be safe inside a vfork'ed process */
static void exit_err(int *cerrnum)
{
	*cerrnum = errno;
	_exit(1);
}

/*
 * unstable internal API.  It'll be updated depending on
 * whatever we'll need in the future.
 * Be sure to update PublicInbox::SpawnPP if this changes
 */
int pi_fork_exec(SV *redirref, SV *file, SV *cmdref, SV *envref, SV *rlimref,
		 const char *cd)
{
	AV *redir = (AV *)SvRV(redirref);
	AV *cmd = (AV *)SvRV(cmdref);
	AV *env = (AV *)SvRV(envref);
	AV *rlim = (AV *)SvRV(rlimref);
	const char *filename = SvPV_nolen(file);
	pid_t pid;
	char **argv, **envp;
	sigset_t set, old, cset;
	int ret, perrnum, cerrnum = 0;

	AV2C_COPY(argv, cmd);
	AV2C_COPY(envp, env);

	ret = sigfillset(&set);
	assert(ret == 0 && "BUG calling sigfillset");
	ret = sigprocmask(SIG_SETMASK, &set, &old);
	assert(ret == 0 && "BUG calling sigprocmask to block");
	ret = sigemptyset(&cset);
	assert(ret == 0 && "BUG calling sigemptyset");
	ret = sigaddset(&cset, SIGCHLD);
	assert(ret == 0 && "BUG calling sigaddset for SIGCHLD");
	pid = vfork();
	if (pid == 0) {
		int sig;
		I32 i, child_fd, max = av_len(redir);

		for (child_fd = 0; child_fd <= max; child_fd++) {
			SV **parent = av_fetch(redir, child_fd, 0);
			int parent_fd = SvIV(*parent);
			if (parent_fd == child_fd)
				continue;
			if (dup2(parent_fd, child_fd) < 0)
				exit_err(&cerrnum);
		}
		for (sig = 1; sig < NSIG; sig++)
			signal(sig, SIG_DFL); /* ignore errors on signals */
		if (*cd && chdir(cd) < 0)
			exit_err(&cerrnum);

		max = av_len(rlim);
		for (i = 0; i < max; i += 3) {
			struct rlimit rl;
			SV **res = av_fetch(rlim, i, 0);
			SV **soft = av_fetch(rlim, i + 1, 0);
			SV **hard = av_fetch(rlim, i + 2, 0);

			rl.rlim_cur = SvIV(*soft);
			rl.rlim_max = SvIV(*hard);
			if (setrlimit(SvIV(*res), &rl) < 0)
				exit_err(&cerrnum);
		}

		/*
		 * don't bother unblocking other signals for now, just SIGCHLD.
		 * we don't want signals to the group taking out a subprocess
		 */
		(void)sigprocmask(SIG_UNBLOCK, &cset, NULL);
		execve(filename, argv, envp);
		exit_err(&cerrnum);
	}
	perrnum = errno;
	ret = sigprocmask(SIG_SETMASK, &old, NULL);
	assert(ret == 0 && "BUG calling sigprocmask to restore");
	if (cerrnum) {
		if (pid > 0)
			waitpid(pid, NULL, 0);
		pid = -1;
		errno = cerrnum;
	} else if (perrnum) {
		errno = perrnum;
	}
	return (int)pid;
}
VFORK_SPAWN

# btrfs on Linux is copy-on-write (COW) by default.  As of Linux 5.7,
# this still leads to fragmentation for SQLite and Xapian files where
# random I/O happens, so we disable COW just for SQLite files and Xapian
# directories.  Disabling COW disables checksumming, so we only do this
# for regeneratable files, and not canonical git storage (git doesn't
# checksum refs, only data under $GIT_DIR/objects).
my $set_nodatacow = $^O eq 'linux' ? <<'SET_NODATACOW' : '';
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

void nodatacow_fd(int fd)
{
	struct statfs buf;
	int val = 0;

	if (fstatfs(fd, &buf) < 0) {
		fprintf(stderr, "fstatfs: %s\\n", strerror(errno));
		return;
	}

	/* only btrfs is known to have this problem, so skip for non-btrfs */
	if (buf.f_type != BTRFS_SUPER_MAGIC)
		return;

	if (ioctl(fd, FS_IOC_GETFLAGS, &val) < 0) {
		fprintf(stderr, "FS_IOC_GET_FLAGS: %s\\n", strerror(errno));
		return;
	}
	val |= FS_NOCOW_FL;
	if (ioctl(fd, FS_IOC_SETFLAGS, &val) < 0)
		fprintf(stderr, "FS_IOC_SET_FLAGS: %s\\n", strerror(errno));
}

void nodatacow_dir(const char *dir)
{
	DIR *dh = opendir(dir);
	int fd;

	if (!dh) croak("opendir(%s): %s", dir, strerror(errno));
	fd = dirfd(dh);
	if (fd >= 0)
		nodatacow_fd(fd);
	/* ENOTSUP probably won't happen under Linux... */
	closedir(dh);
}
SET_NODATACOW

my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //= (
		$ENV{XDG_CACHE_HOME} //
		( ($ENV{HOME} // '/nonexistent').'/.cache' )
	).'/public-inbox/inline-c';

$set_nodatacow = $vfork_spawn = undef unless -d $inline_dir && -w _;
if (defined $vfork_spawn) {
	# Inline 0.64 or later has locking in multi-process env,
	# but we support 0.5 on Debian wheezy
	use Fcntl qw(:flock);
	eval {
		my $f = "$inline_dir/.public-inbox.lock";
		open my $fh, '>', $f or die "failed to open $f: $!\n";
		flock($fh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n";
		eval 'use Inline C => $vfork_spawn . $set_nodatacow';
		my $err = $@;
		my $ndc_err;
		if ($err && $set_nodatacow) { # missing Linux kernel headers
			$ndc_err = $err;
			undef $set_nodatacow;
			eval 'use Inline C => $vfork_spawn';
		}
		flock($fh, LOCK_UN) or die "LOCK_UN failed on $f: $!\n";
		die $err if $err;
		warn $ndc_err if $ndc_err;
	};
	if ($@) {
		warn "Inline::C failed for vfork: $@\n";
		$set_nodatacow = $vfork_spawn = undef;
	}
}

unless (defined $vfork_spawn) {
	require PublicInbox::SpawnPP;
	*pi_fork_exec = \&PublicInbox::SpawnPP::pi_fork_exec
}
unless ($set_nodatacow) {
	require PublicInbox::NDC_PP;
	no warnings 'once';
	*nodatacow_fd = \&PublicInbox::NDC_PP::nodatacow_fd;
	*nodatacow_dir = \&PublicInbox::NDC_PP::nodatacow_dir;
}
undef $set_nodatacow;
undef $vfork_spawn;

sub which ($) {
	my ($file) = @_;
	return $file if index($file, '/') >= 0;
	foreach my $p (split(':', $ENV{PATH})) {
		$p .= "/$file";
		return $p if -x $p;
	}
	undef;
}

sub spawn ($;$$) {
	my ($cmd, $env, $opts) = @_;
	my $f = which($cmd->[0]);
	defined $f or die "$cmd->[0]: command not found\n";
	my @env;
	$opts ||= {};

	my %env = $env ? (%ENV, %$env) : %ENV;
	while (my ($k, $v) = each %env) {
		push @env, "$k=$v";
	}
	my $redir = [];
	for my $child_fd (0..2) {
		my $parent_fd = $opts->{$child_fd};
		if (defined($parent_fd) && $parent_fd !~ /\A[0-9]+\z/) {
			defined(my $fd = fileno($parent_fd)) or
					die "$parent_fd not an IO GLOB? $!";
			$parent_fd = $fd;
		}
		$redir->[$child_fd] = $parent_fd // $child_fd;
	}
	my $rlim = [];

	foreach my $l (@RLIMITS) {
		defined(my $v = $opts->{$l}) or next;
		my $r = eval "require BSD::Resource; BSD::Resource::$l();";
		unless (defined $r) {
			warn "$l undefined by BSD::Resource: $@\n";
			next;
		}
		push @$rlim, $r, @$v;
	}
	my $cd = $opts->{'-C'} // ''; # undef => NULL mapping doesn't work?
	my $pid = pi_fork_exec($redir, $f, $cmd, \@env, $rlim, $cd);
	die "fork_exec @$cmd failed: $!\n" unless $pid > 0;
	$pid;
}

sub popen_rd {
	my ($cmd, $env, $opts) = @_;
	pipe(my ($r, $w)) or die "pipe: $!\n";
	$opts ||= {};
	$opts->{1} = fileno($w);
	my $pid = spawn($cmd, $env, $opts);
	return ($r, $pid) if wantarray;
	my $ret = gensym;
	tie *$ret, 'PublicInbox::ProcessPipe', $pid, $r;
	$ret;
}

1;
public-inbox-1.6.1/lib/PublicInbox/SpawnPP.pm000066400000000000000000000034311377346120300207730ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 

# Pure-Perl implementation of "spawn".  This can't take advantage
# of vfork, so no speedups under Linux for spawning from large processes.
package PublicInbox::SpawnPP;
use strict;
use warnings;
use POSIX qw(dup2 :signal_h);

# Pure Perl implementation for folks that do not use Inline::C
sub pi_fork_exec ($$$$$$) {
	my ($redir, $f, $cmd, $env, $rlim, $cd) = @_;
	my $old = POSIX::SigSet->new();
	my $set = POSIX::SigSet->new();
	$set->fillset or die "fillset failed: $!";
	sigprocmask(SIG_SETMASK, $set, $old) or die "can't block signals: $!";
	my $syserr;
	my $pid = fork;
	unless (defined $pid) { # compat with Inline::C version
		$syserr = $!;
		$pid = -1;
	}
	if ($pid == 0) {
		while (@$rlim) {
			my ($r, $soft, $hard) = splice(@$rlim, 0, 3);
			BSD::Resource::setrlimit($r, $soft, $hard) or
			  warn "failed to set $r=[$soft,$hard]\n";
		}
		for my $child_fd (0..$#$redir) {
			my $parent_fd = $redir->[$child_fd];
			next if $parent_fd == $child_fd;
			dup2($parent_fd, $child_fd) or
				die "dup2($parent_fd, $child_fd): $!\n";
		}
		if ($cd ne '') {
			chdir $cd or die "chdir $cd: $!";
		}
		$SIG{$_} = 'DEFAULT' for keys %SIG;
		my $cset = POSIX::SigSet->new();
		$cset->addset(POSIX::SIGCHLD) or die "can't add SIGCHLD: $!";
		sigprocmask(SIG_UNBLOCK, $cset) or
					die "can't unblock SIGCHLD: $!";
		if ($ENV{MOD_PERL}) {
			exec which('env'), '-i', @$env, @$cmd;
			die "exec env -i ... $cmd->[0] failed: $!\n";
		} else {
			local %ENV = map { split(/=/, $_, 2) } @$env;
			my @cmd = @$cmd;
			$cmd[0] = $f;
			exec @cmd;
			die "exec $cmd->[0] failed: $!\n";
		}
	}
	sigprocmask(SIG_SETMASK, $old) or die "can't unblock signals: $!";
	$! = $syserr;
	$pid;
}

1;
public-inbox-1.6.1/lib/PublicInbox/Syscall.pm000066400000000000000000000213021377346120300210520ustar00rootroot00000000000000# This is a fork of the (for now) unmaintained Sys::Syscall 0.25,
# specifically the Debian libsys-syscall-perl 0.25-6 version to
# fix upstream regressions in 0.25.
#
# This license differs from the rest of public-inbox
#
# This module is Copyright (c) 2005 Six Apart, Ltd.
# Copyright (C) 2019-2020 all contributors 
#
# All rights reserved.
#
# You may distribute under the terms of either the GNU General Public
# License or the Artistic License, as specified in the Perl README file.
package PublicInbox::Syscall;
use strict;
use parent qw(Exporter);
use POSIX qw(ENOSYS O_NONBLOCK);
use Config;

# $VERSION = '0.25'; # Sys::Syscall version
our @EXPORT_OK = qw(epoll_ctl epoll_create epoll_wait
                  EPOLLIN EPOLLOUT EPOLLET
                  EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
                  EPOLLONESHOT EPOLLEXCLUSIVE
                  signalfd $SFD_NONBLOCK);
our %EXPORT_TAGS = (epoll => [qw(epoll_ctl epoll_create epoll_wait
                             EPOLLIN EPOLLOUT
                             EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
                             EPOLLONESHOT EPOLLEXCLUSIVE)],
                );

use constant {
	EPOLLIN => 1,
	EPOLLOUT => 4,
	# EPOLLERR => 8,
	# EPOLLHUP => 16,
	# EPOLLRDBAND => 128,
	EPOLLEXCLUSIVE => (1 << 28),
	EPOLLONESHOT => (1 << 30),
	EPOLLET => (1 << 31),
	EPOLL_CTL_ADD => 1,
	EPOLL_CTL_DEL => 2,
	EPOLL_CTL_MOD => 3,
};

our $loaded_syscall = 0;

sub _load_syscall {
    # props to Gaal for this!
    return if $loaded_syscall++;
    my $clean = sub {
        delete @INC{qw};
    };
    $clean->(); # don't trust modules before us
    my $rv = eval { require 'syscall.ph'; 1 } || eval { require 'sys/syscall.ph'; 1 };
    $clean->(); # don't require modules after us trust us
    return $rv;
}


our (
     $SYS_epoll_create,
     $SYS_epoll_ctl,
     $SYS_epoll_wait,
     $SYS_signalfd4,
     );

my $SFD_CLOEXEC = 02000000; # Perl does not expose O_CLOEXEC
our $SFD_NONBLOCK = O_NONBLOCK;
our $no_deprecated = 0;

if ($^O eq "linux") {
    my $machine = (POSIX::uname())[-1];
    # whether the machine requires 64-bit numbers to be on 8-byte
    # boundaries.
    my $u64_mod_8 = 0;

    # if we're running on an x86_64 kernel, but a 32-bit process,
    # we need to use the x32 or i386 syscall numbers.
    if ($machine eq "x86_64" && $Config{ptrsize} == 4) {
        $machine = $Config{cppsymbols} =~ /\b__ILP32__=1\b/ ? 'x32' : 'i386';
    }

    # Similarly for mips64 vs mips
    if ($machine eq "mips64" && $Config{ptrsize} == 4) {
        $machine = "mips";
    }

    if ($machine =~ m/^i[3456]86$/) {
        $SYS_epoll_create = 254;
        $SYS_epoll_ctl    = 255;
        $SYS_epoll_wait   = 256;
        $SYS_signalfd4 = 327;
    } elsif ($machine eq "x86_64") {
        $SYS_epoll_create = 213;
        $SYS_epoll_ctl    = 233;
        $SYS_epoll_wait   = 232;
        $SYS_signalfd4 = 289;
    } elsif ($machine eq 'x32') {
        $SYS_epoll_create = 1073742037;
        $SYS_epoll_ctl = 1073742057;
        $SYS_epoll_wait = 1073742056;
        $SYS_signalfd4 = 1073742113;
    } elsif ($machine eq 'sparc64') {
	$SYS_epoll_create = 193;
	$SYS_epoll_ctl = 194;
	$SYS_epoll_wait = 195;
	$u64_mod_8 = 1;
	$SYS_signalfd4 = 317;
	$SFD_CLOEXEC = 020000000;
    } elsif ($machine =~ m/^parisc/) {
        $SYS_epoll_create = 224;
        $SYS_epoll_ctl    = 225;
        $SYS_epoll_wait   = 226;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 309;
    } elsif ($machine =~ m/^ppc64/) {
        $SYS_epoll_create = 236;
        $SYS_epoll_ctl    = 237;
        $SYS_epoll_wait   = 238;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 313;
    } elsif ($machine eq "ppc") {
        $SYS_epoll_create = 236;
        $SYS_epoll_ctl    = 237;
        $SYS_epoll_wait   = 238;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 313;
    } elsif ($machine =~ m/^s390/) {
        $SYS_epoll_create = 249;
        $SYS_epoll_ctl    = 250;
        $SYS_epoll_wait   = 251;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 322;
    } elsif ($machine eq "ia64") {
        $SYS_epoll_create = 1243;
        $SYS_epoll_ctl    = 1244;
        $SYS_epoll_wait   = 1245;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 289;
    } elsif ($machine eq "alpha") {
        # natural alignment, ints are 32-bits
        $SYS_epoll_create = 407;
        $SYS_epoll_ctl    = 408;
        $SYS_epoll_wait   = 409;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 484;
	$SFD_CLOEXEC = 010000000;
    } elsif ($machine eq "aarch64") {
        $SYS_epoll_create = 20;  # (sys_epoll_create1)
        $SYS_epoll_ctl    = 21;
        $SYS_epoll_wait   = 22;  # (sys_epoll_pwait)
        $u64_mod_8        = 1;
        $no_deprecated    = 1;
        $SYS_signalfd4 = 74;
    } elsif ($machine =~ m/arm(v\d+)?.*l/) {
        # ARM OABI
        $SYS_epoll_create = 250;
        $SYS_epoll_ctl    = 251;
        $SYS_epoll_wait   = 252;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 355;
    } elsif ($machine =~ m/^mips64/) {
        $SYS_epoll_create = 5207;
        $SYS_epoll_ctl    = 5208;
        $SYS_epoll_wait   = 5209;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 5283;
    } elsif ($machine =~ m/^mips/) {
        $SYS_epoll_create = 4248;
        $SYS_epoll_ctl    = 4249;
        $SYS_epoll_wait   = 4250;
        $u64_mod_8        = 1;
        $SYS_signalfd4 = 4324;
    } else {
        # as a last resort, try using the *.ph files which may not
        # exist or may be wrong
        _load_syscall();
        $SYS_epoll_create = eval { &SYS_epoll_create; } || 0;
        $SYS_epoll_ctl    = eval { &SYS_epoll_ctl;    } || 0;
        $SYS_epoll_wait   = eval { &SYS_epoll_wait;   } || 0;

	# Note: do NOT add new syscalls to depend on *.ph, here.
	# Better to miss syscalls (so we can fallback to IO::Poll)
	# than to use wrong ones, since the names are not stable
	# (at least not on FreeBSD), if the actual numbers are.
    }

    if ($u64_mod_8) {
        *epoll_wait = \&epoll_wait_mod8;
        *epoll_ctl = \&epoll_ctl_mod8;
    } else {
        *epoll_wait = \&epoll_wait_mod4;
        *epoll_ctl = \&epoll_ctl_mod4;
    }
}

elsif ($^O eq "freebsd") {
    if ($ENV{FREEBSD_SENDFILE}) {
        # this is still buggy and in development
    }
}

############################################################################
# epoll functions
############################################################################

sub epoll_defined { return $SYS_epoll_create ? 1 : 0; }

sub epoll_create {
	syscall($SYS_epoll_create, $no_deprecated ? 0 : ($_[0]||100)+0);
}

# epoll_ctl wrapper
# ARGS: (epfd, op, fd, events_mask)
sub epoll_ctl_mod4 {
    syscall($SYS_epoll_ctl, $_[0]+0, $_[1]+0, $_[2]+0, pack("LLL", $_[3], $_[2], 0));
}
sub epoll_ctl_mod8 {
    syscall($SYS_epoll_ctl, $_[0]+0, $_[1]+0, $_[2]+0, pack("LLLL", $_[3], 0, $_[2], 0));
}

# epoll_wait wrapper
# ARGS: (epfd, maxevents, timeout (milliseconds), arrayref)
#  arrayref: values modified to be [$fd, $event]
our $epoll_wait_events;
our $epoll_wait_size = 0;
sub epoll_wait_mod4 {
    # resize our static buffer if requested size is bigger than we've ever done
    if ($_[1] > $epoll_wait_size) {
        $epoll_wait_size = $_[1];
        $epoll_wait_events = "\0" x 12 x $epoll_wait_size;
    }
    my $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0);
    for (0..$ct-1) {
        @{$_[3]->[$_]}[1,0] = unpack("LL", substr($epoll_wait_events, 12*$_, 8));
    }
    return $ct;
}

sub epoll_wait_mod8 {
    # resize our static buffer if requested size is bigger than we've ever done
    if ($_[1] > $epoll_wait_size) {
        $epoll_wait_size = $_[1];
        $epoll_wait_events = "\0" x 16 x $epoll_wait_size;
    }
    my $ct;
    if ($no_deprecated) {
        $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0, undef);
    } else {
        $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0);
    }
    for (0..$ct-1) {
        # 16 byte epoll_event structs, with format:
        #    4 byte mask [idx 1]
        #    4 byte padding (we put it into idx 2, useless)
        #    8 byte data (first 4 bytes are fd, into idx 0)
        @{$_[3]->[$_]}[1,2,0] = unpack("LLL", substr($epoll_wait_events, 16*$_, 12));
    }
    return $ct;
}

sub signalfd ($$$) {
	my ($fd, $signos, $flags) = @_;
	if ($SYS_signalfd4) {
		my $set = POSIX::SigSet->new(@$signos);
		syscall($SYS_signalfd4, $fd, "$$set",
			# $Config{sig_count} is NSIG, so this is NSIG/8:
			int($Config{sig_count}/8),
			$flags|$SFD_CLOEXEC);
	} else {
		$! = ENOSYS;
		undef;
	}
}

1;

=head1 WARRANTY

This is free software. IT COMES WITHOUT WARRANTY OF ANY KIND.

=head1 AUTHORS

Brad Fitzpatrick 
public-inbox-1.6.1/lib/PublicInbox/TLS.pm000066400000000000000000000010541377346120300201040ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors 
# License: AGPL-3.0+ 

# IO::Socket::SSL support code
package PublicInbox::TLS;
use strict;
use IO::Socket::SSL;
use PublicInbox::Syscall qw(EPOLLIN EPOLLOUT);
use Carp qw(carp);

sub err () { $SSL_ERROR }

# returns the EPOLL event bit which matches the existing SSL error
sub epollbit () {
	return EPOLLIN if $SSL_ERROR == SSL_WANT_READ;
	return EPOLLOUT if $SSL_ERROR == SSL_WANT_WRITE;
	carp "unexpected SSL error: $SSL_ERROR";
	undef;
}

1;
public-inbox-1.6.1/lib/PublicInbox/TestCommon.pm000066400000000000000000000264251377346120300215430ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors 
# License: AGPL-3.0+ 

# internal APIs used only for tests
package PublicInbox::TestCommon;
use strict;
use parent qw(Exporter);
use v5.10.1;
use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD :seek);
use POSIX qw(dup2);
use IO::Socket::INET;
our @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods
	run_script start_script key2sub xsys xqx eml_load tick
	have_xapian_compact);

sub eml_load ($) {
	my ($path, $cb) = @_;
	open(my $fh, '<', $path) or die "open $path: $!";
	require PublicInbox::Eml;
	PublicInbox::Eml->new(\(do { local $/; <$fh> }));
}

sub tmpdir (;$) {
	my ($base) = @_;
	require File::Temp;
	unless (defined $base) {
		($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!);
	}
	my $tmpdir = File::Temp->newdir("pi-$base-$$-XXXXXX", TMPDIR => 1);
	($tmpdir->dirname, $tmpdir);
}

sub tcp_server () {
	IO::Socket::INET->new(
		LocalAddr => '127.0.0.1',
		ReuseAddr => 1,
		Proto => 'tcp',
		Type => Socket::SOCK_STREAM(),
		Listen => 1024,
		Blocking => 0,
	) or Test::More::BAIL_OUT("failed to create TCP server: $!");
}

sub tcp_connect {
	my ($dest, %opt) = @_;
	my $addr = $dest->sockhost . ':' . $dest->sockport;
	my $s = IO::Socket::INET->new(
		Proto => 'tcp',
		Type => Socket::SOCK_STREAM(),
		PeerAddr => $addr,
		%opt,
	) or Test::More::BAIL_OUT("failed to connect to $addr: $!");
	$s->autoflush(1);
	$s;
}

sub require_git ($;$) {
	my ($req, $maybe) = @_;
	my ($req_maj, $req_min, $req_sub) = split(/\./, $req);
	my ($cur_maj, $cur_min, $cur_sub) = (xqx([qw(git --version)])
			=~ /version (\d+)\.(\d+)(?:\.(\d+))?/);

	my $req_int = ($req_maj << 24) | ($req_min << 16) | ($req_sub // 0);
	my $cur_int = ($cur_maj << 24) | ($cur_min << 16) | ($cur_sub // 0);
	if ($cur_int < $req_int) {
		return 0 if $maybe;
		Test::More::plan(skip_all =>
			"git $req+ required, have $cur_maj.$cur_min.$cur_sub");
	}
	1;
}

sub require_mods {
	my @mods = @_;
	my $maybe = pop @mods if $mods[-1] =~ /\A[0-9]+\z/;
	my @need;
	while (my $mod = shift(@mods)) {
		if ($mod eq 'Search::Xapian') {
			if (eval { require PublicInbox::Search } &&
				PublicInbox::Search::load_xapian()) {
				next;
			}
		} elsif ($mod eq 'Search::Xapian::WritableDatabase') {
			if (eval { require PublicInbox::SearchIdx } &&
				PublicInbox::SearchIdx::load_xapian_writable()){
					next;
			}
		} elsif (index($mod, '||') >= 0) { # "Foo||Bar"
			my $ok;
			for my $m (split(/\Q||\E/, $mod)) {
				eval "require $m";
				next if $@;
				$ok = $m;
				last;
			}
			next if $ok;
		} else {
			eval "require $mod";
		}
		if ($@) {
			push @need, $mod;
		} elsif ($mod eq 'IO::Socket::SSL' &&
			# old versions of IO::Socket::SSL aren't supported
			# by libnet, at least:
			# https://rt.cpan.org/Ticket/Display.html?id=100529
				!eval{ IO::Socket::SSL->VERSION(2.007); 1 }) {
			push @need, $@;
		}
	}
	return unless @need;
	my $m = join(', ', @need)." missing for $0";
	Test::More::skip($m, $maybe) if $maybe;
	Test::More::plan(skip_all => $m)
}

sub key2script ($) {
	my ($key) = @_;
	return $key if ($key eq 'git' || index($key, '/') >= 0);
	# n.b. we may have scripts which don't start with "public-inbox" in
	# the future:
	$key =~ s/\A([-\.])/public-inbox$1/;
	'blib/script/'.$key;
}

my @io_mode = ([ *STDIN{IO}, '<&' ], [ *STDOUT{IO}, '>&' ],
		[ *STDERR{IO}, '>&' ]);

sub _prepare_redirects ($) {
	my ($fhref) = @_;
	my $orig_io = [];
	for (my $fd = 0; $fd <= $#io_mode; $fd++) {
		my $fh = $fhref->[$fd] or next;
		my ($oldfh, $mode) = @{$io_mode[$fd]};
		open my $orig, $mode, $oldfh or die "$$oldfh $mode stash: $!";
		$orig_io->[$fd] = $orig;
		open $oldfh, $mode, $fh or die "$$oldfh $mode redirect: $!";
	}
	$orig_io;
}

sub _undo_redirects ($) {
	my ($orig_io) = @_;
	for (my $fd = 0; $fd <= $#io_mode; $fd++) {
		my $fh = $orig_io->[$fd] or next;
		my ($oldfh, $mode) = @{$io_mode[$fd]};
		open $oldfh, $mode, $fh or die "$$oldfh $mode redirect: $!";
	}
}

# $opt->{run_mode} (or $ENV{TEST_RUN_MODE}) allows choosing between
# three ways to spawn our own short-lived Perl scripts for testing:
#
# 0 - (fork|vfork) + execve, the most realistic but slowest
# 1 - (not currently implemented)
# 2 - preloading and running in current process (slightly faster than 1)
#
# 2 is not compatible with scripts which use "exit" (which we'll try to
# avoid in the future).
# The default is 2.
our $run_script_exit_code;
sub RUN_SCRIPT_EXIT () { "RUN_SCRIPT_EXIT\n" };
sub run_script_exit {
	$run_script_exit_code = $_[0] // 0;
	die RUN_SCRIPT_EXIT;
}

my %cached_scripts;
sub key2sub ($) {
	my ($key) = @_;
	$cached_scripts{$key} //= do {
		my $f = key2script($key);
		open my $fh, '<', $f or die "open $f: $!";
		my $str = do { local $/; <$fh> };
		my $pkg = (split(m!/!, $f))[-1];
		$pkg =~ s/([a-z])([a-z0-9]+)(\.t)?\z/\U$1\E$2/;
		$pkg .= "_T" if $3;
		$pkg =~ tr/-.//d;
		$pkg = "PublicInbox::TestScript::$pkg";
		eval <can('main');
	}
}

sub _run_sub ($$$) {
	my ($sub, $key, $argv) = @_;
	local @ARGV = @$argv;
	$run_script_exit_code = undef;
	my $exit_code = eval { $sub->(@$argv) };
	if ($@ eq RUN_SCRIPT_EXIT) {
		$@ = '';
		$exit_code = $run_script_exit_code;
		$? = ($exit_code << 8);
	} elsif (defined($exit_code)) {
		$? = ($exit_code << 8);
	} elsif ($@) { # mimic die() behavior when uncaught
		warn "E: eval-ed $key: $@\n";
		$? = ($! << 8) if $!;
		$? = (255 << 8) if $? == 0;
	} else {
		die "BUG: eval-ed $key: no exit code or \$@\n";
	}
}

sub run_script ($;$$) {
	my ($cmd, $env, $opt) = @_;
	my ($key, @argv) = @$cmd;
	my $run_mode = $ENV{TEST_RUN_MODE} // $opt->{run_mode} // 1;
	my $sub = $run_mode == 0 ? undef : key2sub($key);
	my $fhref = [];
	my $spawn_opt = {};
	for my $fd (0..2) {
		my $redir = $opt->{$fd};
		my $ref = ref($redir);
		if ($ref eq 'SCALAR') {
			open my $fh, '+>', undef or die "open: $!";
			$fhref->[$fd] = $fh;
			$spawn_opt->{$fd} = $fh;
			next if $fd > 0;
			$fh->autoflush(1);
			print $fh $$redir or die "print: $!";
			seek($fh, 0, SEEK_SET) or die "seek: $!";
		} elsif ($ref eq 'GLOB') {
			$spawn_opt->{$fd} = $fhref->[$fd] = $redir;
		} elsif ($ref) {
			die "unable to deal with $ref $redir";
		}
	}
	if ($run_mode == 0) {
		# spawn an independent new process, like real-world use cases:
		require PublicInbox::Spawn;
		my $cmd = [ key2script($key), @argv ];
		my $pid = PublicInbox::Spawn::spawn($cmd, $env, $spawn_opt);
		if (defined $pid) {
			my $r = waitpid($pid, 0);
			defined($r) or die "waitpid: $!";
			$r == $pid or die "waitpid: expected $pid, got $r";
		}
	} else { # localize and run everything in the same process:
		# note: "local *STDIN = *STDIN;" and so forth did not work in
		# old versions of perl
		local %ENV = $env ? (%ENV, %$env) : %ENV;
		local %SIG = %SIG;
		local $0 = join(' ', @$cmd);
		my $orig_io = _prepare_redirects($fhref);
		_run_sub($sub, $key, \@argv);
		_undo_redirects($orig_io);
	}

	# slurp the redirects back into user-supplied strings
	for my $fd (1..2) {
		my $fh = $fhref->[$fd] or next;
		seek($fh, 0, SEEK_SET) or die "seek: $!";
		my $redir = $opt->{$fd};
		local $/;
		$$redir = <$fh>;
	}
	$? == 0;
}

sub tick (;$) {
	my $tick = shift // 0.1;
	select undef, undef, undef, $tick;
	1;
}

sub wait_for_tail ($;$) {
	my ($tail_pid, $want) = @_;
	my $wait = 2;
	if ($^O eq 'linux') { # GNU tail may use inotify
		state $tail_has_inotify;
		return tick if $want < 0 && $tail_has_inotify;
		my $end = time + $wait;
		my @ino;
		do {
			@ino = grep {
				readlink($_) =~ /\binotify\b/
			} glob("/proc/$tail_pid/fd/*");
		} while (!@ino && time <= $end and tick);
		return if !@ino;
		$tail_has_inotify = 1;
		$ino[0] =~ s!/fd/!/fdinfo/!;
		my @info;
		do {
			if (open my $fh, '<', $ino[0]) {
				local $/ = "\n";
				@info = grep(/^inotify wd:/, <$fh>);
			}
		} while (scalar(@info) < $want && time <= $end and tick);
	} else {
		sleep($wait);
	}
}

# like system() built-in, but uses spawn() for env/rdr + vfork
sub xsys {
	my ($cmd, $env, $rdr) = @_;
	if (ref($cmd)) {
		$rdr ||= {};
	} else {
		$cmd = [ @_ ];
		$env = undef;
		$rdr = {};
	}
	run_script($cmd, $env, { %$rdr, run_mode => 0 });
	$? >> 8
}

# like `backtick` or qx{} op, but uses spawn() for env/rdr + vfork
sub xqx {
	my ($cmd, $env, $rdr) = @_;
	$rdr //= {};
	run_script($cmd, $env, { %$rdr, run_mode => 0, 1 => \(my $out) });
	wantarray ? split(/^/m, $out) : $out;
}

sub start_script {
	my ($cmd, $env, $opt) = @_;
	my ($key, @argv) = @$cmd;
	my $run_mode = $ENV{TEST_RUN_MODE} // $opt->{run_mode} // 2;
	my $sub = $run_mode == 0 ? undef : key2sub($key);
	my $tail_pid;
	if (my $tail_cmd = $ENV{TAIL}) {
		my @paths;
		for (@argv) {
			next unless /\A--std(?:err|out)=(.+)\z/;
			push @paths, $1;
		}
		if ($opt) {
			for (1, 2) {
				my $f = $opt->{$_} or next;
				if (!ref($f)) {
					push @paths, $f;
				} elsif (ref($f) eq 'GLOB' && $^O eq 'linux') {
					my $fd = fileno($f);
					my $f = readlink "/proc/$$/fd/$fd";
					push @paths, $f if -e $f;
				}
			}
		}
		if (@paths) {
			defined($tail_pid = fork) or die "fork: $!\n";
			if ($tail_pid == 0) {
				# make sure files exist, first
				open my $fh, '>>', $_ for @paths;
				open(STDOUT, '>&STDERR') or die "1>&2: $!";
				exec(split(' ', $tail_cmd), @paths);
				die "$tail_cmd failed: $!";
			}
			wait_for_tail($tail_pid, scalar @paths);
		}
	}
	defined(my $pid = fork) or die "fork: $!\n";
	if ($pid == 0) {
		eval { PublicInbox::DS->Reset };
		# pretend to be systemd (cf. sd_listen_fds(3))
		# 3 == SD_LISTEN_FDS_START
		my $fd;
		for ($fd = 0; 1; $fd++) {
			my $s = $opt->{$fd};
			last if $fd >= 3 && !defined($s);
			next unless $s;
			my $fl = fcntl($s, F_GETFD, 0);
			if (($fl & FD_CLOEXEC) != FD_CLOEXEC) {
				warn "got FD:".fileno($s)." w/o CLOEXEC\n";
			}
			fcntl($s, F_SETFD, $fl &= ~FD_CLOEXEC);
			dup2(fileno($s), $fd) or die "dup2 failed: $!\n";
		}
		%ENV = (%ENV, %$env) if $env;
		my $fds = $fd - 3;
		if ($fds > 0) {
			$ENV{LISTEN_PID} = $$;
			$ENV{LISTEN_FDS} = $fds;
		}
		$0 = join(' ', @$cmd);
		if ($sub) {
			eval { PublicInbox::DS->Reset };
			_run_sub($sub, $key, \@argv);
			POSIX::_exit($? >> 8);
		} else {
			exec(key2script($key), @argv);
			die "FAIL: ",join(' ', $key, @argv), ": $!\n";
		}
	}
	PublicInboxTestProcess->new($pid, $tail_pid);
}

sub have_xapian_compact () {
	require PublicInbox::Spawn;
	# $ENV{XAPIAN_COMPACT} is used by PublicInbox/Xapcmd.pm, too
	PublicInbox::Spawn::which($ENV{XAPIAN_COMPACT} || 'xapian-compact');
}

package PublicInboxTestProcess;
use strict;

# prevent new threads from inheriting these objects
sub CLONE_SKIP { 1 }

sub new {
	my ($klass, $pid, $tail_pid) = @_;
	bless { pid => $pid, tail_pid => $tail_pid, owner => $$ }, $klass;
}

sub kill {
	my ($self, $sig) = @_;
	CORE::kill($sig // 'TERM', $self->{pid});
}

sub join {
	my ($self, $sig) = @_;
	my $pid = delete $self->{pid} or return;
	CORE::kill($sig, $pid) if defined $sig;
	my $ret = waitpid($pid, 0);
	defined($ret) or die "waitpid($pid): $!";
	$ret == $pid or die "waitpid($pid) != $ret";
}

sub DESTROY {
	my ($self) = @_;
	return if $self->{owner} != $$;
	if (my $tail_pid = delete $self->{tail_pid}) {
		PublicInbox::TestCommon::wait_for_tail($tail_pid, -1);
		CORE::kill('TERM', $tail_pid);
	}
	$self->join('TERM');
}

package PublicInbox::TestCommon::InboxWakeup;
use strict;
sub on_inbox_unlock { ${$_[0]}->($_[1]) }

1;
public-inbox-1.6.1/lib/PublicInbox/Tmpfile.pm000066400000000000000000000020661377346120300210460ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors 
# License: AGPL-3.0+ 
package PublicInbox::Tmpfile;
use strict;
use warnings;
use base qw(Exporter);
our @EXPORT = qw(tmpfile);
use Fcntl qw(:DEFAULT);
use Errno qw(EEXIST);
use File::Spec;

# use tmpfile instead of open(..., '+>', undef) so we can get an
# unlinked filename which makes sense when viewed with lsof
# (at least on Linux)
# And if we ever stop caring to have debuggable filenames, O_TMPFILE :)
sub tmpfile ($;$$) {
	my ($id, $sock, $append) = @_;
	if (defined $sock) {
		# add the socket inode number so we can figure out which
		# socket it belongs to
		my @st = stat($sock);
		$id .= '-ino:'.$st[1];
	}
	$id =~ tr!/!^!;

	my $fl = O_RDWR | O_CREAT | O_EXCL;
	$fl |= O_APPEND if $append;
	do {
		my $fn = File::Spec->tmpdir . "/$id-".time.'-'.rand;
		if (sysopen(my $fh, $fn, $fl, 0600)) { # likely
			unlink($fn) or warn "unlink($fn): $!"; # FS broken
			return $fh; # success
		}
	} while ($! == EEXIST);
	undef  # EMFILE/ENFILE/ENOSPC/ENOMEM
}

1;
public-inbox-1.6.1/lib/PublicInbox/URIimap.pm000066400000000000000000000051751377346120300207600ustar00rootroot00000000000000# Copyright (C) 2020 all contributors 
# License: AGPL-3.0+ 
# cf. RFC 5092, which the `URI' package doesn't support
#
# This depends only on the documented public API of the `URI' dist,
# not on internal `_'-prefixed subclasses such as `URI::_server'
#
#  exists, but it's not in
# common distros.
#
# RFC 2192 also describes ";TYPE="
package PublicInbox::URIimap;
use strict;
use URI::Split qw(uri_split uri_join); # part of URI
use URI::Escape qw(uri_unescape);

my %default_ports = (imap => 143, imaps => 993);

sub new {
	my ($class, $url) = @_;
	$url =~ m!\Aimaps?://! ? bless \$url, $class : undef;
}

sub canonical {
	my ($self) = @_;

	# no #frag in RFC 5092 from what I can tell
	my ($scheme, $auth, $path, $query, $_frag) = uri_split($$self);
	$path =~ s!\A/+!/!; # excessive leading slash

	# lowercase the host portion
	$auth =~ s#\A(.*@)?(.*?)(?::([0-9]+))?\z#
		my $ret = ($1//'').lc($2);
		if (defined(my $port = $3)) {
			if ($default_ports{lc($scheme)} != $port) {
				$ret .= ":$port";
			}
		}
		$ret#ei;

	ref($self)->new(uri_join(lc($scheme), $auth, $path, $query));
}

sub host {
	my ($self) = @_;
	my (undef, $auth) = uri_split($$self);
	$auth =~ s!\A.*?@!!;
	$auth =~ s!:[0-9]+\z!!;
	$auth =~ s!\A\[(.*)\]\z!$1!; # IPv6
	uri_unescape($auth);
}

# unescaped, may be used for globbing
sub path {
	my ($self) = @_;
	my (undef, undef, $path) = uri_split($$self);
	$path =~ s!\A/+!!;
	$path =~ s/;.*\z//; # ;UIDVALIDITY=nz-number
	$path eq '' ? undef : $path;
}

sub mailbox {
	my ($self) = @_;
	my $path = path($self);
	defined($path) ? uri_unescape($path) : undef;
}

# TODO: UIDVALIDITY, search, and other params

sub port {
	my ($self) = @_;
	my ($scheme, $auth) = uri_split($$self);
	$auth =~ /:([0-9]+)\z/ ? $1 + 0 : $default_ports{lc($scheme)};
}

sub authority {
	my ($self) = @_;
	my (undef, $auth) = uri_split($$self);
	$auth
}

sub user {
	my ($self) = @_;
	my (undef, $auth) = uri_split($$self);
	$auth =~ s/@.*\z// or return undef; # drop host:port
	$auth =~ s/;.*\z//; # drop ;AUTH=...
	$auth =~ s/:.*\z//; # drop password
	uri_unescape($auth);
}

sub password {
	my ($self) = @_;
	my (undef, $auth) = uri_split($$self);
	$auth =~ s/@.*\z// or return undef; # drop host:port
	$auth =~ s/;.*\z//; # drop ;AUTH=...
	$auth =~ s/\A[^:]+:// ? uri_unescape($auth) : undef; # drop ->user
}

sub auth {
	my ($self) = @_;
	my (undef, $auth) = uri_split($$self);
	$auth =~ s/@.*\z//; # drop host:port
	$auth =~ /;AUTH=(.+)\z/i ? uri_unescape($1) : undef;
}

sub scheme {
	my ($self) = @_;
	(uri_split($$self))[0];
}

sub as_string { ${$_[0]} }

1;
public-inbox-1.6.1/lib/PublicInbox/Unsubscribe.pm000066400000000000000000000125071377346120300217330ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors 
# License: AGPL-3.0+ 
#
# Standalone PSGI app to handle HTTP(s) unsubscribe links generated
# by milters like examples/unsubscribe.milter to mailing lists.
#
# This does not depend on any other modules in the PublicInbox::*
# and ought to be usable with any mailing list software.
package PublicInbox::Unsubscribe;
use strict;
use warnings;
use Crypt::CBC;
use Plack::Util;
use MIME::Base64 qw(decode_base64url);
my $CODE_URL = 'https://public-inbox.org/public-inbox.git';
my @CT_HTML = ('Content-Type', 'text/html; charset=UTF-8');

sub new {
	my ($class, %opt) = @_;
	my $key_file = $opt{key_file};
	defined $key_file or die "`key_file' needed";
	open my $fh, '<', $key_file or die
		"failed to open key_file=$key_file: $!\n";
	my ($key, $iv);
	if (read($fh, $key, 8) != 8 || read($fh, $iv, 8) != 8 ||
				read($fh, my $end, 8) != 0) {
		die "key_file must be 16 bytes\n";
	}

	# these parameters were chosen to generate shorter parameters
	# to reduce the possibility of copy+paste errors
	my $cipher = Crypt::CBC->new(-key => $key,
			-iv => $iv,
			-header => 'none',
			-cipher => 'Blowfish');

	my $e = $opt{owner_email} or die "`owner_email' not specified\n";
	my $unsubscribe = $opt{unsubscribe} or
		die "`unsubscribe' callback not given\n";

	bless {
		pi_config => $opt{pi_config}, # PublicInbox::Config
		owner_email => $opt{owner_email},
		cipher => $cipher,
		unsubscribe => $unsubscribe,
		contact => qq($e),
		code_url => $opt{code_url} || $CODE_URL,
		confirm => $opt{confirm},
	}, $class;
}

# entry point for PSGI
sub call {
	my ($self, $env) = @_;
	my $m = $env->{REQUEST_METHOD};
	if ($m eq 'GET' || $m eq 'HEAD') {
		$self->{confirm} ? confirm_prompt($self, $env)
				 : finalize_unsub($self, $env);
	} elsif ($m eq 'POST') {
		finalize_unsub($self, $env);
	} else {
		r($self, 405,
			Plack::Util::encode_html($m).' method not allowed');
	}
}

sub _user_list_addr {
	my ($self, $env) = @_;
	my ($blank, $u, $list) = split('/', $env->{PATH_INFO});

	if (!defined $u || $u eq '') {
		return r($self, 400, 'Bad request',
			'Missing encrypted email address in path component');
	}
	if (!defined $list && $list eq '') {
		return r($self, 400, 'Bad request',
			'Missing mailing list name in path component');
	}
	my $user = eval { $self->{cipher}->decrypt(decode_base64url($u)) };
	if (!defined $user || index($user, '@') < 1) {
		my $err = quotemeta($@);
		my $errors = $env->{'psgi.errors'};
		$errors->print("error decrypting: $u\n");
		$errors->print("$_\n") for split("\n", $err);
		$u = Plack::Util::encode_html($u);
		return r($self, 400, 'Bad request', "Failed to decrypt: $u");
	}

	# The URLs are too damn long if we have the encrypted domain
	# name in the PATH_INFO
	if (index($list, '@') < 0) {
		my $host = (split(':', $env->{HTTP_HOST}))[0];
		$list .= '@'.$host;
	}
	($user, $list);
}

sub confirm_prompt { # on GET
	my ($self, $env) = @_;
	my ($user_addr, $list_addr) = _user_list_addr($self, $env);
	return $user_addr if ref $user_addr;

	my $xl = Plack::Util::encode_html($list_addr);
	my $xu = Plack::Util::encode_html($user_addr);
	my @body = (
		"Confirmation required to remove", '',
		"\t$xu", '',
		"from the mailing list at", '',
		"\t$xl", '',
		'You will get one last email once you hit "Confirm" below:',
		qq(
) . qq() . '
');

	push @body, archive_info($self, $env, $list_addr);

	r($self, 200, "Confirm unsubscribe for $xl", @body);
}

sub finalize_unsub { # on POST
	my ($self, $env) = @_;
	my ($user_addr, $list_addr) = _user_list_addr($self, $env);
	return $user_addr if ref $user_addr;

	my @archive = archive_info($self, $env, $list_addr);
	if (my $err = $self->{unsubscribe}->($user_addr, $list_addr)) {
		return r($self, 500, Plack::Util::encode_html($err), @archive);
	}

	my $xl = Plack::Util::encode_html($list_addr);
	r($self, 200, "Unsubscribed from $xl",
		'You may get one final goodbye message', @archive);
}

sub r {
	my ($self, $code, $title, @body) = @_;
	[ $code, [ @CT_HTML ], [
		"$title
".
		join("\n", "$title\n", @body) . '

'. "
This page is available under AGPL-3.0+\n" .
		"git clone $self->{code_url}\n" .
		qq(Email $self->{contact} if you have any questions).
		'
' ] ]; } sub archive_info { my ($self, $env, $list_addr) = @_; my $archive_url = $self->{archive_urls}->{$list_addr}; unless ($archive_url) { if (my $config = $self->{pi_config}) { # PublicInbox::Config::lookup my $ibx = $config->lookup($list_addr); # PublicInbox::Inbox::base_url $archive_url = $ibx->base_url if $ibx; } } # protocol-relative URL: "//example.com/" => "https://example.com/" if ($archive_url && $archive_url =~ m!\A//!) { $archive_url = "$env->{'psgi.url_scheme'}:$archive_url"; } # maybe there are other places where we could map # list_addr => archive_url without ~/.public-inbox/config if ($archive_url) { $archive_url = Plack::Util::encode_html($archive_url); ('', 'HTML and git clone-able archives are available at:', qq($archive_url)) } else { ('', 'There ought to be archives for this list,', 'but unfortunately the admin did not configure '. __PACKAGE__. ' to show you the URL'); } } 1; public-inbox-1.6.1/lib/PublicInbox/UserContent.pm000066400000000000000000000071461377346120300217230ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ # Self-updating module containing a sample CSS for client-side # customization by users of public-inbox. Used by Makefile.PL package PublicInbox::UserContent; use strict; use warnings; # this sub is updated automatically: sub CSS () { <<'_' /* * CC0-1.0 * Dark color scheme using 216 web-safe colors, inspired * somewhat by the default color scheme in mutt. * It reduces eyestrain for me, and energy usage for all: * https://en.wikipedia.org/wiki/Light-on-dark_color_scheme */ * { background:#000 !important; color:#ccc !important } /* * Underlined links add visual noise which make them hard-to-read. * Use colors to make them stand out, instead. */ a:link { color:#69f !important; text-decoration:none !important } a:visited { color:#96f !important } /* quoted text in emails gets a different color */ *.q { color:#09f !important } /* * these may be used with cgit , too. * (cgit uses
, public-inbox uses ) */ *.add { color:#0ff !important } /* diff post-image lines */ *.del { color:#f0f !important } /* diff pre-image lines */ *.head { color:#fff !important } /* diff header (metainformation) */ *.hunk { color:#c93 !important } /* diff hunk-header */ /* * highlight 3.x colors (tested 3.18) for displaying blobs. * This doesn't use most of the colors available, as I find too * many colors overwhelming, so the default is commented out. */ .hl.num { color:#f30 !important } /* number */ .hl.esc { color:#f0f !important } /* escape character */ .hl.str { color:#f30 !important } /* string */ .hl.ppc { color:#f0f !important } /* preprocessor */ .hl.pps { color:#f30 !important } /* preprocessor string */ .hl.slc { color:#09f !important } /* single-line comment */ .hl.com { color:#09f !important } /* multi-line comment */ /* .hl.opt { color:#ccc !important } */ /* operator */ /* .hl.ipl { color:#ccc !important } */ /* interpolation */ /* keyword groups kw[a-z] */ .hl.kwa { color:#ff0 !important } .hl.kwb { color:#0f0 !important } .hl.kwc { color:#ff0 !important } /* .hl.kwd { color:#ccc !important } */ /* line-number (unused by public-inbox) */ /* .hl.lin { color:#ccc !important } */ _ } # end of auto-updated sub # return a sample CSS sub sample ($$) { my ($ibx, $env) = @_; my $url_prefix = $ibx->base_url($env); my $preamble = <<""; /* * Firefox users: this goes in \$PROFILE_FOLDER/chrome/userContent.css * where \$PROFILE_FOLDER is platform-specific * * cf. http://kb.mozillazine.org/UserContent.css * http://kb.mozillazine.org/Profile_folder_-_Firefox * * Users of dillo can remove the entire lines with "moz-only" * in them and place the resulting file in ~/.dillo/style.css */ \@-moz-document url-prefix($url_prefix) { /* moz-only */ $preamble . CSS() . "\n} /* moz-only */\n"; } # Auto-update this file based on the contents of a CSS file: # usage: perl -I lib __FILE__ contrib/css/216dark.css # (See Makefile.PL) if (scalar(@ARGV) == 1 && -r __FILE__) { open my $ro, '<', $ARGV[0] or die $!; my $css = do { local $/; <$ro> } or die $!; # indent one level: $css =~ s/^([ \t]*\S)/\t$1/smg; # "!important" overrides whatever the BOFH sets: $css =~ s/;/ !important;/sg; $css =~ s/(\w) \}/$1 !important }/msg; open my $rw, '+<', __FILE__ or die $!; my $out = do { local $/; <$rw> } or die $!; $out =~ s/^sub CSS.*^_\n\}/sub CSS () {\n\t<<'_'\n${css}_\n}/sm; seek $rw, 0, 0; print $rw $out or die $!; close $rw or die $!; } 1; public-inbox-1.6.1/lib/PublicInbox/V2Writable.pm000066400000000000000000001127651377346120300214370ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ # This interface wraps and mimics PublicInbox::Import # Used to write to V2 inboxes (see L). package PublicInbox::V2Writable; use strict; use v5.10.1; use parent qw(PublicInbox::Lock); use PublicInbox::SearchIdxShard; use PublicInbox::Eml; use PublicInbox::Git; use PublicInbox::Import; use PublicInbox::MID qw(mids references); use PublicInbox::ContentHash qw(content_hash content_digest); use PublicInbox::InboxWritable; use PublicInbox::OverIdx; use PublicInbox::Msgmap; use PublicInbox::Spawn qw(spawn popen_rd); use PublicInbox::SearchIdx qw(log2stack crlf_adjust is_ancestor check_size); use IO::Handle; # ->autoflush use File::Temp (); my $OID = qr/[a-f0-9]{40,}/; # an estimate of the post-packed size to the raw uncompressed size my $PACKING_FACTOR = 0.4; # SATA storage lags behind what CPUs are capable of, so relying on # nproc(1) can be misleading and having extra Xapian shards is a # waste of FDs and space. It can also lead to excessive IO latency # and slow things down. Users on NVME or other fast storage can # use the NPROC env or switches in our script/public-inbox-* programs # to increase Xapian shards our $NPROC_MAX_DEFAULT = 4; sub detect_nproc () { # getconf(1) is POSIX, but *NPROCESSORS* vars are not for (qw(_NPROCESSORS_ONLN NPROCESSORS_ONLN)) { `getconf $_ 2>/dev/null` =~ /^(\d+)$/ and return $1; } for my $nproc (qw(nproc gnproc)) { # GNU coreutils nproc `$nproc 2>/dev/null` =~ /^(\d+)$/ and return $1; } # should we bother with `sysctl hw.ncpu`? Those only give # us total processor count, not online processor count. undef } sub nproc_shards ($) { my ($creat_opt) = @_; my $n = $creat_opt->{nproc} if ref($creat_opt) eq 'HASH'; $n //= $ENV{NPROC}; if (!$n) { # assume 2 cores if not detectable or zero state $NPROC_DETECTED = detect_nproc() || 2; $n = $NPROC_DETECTED; $n = $NPROC_MAX_DEFAULT if $n > $NPROC_MAX_DEFAULT; } # subtract for the main process and git-fast-import $n -= 1; $n < 1 ? 1 : $n; } sub count_shards ($) { my ($self) = @_; # always load existing shards in case core count changes: # Also, shard count may change while -watch is running my $srch = $self->{ibx}->search or return 0; delete $self->{ibx}->{search}; $srch->{nshard} // 0 } sub new { # $creat may be any true value, or 0/undef. A hashref is true, # and $creat->{nproc} may be set to an integer my ($class, $v2ibx, $creat) = @_; $v2ibx = PublicInbox::InboxWritable->new($v2ibx); my $dir = $v2ibx->assert_usable_dir; unless (-d $dir) { if ($creat) { require File::Path; File::Path::mkpath($dir); } else { die "$dir does not exist\n"; } } $v2ibx->umask_prepare; my $xpfx = "$dir/xap" . PublicInbox::Search::SCHEMA_VERSION; my $self = { ibx => $v2ibx, im => undef, # PublicInbox::Import parallel => 1, transact_bytes => 0, total_bytes => 0, current_info => '', xpfx => $xpfx, oidx => PublicInbox::OverIdx->new("$xpfx/over.sqlite3"), lock_path => "$dir/inbox.lock", # limit each git repo (epoch) to 1GB or so rotate_bytes => int((1024 * 1024 * 1024) / $PACKING_FACTOR), last_commit => [], # git epoch -> commit }; $self->{oidx}->{-no_fsync} = 1 if $v2ibx->{-no_fsync}; $self->{shards} = count_shards($self) || nproc_shards($creat); bless $self, $class; } # public (for now?) sub init_inbox { my ($self, $shards, $skip_epoch, $skip_artnum) = @_; if (defined $shards) { $self->{parallel} = 0 if $shards == 0; $self->{shards} = $shards if $shards > 0; } $self->idx_init; $self->{mm}->skip_artnum($skip_artnum) if defined $skip_artnum; my $epoch_max = -1; git_dir_latest($self, \$epoch_max); if (defined $skip_epoch && $epoch_max == -1) { $epoch_max = $skip_epoch; } $self->git_init($epoch_max >= 0 ? $epoch_max : 0); $self->done; } # returns undef on duplicate or spam # mimics Import::add and wraps it for v2 sub add { my ($self, $eml, $check_cb) = @_; $self->{ibx}->with_umask(\&_add, $self, $eml, $check_cb); } # indexes a message, returns true if checkpointing is needed sub do_idx ($$$$) { my ($self, $msgref, $mime, $smsg) = @_; $smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref); $self->{oidx}->add_overview($mime, $smsg); my $idx = idx_shard($self, $smsg->{num} % $self->{shards}); $idx->index_raw($msgref, $mime, $smsg); my $n = $self->{transact_bytes} += $smsg->{raw_bytes}; $n >= $self->{batch_bytes}; } sub _add { my ($self, $mime, $check_cb) = @_; # spam check: if ($check_cb) { $mime = $check_cb->($mime, $self->{ibx}) or return; } # All pipes (> $^F) known to Perl 5.6+ have FD_CLOEXEC set, # as does SQLite 3.4.1+ (released in 2007-07-20), and # Xapian 1.3.2+ (released 2015-03-15). # For the most part, we can spawn git-fast-import without # leaking FDs to it... $self->idx_init; my ($num, $mid0) = v2_num_for($self, $mime); defined $num or return; # duplicate defined $mid0 or die "BUG: \$mid0 undefined\n"; my $im = $self->importer; my $smsg = bless { mid => $mid0, num => $num }, 'PublicInbox::Smsg'; my $cmt = $im->add($mime, undef, $smsg); # sets $smsg->{ds|ts|blob} $cmt = $im->get_mark($cmt); $self->{last_commit}->[$self->{epoch_max}] = $cmt; my $msgref = delete $smsg->{-raw_email}; if (do_idx($self, $msgref, $mime, $smsg)) { $self->checkpoint; } $cmt; } sub v2_num_for { my ($self, $mime) = @_; my $mids = mids($mime); if (@$mids) { my $mid = $mids->[0]; my $num = $self->{mm}->mid_insert($mid); if (defined $num) { # common case return ($num, $mid); } # crap, Message-ID is already known, hope somebody just resent: foreach my $m (@$mids) { # read-only lookup now safe to do after above barrier # easy, don't store duplicates # note: do not add more diagnostic info here since # it gets noisy on public-inbox-watch restarts return () if content_exists($self, $mime, $m); } # AltId may pre-populate article numbers (e.g. X-Mail-Count # or NNTP article number), use that article number if it's # not in Over. my $altid = $self->{ibx}->{altid}; if ($altid && grep(/:file=msgmap\.sqlite3\z/, @$altid)) { my $num = $self->{mm}->num_for($mid); if (defined $num && !$self->{oidx}->get_art($num)) { return ($num, $mid); } } # very unlikely: warn "<$mid> reused for mismatched content\n"; # try the rest of the mids for(my $i = $#$mids; $i >= 1; $i--) { my $m = $mids->[$i]; $num = $self->{mm}->mid_insert($m); if (defined $num) { warn "alternative <$m> for <$mid> found\n"; return ($num, $m); } } } # none of the existing Message-IDs are good, generate a new one: v2_num_for_harder($self, $mime); } sub v2_num_for_harder { my ($self, $eml) = @_; my $dig = content_digest($eml); my $mid0 = PublicInbox::Import::digest2mid($dig, $eml); my $num = $self->{mm}->mid_insert($mid0); unless (defined $num) { # it's hard to spoof the last Received: header my @recvd = $eml->header_raw('Received'); $dig->add("Received: $_") foreach (@recvd); $mid0 = PublicInbox::Import::digest2mid($dig, $eml); $num = $self->{mm}->mid_insert($mid0); # fall back to a random Message-ID and give up determinism: until (defined($num)) { $dig->add(rand); $mid0 = PublicInbox::Import::digest2mid($dig, $eml); warn "using random Message-ID <$mid0> as fallback\n"; $num = $self->{mm}->mid_insert($mid0); } } PublicInbox::Import::append_mid($eml, $mid0); ($num, $mid0); } sub idx_shard { my ($self, $shard_i) = @_; $self->{idx_shards}->[$shard_i]; } sub _idx_init { # with_umask callback my ($self, $opt) = @_; $self->lock_acquire unless $opt && $opt->{-skip_lock}; $self->{oidx}->create; # xcpdb can change shard count while -watch is idle my $nshards = count_shards($self); $self->{shards} = $nshards if $nshards && $nshards != $self->{shards}; $self->{batch_bytes} = $opt->{batch_size} // $PublicInbox::SearchIdx::BATCH_BYTES; $self->{batch_bytes} *= $self->{shards} if $self->{parallel}; # need to create all shards before initializing msgmap FD # idx_shards must be visible to all forked processes my $max = $self->{shards} - 1; my $idx = $self->{idx_shards} = []; push @$idx, PublicInbox::SearchIdxShard->new($self, $_) for (0..$max); # Now that all subprocesses are up, we can open the FDs # for SQLite: my $mm = $self->{mm} = PublicInbox::Msgmap->new_file( "$self->{ibx}->{inboxdir}/msgmap.sqlite3", $self->{ibx}->{-no_fsync} ? 2 : 1); $mm->{dbh}->begin_work; } # idempotent sub idx_init { my ($self, $opt) = @_; return if $self->{idx_shards}; my $ibx = $self->{ibx}; # do not leak read-only FDs to child processes, we only have these # FDs for duplicate detection so they should not be # frequently activated. delete @$ibx{qw(mm search)}; $ibx->git->cleanup; $self->{parallel} = 0 if ($ibx->{indexlevel}//'') eq 'basic'; if ($self->{parallel}) { pipe(my ($r, $w)) or die "pipe failed: $!"; # pipe for barrier notifications doesn't need to be big, # 1031: F_SETPIPE_SZ fcntl($w, 1031, 4096) if $^O eq 'linux'; $self->{bnote} = [ $r, $w ]; $w->autoflush(1); } $ibx->umask_prepare; $ibx->with_umask(\&_idx_init, $self, $opt); } # returns an array mapping [ epoch => latest_commit ] # latest_commit may be undef if nothing was done to that epoch # $replace_map = { $object_id => $strref, ... } sub _replace_oids ($$$) { my ($self, $mime, $replace_map) = @_; $self->done; my $pfx = "$self->{ibx}->{inboxdir}/git"; my $rewrites = []; # epoch => commit my $max = $self->{epoch_max}; unless (defined($max)) { defined(my $latest = git_dir_latest($self, \$max)) or return; $self->{epoch_max} = $max; } foreach my $i (0..$max) { my $git_dir = "$pfx/$i.git"; -d $git_dir or next; my $git = PublicInbox::Git->new($git_dir); my $im = $self->import_init($git, 0, 1); $rewrites->[$i] = $im->replace_oids($mime, $replace_map); $im->done; } $rewrites; } sub content_hashes ($) { my ($mime) = @_; my @chashes = ( content_hash($mime) ); # We still support Email::MIME, here, and # Email::MIME->as_string doesn't always round-trip, so we may # use a second content_hash my $rt = content_hash(PublicInbox::Eml->new(\($mime->as_string))); push @chashes, $rt if $chashes[0] ne $rt; \@chashes; } sub content_matches ($$) { my ($chashes, $existing) = @_; my $chash = content_hash($existing); foreach (@$chashes) { return 1 if $_ eq $chash } 0 } # used for removing or replacing (purging) sub rewrite_internal ($$;$$$) { my ($self, $old_eml, $cmt_msg, $new_eml, $sref) = @_; $self->idx_init; my ($im, $need_reindex, $replace_map); if ($sref) { $replace_map = {}; # oid => sref $need_reindex = [] if $new_eml; } else { $im = $self->importer; } my $oidx = $self->{oidx}; my $chashes = content_hashes($old_eml); my $removed = []; my $mids = mids($old_eml); # We avoid introducing new blobs into git since the raw content # can be slightly different, so we do not need the user-supplied # message now that we have the mids and content_hash $old_eml = undef; my $mark; foreach my $mid (@$mids) { my %gone; # num => [ smsg, $mime, raw ] my ($id, $prev); while (my $smsg = $oidx->next_by_mid($mid, \$id, \$prev)) { my $msg = get_blob($self, $smsg); if (!defined($msg)) { warn "broken smsg for $mid\n"; next; # continue } my $orig = $$msg; my $cur = PublicInbox::Eml->new($msg); if (content_matches($chashes, $cur)) { $gone{$smsg->{num}} = [ $smsg, $cur, \$orig ]; } } my $n = scalar keys %gone; next unless $n; if ($n > 1) { warn "BUG: multiple articles linked to <$mid>\n", join(',', sort keys %gone), "\n"; } foreach my $num (keys %gone) { my ($smsg, $mime, $orig) = @{$gone{$num}}; # $removed should only be set once assuming # no bugs in our deduplication code: $removed = [ undef, $mime, $smsg ]; my $oid = $smsg->{blob}; if ($replace_map) { $replace_map->{$oid} = $sref; } else { ($mark, undef) = $im->remove($orig, $cmt_msg); $removed->[0] = $mark; } $orig = undef; if ($need_reindex) { # ->replace push @$need_reindex, $smsg; } else { # ->purge or ->remove $self->{mm}->num_delete($num); } unindex_oid_remote($self, $oid, $mid); } } if (defined $mark) { my $cmt = $im->get_mark($mark); $self->{last_commit}->[$self->{epoch_max}] = $cmt; } if ($replace_map && scalar keys %$replace_map) { my $rewrites = _replace_oids($self, $new_eml, $replace_map); return { rewrites => $rewrites, need_reindex => $need_reindex }; } defined($mark) ? $removed : undef; } # public (see PublicInbox::Import->remove), but note the 3rd element # (retval[2]) is not part of the stable API shared with Import->remove sub remove { my ($self, $eml, $cmt_msg) = @_; my $r = $self->{ibx}->with_umask(\&rewrite_internal, $self, $eml, $cmt_msg); defined($r) && defined($r->[0]) ? @$r: undef; } sub _replace ($$;$$) { my ($self, $old_eml, $new_eml, $sref) = @_; my $arg = [ $self, $old_eml, undef, $new_eml, $sref ]; my $rewritten = $self->{ibx}->with_umask(\&rewrite_internal, $self, $old_eml, undef, $new_eml, $sref) or return; my $rewrites = $rewritten->{rewrites}; # ->done is called if there are rewrites since we gc+prune from git $self->idx_init if @$rewrites; for my $i (0..$#$rewrites) { defined(my $cmt = $rewrites->[$i]) or next; $self->{last_commit}->[$i] = $cmt; } $rewritten; } # public sub purge { my ($self, $mime) = @_; my $rewritten = _replace($self, $mime, undef, \'') or return; $rewritten->{rewrites} } # returns the git object_id of $fh, does not write the object to FS sub git_hash_raw ($$) { my ($self, $raw) = @_; # grab the expected OID we have to reindex: pipe(my($in, $w)) or die "pipe: $!"; my $git_dir = $self->{ibx}->git->{git_dir}; my $cmd = ['git', "--git-dir=$git_dir", qw(hash-object --stdin)]; my $r = popen_rd($cmd, undef, { 0 => $in }); print $w $$raw or die "print \$w: $!"; close $w or die "close \$w: $!"; local $/ = "\n"; chomp(my $oid = <$r>); close $r or die "git hash-object failed: $?"; $oid =~ /\A$OID\z/ or die "OID not expected: $oid"; $oid; } sub _check_mids_match ($$$) { my ($old_list, $new_list, $hdrs) = @_; my %old_mids = map { $_ => 1 } @$old_list; my %new_mids = map { $_ => 1 } @$new_list; my @old = keys %old_mids; my @new = keys %new_mids; my $err = "$hdrs may not be changed when replacing\n"; die $err if scalar(@old) != scalar(@new); delete @new_mids{@old}; delete @old_mids{@new}; die $err if (scalar(keys %old_mids) || scalar(keys %new_mids)); } # Changing Message-IDs or References with ->replace isn't supported. # The rules for dealing with messages with multiple or conflicting # Message-IDs are pretty complex and rethreading hasn't been fully # implemented, yet. sub check_mids_match ($$) { my ($old, $new) = @_; _check_mids_match(mids($old), mids($new), 'Message-ID(s)'); _check_mids_match(references($old), references($new), 'References/In-Reply-To'); } # public sub replace ($$$) { my ($self, $old_mime, $new_mime) = @_; check_mids_match($old_mime, $new_mime); # mutt will always add Content-Length:, Status:, Lines: when editing PublicInbox::Import::drop_unwanted_headers($new_mime); my $raw = $new_mime->as_string; my $expect_oid = git_hash_raw($self, \$raw); my $rewritten = _replace($self, $old_mime, $new_mime, \$raw) or return; my $need_reindex = $rewritten->{need_reindex}; # just in case we have bugs in deduplication code: my $n = scalar(@$need_reindex); if ($n > 1) { my $list = join(', ', map { "$_->{num}: <$_->{mid}>" } @$need_reindex); warn <<""; W: rewritten $n messages matching content of original message (expected: 1). W: possible bug in public-inbox, NNTP article IDs and Message-IDs follow: W: $list } # make sure we really got the OID: my ($blob, $type, $bytes) = $self->{ibx}->git->check($expect_oid); $blob eq $expect_oid or die "BUG: $expect_oid not found after replace"; # don't leak FDs to Xapian: $self->{ibx}->git->cleanup; # reindex modified messages: for my $smsg (@$need_reindex) { my $new_smsg = bless { blob => $blob, raw_bytes => $bytes, num => $smsg->{num}, mid => $smsg->{mid}, }, 'PublicInbox::Smsg'; my $sync = { autime => $smsg->{ds}, cotime => $smsg->{ts} }; $new_smsg->populate($new_mime, $sync); do_idx($self, \$raw, $new_mime, $new_smsg); } $rewritten->{rewrites}; } sub last_epoch_commit ($$;$) { my ($self, $i, $cmt) = @_; my $v = PublicInbox::Search::SCHEMA_VERSION(); $self->{mm}->last_commit_xap($v, $i, $cmt); } sub set_last_commits ($) { my ($self) = @_; defined(my $epoch_max = $self->{epoch_max}) or return; my $last_commit = $self->{last_commit}; foreach my $i (0..$epoch_max) { defined(my $cmt = $last_commit->[$i]) or next; $last_commit->[$i] = undef; last_epoch_commit($self, $i, $cmt); } } sub barrier_init { my ($self, $n) = @_; $self->{bnote} or return; --$n; my $barrier = { map { $_ => 1 } (0..$n) }; } sub barrier_wait { my ($self, $barrier) = @_; my $bnote = $self->{bnote} or return; my $r = $bnote->[0]; while (scalar keys %$barrier) { defined(my $l = readline($r)) or die "EOF on barrier_wait: $!"; $l =~ /\Abarrier (\d+)/ or die "bad line on barrier_wait: $l"; delete $barrier->{$1} or die "bad shard[$1] on barrier wait"; } } # public sub checkpoint ($;$) { my ($self, $wait) = @_; if (my $im = $self->{im}) { if ($wait) { $im->barrier; } else { $im->checkpoint; } } my $shards = $self->{idx_shards}; if ($shards) { my $dbh = $self->{mm}->{dbh}; # SQLite msgmap data is second in importance $dbh->commit; # SQLite overview is third $self->{oidx}->commit_lazy; # Now deal with Xapian if ($wait) { my $barrier = $self->barrier_init(scalar @$shards); # each shard needs to issue a barrier command $_->shard_barrier for @$shards; # wait for each Xapian shard $self->barrier_wait($barrier); } else { $_->shard_commit for @$shards; } # last_commit is special, don't commit these until # remote shards are done: $dbh->begin_work; set_last_commits($self); $dbh->commit; $dbh->begin_work; } $self->{total_bytes} += $self->{transact_bytes}; $self->{transact_bytes} = 0; } # issue a write barrier to ensure all data is visible to other processes # and read-only ops. Order of data importance is: git > SQLite > Xapian # public sub barrier { checkpoint($_[0], 1) }; # true if locked and active sub active { !!$_[0]->{im} } # public sub done { my ($self) = @_; my $err = ''; if (my $im = delete $self->{im}) { eval { $im->done }; # PublicInbox::Import::done $err .= "import done: $@\n" if $@; } if (!$err) { eval { checkpoint($self) }; $err .= "checkpoint: $@\n" if $@; } if (my $mm = delete $self->{mm}) { my $m = $err ? 'rollback' : 'commit'; eval { $mm->{dbh}->$m }; $err .= "msgmap $m: $@\n" if $@; } my $shards = delete $self->{idx_shards}; if ($shards) { for (@$shards) { eval { $_->shard_close }; $err .= "shard close: $@\n" if $@; } } eval { $self->{oidx}->dbh_close }; $err .= "over close: $@\n" if $@; delete $self->{bnote}; my $nbytes = $self->{total_bytes}; $self->{total_bytes} = 0; $self->lock_release(!!$nbytes) if $shards; $self->{ibx}->git->cleanup; die $err if $err; } sub fill_alternates ($$) { my ($self, $epoch) = @_; my $pfx = "$self->{ibx}->{inboxdir}/git"; my $all = "$self->{ibx}->{inboxdir}/all.git"; PublicInbox::Import::init_bare($all) unless -d $all; my $info_dir = "$all/objects/info"; my $alt = "$info_dir/alternates"; my (%alt, $new); my $mode = 0644; if (-e $alt) { open(my $fh, '<', $alt) or die "open < $alt: $!\n"; $mode = (stat($fh))[2] & 07777; # we assign a sort score to every alternate and favor # the newest (highest numbered) one because loose objects # require scanning epochs and only the latest epoch is # expected to see loose objects my $score; my $other = 0; # in case admin adds non-epoch repos %alt = map {; if (m!\A\Q../../\E([0-9]+)\.git/objects\z!) { $score = $1 + 0; } else { $score = --$other; } $_ => $score; } split(/\n+/, do { local $/; <$fh> }); } foreach my $i (0..$epoch) { my $dir = "../../git/$i.git/objects"; if (!exists($alt{$dir}) && -d "$pfx/$i.git") { $alt{$dir} = $i; $new = 1; } } return unless $new; my $fh = File::Temp->new(TEMPLATE => 'alt-XXXXXXXX', DIR => $info_dir); my $tmp = $fh->filename; print $fh join("\n", sort { $alt{$b} <=> $alt{$a} } keys %alt), "\n" or die "print $tmp: $!\n"; chmod($mode, $fh) or die "fchmod $tmp: $!\n"; close $fh or die "close $tmp $!\n"; rename($tmp, $alt) or die "rename $tmp => $alt: $!\n"; $fh->unlink_on_destroy(0); } sub git_init { my ($self, $epoch) = @_; my $git_dir = "$self->{ibx}->{inboxdir}/git/$epoch.git"; PublicInbox::Import::init_bare($git_dir); my @cmd = (qw/git config/, "--file=$git_dir/config", 'include.path', '../../all.git/config'); PublicInbox::Import::run_die(\@cmd); fill_alternates($self, $epoch); $git_dir } sub git_dir_latest { my ($self, $max) = @_; $$max = -1; my $pfx = "$self->{ibx}->{inboxdir}/git"; return unless -d $pfx; my $latest; opendir my $dh, $pfx or die "opendir $pfx: $!\n"; while (defined(my $git_dir = readdir($dh))) { $git_dir =~ m!\A([0-9]+)\.git\z! or next; if ($1 > $$max) { $$max = $1; $latest = "$pfx/$git_dir"; } } $latest; } sub importer { my ($self) = @_; my $im = $self->{im}; if ($im) { if ($im->{bytes_added} < $self->{rotate_bytes}) { return $im; } else { $self->{im} = undef; $im->done; $im = undef; $self->checkpoint; my $git_dir = $self->git_init(++$self->{epoch_max}); my $git = PublicInbox::Git->new($git_dir); return $self->import_init($git, 0); } } my $epoch = 0; my $max; my $latest = git_dir_latest($self, \$max); if (defined $latest) { my $git = PublicInbox::Git->new($latest); my $packed_bytes = $git->packed_bytes; my $unpacked_bytes = $packed_bytes / $PACKING_FACTOR; if ($unpacked_bytes >= $self->{rotate_bytes}) { $epoch = $max + 1; } else { $self->{epoch_max} = $max; return $self->import_init($git, $packed_bytes); } } $self->{epoch_max} = $epoch; $latest = $self->git_init($epoch); $self->import_init(PublicInbox::Git->new($latest), 0); } sub import_init { my ($self, $git, $packed_bytes, $tmp) = @_; my $im = PublicInbox::Import->new($git, undef, undef, $self->{ibx}); $im->{bytes_added} = int($packed_bytes / $PACKING_FACTOR); $im->{lock_path} = undef; $im->{path_type} = 'v2'; $self->{im} = $im unless $tmp; $im; } # XXX experimental sub diff ($$$) { my ($mid, $cur, $new) = @_; my $ah = File::Temp->new(TEMPLATE => 'email-cur-XXXXXXXX', TMPDIR => 1); print $ah $cur->as_string or die "print: $!"; $ah->flush or die "flush: $!"; PublicInbox::Import::drop_unwanted_headers($new); my $bh = File::Temp->new(TEMPLATE => 'email-new-XXXXXXXX', TMPDIR => 1); print $bh $new->as_string or die "print: $!"; $bh->flush or die "flush: $!"; my $cmd = [ qw(diff -u), $ah->filename, $bh->filename ]; print STDERR "# MID conflict <$mid>\n"; my $pid = spawn($cmd, undef, { 1 => 2 }); waitpid($pid, 0) == $pid or die "diff did not finish"; } sub get_blob ($$) { my ($self, $smsg) = @_; if (my $im = $self->{im}) { my $msg = $im->cat_blob($smsg->{blob}); return $msg if $msg; } # older message, should be in alternates $self->{ibx}->msg_by_smsg($smsg); } sub content_exists ($$$) { my ($self, $mime, $mid) = @_; my $oidx = $self->{oidx}; my $chashes = content_hashes($mime); my ($id, $prev); while (my $smsg = $oidx->next_by_mid($mid, \$id, \$prev)) { my $msg = get_blob($self, $smsg); if (!defined($msg)) { warn "broken smsg for $mid\n"; next; } my $cur = PublicInbox::Eml->new($msg); return 1 if content_matches($chashes, $cur); # XXX DEBUG_DIFF is experimental and may be removed diff($mid, $cur, $mime) if $ENV{DEBUG_DIFF}; } undef; } sub atfork_child { my ($self) = @_; if (my $shards = $self->{idx_shards}) { $_->atfork_child foreach @$shards; } if (my $im = $self->{im}) { $im->atfork_child; } die "unexpected mm" if $self->{mm}; close $self->{bnote}->[0] or die "close bnote[0]: $!\n"; $self->{bnote}->[1]; } sub reindex_checkpoint ($$) { my ($self, $sync) = @_; $self->{ibx}->git->cleanup; # *async_wait ${$sync->{need_checkpoint}} = 0; my $mm_tmp = $sync->{mm_tmp}; $mm_tmp->atfork_prepare if $mm_tmp; $self->done; # release lock if (my $pr = $sync->{-opt}->{-progress}) { $pr->(sprintf($sync->{-regen_fmt}, ${$sync->{nr}})); } # allow -watch or -mda to write... $self->idx_init($sync->{-opt}); # reacquire lock $mm_tmp->atfork_parent if $mm_tmp; } sub index_oid { # cat_async callback my ($bref, $oid, $type, $size, $arg) = @_; return if $size == 0; # purged my ($num, $mid0); my $eml = PublicInbox::Eml->new($$bref); my $mids = mids($eml); my $chash = content_hash($eml); my $self = $arg->{v2w}; if (scalar(@$mids) == 0) { warn "E: $oid has no Message-ID, skipping\n"; return; } # {unindexed} is unlikely if ((my $unindexed = $arg->{unindexed}) && scalar(@$mids) == 1) { $num = delete($unindexed->{$mids->[0]}); if (defined $num) { $mid0 = $mids->[0]; $self->{mm}->mid_set($num, $mid0); delete($arg->{unindexed}) if !keys(%$unindexed); } } if (!defined($num)) { # reuse if reindexing (or duplicates) my $oidx = $self->{oidx}; for my $mid (@$mids) { ($num, $mid0) = $oidx->num_mid0_for_oid($oid, $mid); last if defined $num; } } $mid0 //= do { # is this a number we got before? $num = $arg->{mm_tmp}->num_for($mids->[0]); defined($num) ? $mids->[0] : undef; }; if (!defined($num)) { for (my $i = $#$mids; $i >= 1; $i--) { $num = $arg->{mm_tmp}->num_for($mids->[$i]); if (defined($num)) { $mid0 = $mids->[$i]; last; } } } if (defined($num)) { $arg->{mm_tmp}->num_delete($num); } else { # never seen $num = $self->{mm}->mid_insert($mids->[0]); if (defined($num)) { $mid0 = $mids->[0]; } else { # rare, try the rest of them, backwards for (my $i = $#$mids; $i >= 1; $i--) { $num = $self->{mm}->mid_insert($mids->[$i]); if (defined($num)) { $mid0 = $mids->[$i]; last; } } } } if (!defined($num)) { warn "E: $oid <", join('> <', @$mids), "> is a duplicate\n"; return; } ++${$arg->{nr}}; my $smsg = bless { raw_bytes => $size, num => $num, blob => $oid, mid => $mid0, }, 'PublicInbox::Smsg'; $smsg->populate($eml, $arg); if (do_idx($self, $bref, $eml, $smsg)) { ${$arg->{need_checkpoint}} = 1; } } # only update last_commit for $i on reindex iff newer than current sub update_last_commit ($$$$) { my ($self, $git, $i, $cmt) = @_; my $last = last_epoch_commit($self, $i); if (defined $last && is_ancestor($git, $last, $cmt)) { my @cmd = (qw(rev-list --count), "$last..$cmt"); chomp(my $n = $git->qx(@cmd)); return if $n ne '' && $n == 0; } last_epoch_commit($self, $i, $cmt); } sub git_dir_n ($$) { "$_[0]->{ibx}->{inboxdir}/git/$_[1].git" } sub last_commits ($$) { my ($self, $epoch_max) = @_; my $heads = []; for (my $i = $epoch_max; $i >= 0; $i--) { $heads->[$i] = last_epoch_commit($self, $i); } $heads; } # returns a revision range for git-log(1) sub log_range ($$$$$) { my ($self, $sync, $git, $i, $tip) = @_; my $opt = $sync->{-opt}; my $pr = $opt->{-progress} if (($opt->{verbose} || 0) > 1); my $cur = $sync->{ranges}->[$i] or do { $pr->("$i.git indexing all of $tip\n") if $pr; return $tip; # all of it }; # fast equality check to avoid (v)fork+execve overhead if ($cur eq $tip) { $sync->{ranges}->[$i] = undef; return; } my $range = "$cur..$tip"; $pr->("$i.git checking contiguity... ") if $pr; if (is_ancestor($git, $cur, $tip)) { # common case $pr->("OK\n") if $pr; my $n = $git->qx(qw(rev-list --count), $range); chomp($n); if ($n == 0) { $sync->{ranges}->[$i] = undef; $pr->("$i.git has nothing new\n") if $pr; return; # nothing to do } $pr->("$i.git has $n changes since $cur\n") if $pr; } else { $pr->("FAIL\n") if $pr; warn <<""; discontiguous range: $range Rewritten history? (in $git->{git_dir}) chomp(my $base = $git->qx('merge-base', $tip, $cur)); if ($base) { $range = "$base..$tip"; warn "found merge-base: $base\n" } else { $range = $tip; warn "discarding history at $cur\n"; } warn <<""; reindexing $git->{git_dir} starting at $range $sync->{unindex_range}->{$i} = "$base..$cur"; } $range; } sub sync_prepare ($$$) { my ($self, $sync, $epoch_max) = @_; my $pr = $sync->{-opt}->{-progress}; my $regen_max = 0; my $head = $self->{ibx}->{ref_head} || 'refs/heads/master'; # reindex stops at the current heads and we later rerun index_sync # without {reindex} my $reindex_heads = last_commits($self, $epoch_max) if $sync->{reindex}; for (my $i = $epoch_max; $i >= 0; $i--) { my $git_dir = git_dir_n($self, $i); -d $git_dir or next; # missing epochs are fine my $git = PublicInbox::Git->new($git_dir); if ($reindex_heads) { $head = $reindex_heads->[$i] or next; } chomp(my $tip = $git->qx(qw(rev-parse -q --verify), $head)); next if $?; # new repo my $range = log_range($self, $sync, $git, $i, $tip) or next; # can't use 'rev-list --count' if we use --diff-filter $pr->("$i.git counting $range ... ") if $pr; # Don't bump num_highwater on --reindex by using {D}. # We intentionally do NOT use {D} in the non-reindex case # because we want NNTP article number gaps from unindexed # messages to show up in mirrors, too. $sync->{D} //= $sync->{reindex} ? {} : undef; # OID_BIN => NR my $stk = log2stack($sync, $git, $range, $self->{ibx}); my $nr = $stk ? $stk->num_records : 0; $pr->("$nr\n") if $pr; $sync->{stacks}->[$i] = $stk if $stk; $regen_max += $nr; } # XXX this should not happen unless somebody bypasses checks in # our code and blindly injects "d" file history into git repos if (my @leftovers = keys %{delete($sync->{D}) // {}}) { warn('W: unindexing '.scalar(@leftovers)." leftovers\n"); my $arg = { v2w => $self }; my $all = $self->{ibx}->git; for my $oid (@leftovers) { $oid = unpack('H*', $oid); $self->{current_info} = "leftover $oid"; $all->cat_async($oid, \&unindex_oid, $arg); } $all->cat_async_wait; } if (!$regen_max && !keys(%{$self->{unindex_range}})) { $sync->{-regen_fmt} = "%u/?\n"; return 0; } # reindex should NOT see new commits anymore, if we do, # it's a problem and we need to notice it via die() my $pad = length($regen_max) + 1; $sync->{-regen_fmt} = "% ${pad}u/$regen_max\n"; $sync->{nr} = \(my $nr = 0); return -1 if $sync->{reindex}; $regen_max + $self->{mm}->num_highwater() || 0; } sub unindex_oid_remote ($$$) { my ($self, $oid, $mid) = @_; my @removed = $self->{oidx}->remove_oid($oid, $mid); for my $num (@removed) { my $idx = idx_shard($self, $num % $self->{shards}); $idx->shard_remove($oid, $num); } } sub unindex_oid ($$;$) { # git->cat_async callback my ($bref, $oid, $type, $size, $sync) = @_; my $self = $sync->{v2w}; my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef; my $mm = $self->{mm}; my $mids = mids(PublicInbox::Eml->new($bref)); undef $$bref; my $oidx = $self->{oidx}; foreach my $mid (@$mids) { my %gone; my ($id, $prev); while (my $smsg = $oidx->next_by_mid($mid, \$id, \$prev)) { $gone{$smsg->{num}} = 1 if $oid eq $smsg->{blob}; } my $n = scalar(keys(%gone)) or next; if ($n > 1) { warn "BUG: multiple articles linked to $oid\n", join(',',sort keys %gone), "\n"; } foreach my $num (keys %gone) { if ($unindexed) { my $mid0 = $mm->mid_for($num); $unindexed->{$mid0} = $num; } $mm->num_delete($num); } unindex_oid_remote($self, $oid, $mid); } } # this is rare, it only happens when we get discontiguous history in # a mirror because the source used -purge or -edit sub unindex ($$$$) { my ($self, $sync, $git, $unindex_range) = @_; my $unindexed = $sync->{unindexed} //= {}; # $mid0 => $num my $before = scalar keys %$unindexed; # order does not matter, here: my @cmd = qw(log --raw -r --no-notes --no-color --no-abbrev --no-renames); my $fh = $git->popen(@cmd, $unindex_range); my $all = $self->{ibx}->git; local $sync->{in_unindex} = 1; while (<$fh>) { /\A:\d{6} 100644 $OID ($OID) [AM]\tm$/o or next; $all->cat_async($1, \&unindex_oid, $sync); } close $fh or die "git log failed: \$?=$?"; $all->cat_async_wait; return unless $sync->{-opt}->{prune}; my $after = scalar keys %$unindexed; return if $before == $after; # ensure any blob can not longer be accessed via dumb HTTP PublicInbox::Import::run_die(['git', "--git-dir=$git->{git_dir}", qw(-c gc.reflogExpire=now gc --prune=all --quiet)]); } sub sync_ranges ($$$) { my ($self, $sync, $epoch_max) = @_; my $reindex = $sync->{reindex}; return last_commits($self, $epoch_max) unless $reindex; return [] if ref($reindex) ne 'HASH'; my $ranges = $reindex->{from}; # arrayref; if (ref($ranges) ne 'ARRAY') { die 'BUG: $reindex->{from} not an ARRAY'; } $ranges; } sub index_xap_only { # git->cat_async callback my ($bref, $oid, $type, $size, $smsg) = @_; my $self = $smsg->{v2w}; my $idx = idx_shard($self, $smsg->{num} % $self->{shards}); $smsg->{raw_bytes} = $size; $idx->index_raw($bref, undef, $smsg); $self->{transact_bytes} += $size; } sub index_xap_step ($$$;$) { my ($self, $sync, $beg, $step) = @_; my $end = $sync->{art_end}; return if $beg > $end; # nothing to do $step //= $self->{shards}; my $ibx = $self->{ibx}; if (my $pr = $sync->{-opt}->{-progress}) { $pr->("Xapian indexlevel=$ibx->{indexlevel} ". "$beg..$end (% $step)\n"); } for (my $num = $beg; $num <= $end; $num += $step) { my $smsg = $ibx->over->get_art($num) or next; $smsg->{v2w} = $self; $ibx->git->cat_async($smsg->{blob}, \&index_xap_only, $smsg); if ($self->{transact_bytes} >= $self->{batch_bytes}) { ${$sync->{nr}} = $num; reindex_checkpoint($self, $sync); } } } sub index_epoch ($$$) { my ($self, $sync, $i) = @_; my $git_dir = git_dir_n($self, $i); -d $git_dir or return; # missing epochs are fine my $git = PublicInbox::Git->new($git_dir); if (my $unindex_range = delete $sync->{unindex_range}->{$i}) { # rare unindex($self, $sync, $git, $unindex_range); } defined(my $stk = $sync->{stacks}->[$i]) or return; $sync->{stacks}->[$i] = undef; my $all = $self->{ibx}->git; while (my ($f, $at, $ct, $oid) = $stk->pop_rec) { $self->{current_info} = "$i.git $oid"; if ($f eq 'm') { my $arg = { %$sync, autime => $at, cotime => $ct }; if ($sync->{max_size}) { $all->check_async($oid, \&check_size, $arg); } else { $all->cat_async($oid, \&index_oid, $arg); } } elsif ($f eq 'd') { $all->cat_async($oid, \&unindex_oid, $sync); } if (${$sync->{need_checkpoint}}) { reindex_checkpoint($self, $sync); } } $all->check_async_wait; $all->cat_async_wait; update_last_commit($self, $git, $i, $stk->{latest_cmt}); } sub xapian_only { my ($self, $opt, $sync, $art_beg) = @_; my $seq = $opt->{sequential_shard}; $art_beg //= 0; local $self->{parallel} = 0 if $seq; $self->idx_init($opt); # acquire lock if (my $art_end = $self->{ibx}->mm->max) { $sync //= { need_checkpoint => \(my $bool = 0), -opt => $opt, v2w => $self, nr => \(my $nr = 0), -regen_fmt => "%u/?\n", }; $sync->{art_end} = $art_end; if ($seq || !$self->{parallel}) { my $shard_end = $self->{shards} - 1; for my $i (0..$shard_end) { index_xap_step($self, $sync, $art_beg + $i); if ($i != $shard_end) { reindex_checkpoint($self, $sync); } } } else { # parallel (maybe) index_xap_step($self, $sync, $art_beg, 1); } } $self->{ibx}->git->cat_async_wait; $self->done; } # public, called by public-inbox-index sub index_sync { my ($self, $opt) = @_; $opt //= {}; return xapian_only($self, $opt) if $opt->{xapian_only}; my $pr = $opt->{-progress}; my $epoch_max; my $latest = git_dir_latest($self, \$epoch_max); return unless defined $latest; my $seq = $opt->{sequential_shard}; my $art_beg; # the NNTP article number we start xapian_only at my $idxlevel = $self->{ibx}->{indexlevel}; local $self->{ibx}->{indexlevel} = 'basic' if $seq; $self->idx_init($opt); # acquire lock fill_alternates($self, $epoch_max); $self->{oidx}->rethread_prepare($opt); my $sync = { need_checkpoint => \(my $bool = 0), unindex_range => {}, # EPOCH => oid_old..oid_new reindex => $opt->{reindex}, -opt => $opt, v2w => $self, }; $sync->{ranges} = sync_ranges($self, $sync, $epoch_max); if (sync_prepare($self, $sync, $epoch_max)) { # tmp_clone seems to fail if inside a transaction, so # we rollback here (because we opened {mm} for reading) # Note: we do NOT rely on DBI transactions for atomicity; # only for batch performance. $self->{mm}->{dbh}->rollback; $self->{mm}->{dbh}->begin_work; $sync->{mm_tmp} = $self->{mm}->tmp_clone($self->{ibx}->{inboxdir}); # xapian_only works incrementally w/o --reindex if ($seq && !$opt->{reindex}) { $art_beg = $sync->{mm_tmp}->max; $art_beg++ if defined($art_beg); } } if ($sync->{max_size} = $opt->{max_size}) { $sync->{index_oid} = \&index_oid; } # work forwards through history index_epoch($self, $sync, $_) for (0..$epoch_max); $self->{oidx}->rethread_done($opt); $self->done; if (my $nr = $sync->{nr}) { my $pr = $sync->{-opt}->{-progress}; $pr->('all.git '.sprintf($sync->{-regen_fmt}, $$nr)) if $pr; } # deal with Xapian shards sequentially if ($seq && delete($sync->{mm_tmp})) { $self->{ibx}->{indexlevel} = $idxlevel; xapian_only($self, $opt, $sync, $art_beg); } # --reindex on the command-line if ($opt->{reindex} && !ref($opt->{reindex}) && $idxlevel ne 'basic') { $self->lock_acquire; my $s0 = PublicInbox::SearchIdx->new($self->{ibx}, 0, 0); if (my $xdb = $s0->idx_acquire) { my $n = $xdb->get_metadata('has_threadid'); $xdb->set_metadata('has_threadid', '1') if $n ne '1'; } $s0->idx_release; $self->lock_release; } # reindex does not pick up new changes, so we rerun w/o it: if ($opt->{reindex}) { my %again = %$opt; $sync = undef; delete @again{qw(rethread reindex -skip_lock)}; index_sync($self, \%again); } } 1; public-inbox-1.6.1/lib/PublicInbox/View.pm000066400000000000000000001056441377346120300203660ustar00rootroot00000000000000# Copyright (C) 2014-2020 all contributors # License: AGPL-3.0+ # # Used for displaying the HTML web interface. # See Documentation/design_www.txt for this. package PublicInbox::View; use strict; use warnings; use bytes (); # only for bytes::length use List::Util qw(max); use PublicInbox::MsgTime qw(msg_datestamp); use PublicInbox::Hval qw(ascii_html obfuscate_addrs prurl mid_href ts2str fmt_ts); use PublicInbox::Linkify; use PublicInbox::MID qw(id_compress mids mids_for_index references $MID_EXTRACT); use PublicInbox::MsgIter; use PublicInbox::Address; use PublicInbox::WwwStream qw(html_oneshot); use PublicInbox::Reply; use PublicInbox::ViewDiff qw(flush_diff); use PublicInbox::Eml; use Time::Local qw(timegm); use PublicInbox::Smsg qw(subject_normalized); use constant COLS => 72; use constant INDENT => ' '; use constant TCHILD => '` '; sub th_pfx ($) { $_[0] == 0 ? '' : TCHILD }; sub msg_page_i { my ($ctx, $eml) = @_; if ($eml) { # called by WwwStream::async_eml or getline my $smsg = $ctx->{smsg}; $ctx->{smsg} = $ctx->{over}->next_by_mid(@{$ctx->{next_arg}}); $ctx->{mhref} = ($ctx->{nr} || $ctx->{smsg}) ? "../${\mid_href($smsg->{mid})}/" : ''; my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($eml, $ctx); multipart_text_as_html($eml, $ctx); delete $ctx->{obuf}; $$obuf .= '

'; $$obuf .= html_footer($ctx, $ctx->{first_hdr}) if !$ctx->{smsg}; $$obuf; } else { # called by WwwStream::async_next or getline $ctx->{smsg}; # may be undef } } # /$INBOX/$MSGID/ for unindexed v1 inboxes sub no_over_html ($) { my ($ctx) = @_; my $bref = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return; # 404 my $eml = PublicInbox::Eml->new($bref); $ctx->{mhref} = ''; PublicInbox::WwwStream::init($ctx); my $obuf = $ctx->{obuf} = _msg_page_prepare_obuf($eml, $ctx); multipart_text_as_html($eml, $ctx); delete $ctx->{obuf}; $$obuf .= '

'; eval { $$obuf .= html_footer($ctx, $eml) }; html_oneshot($ctx, 200, $obuf); } # public functions: (unstable) sub msg_page { my ($ctx) = @_; my $ibx = $ctx->{-inbox}; $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef; my $over = $ctx->{over} = $ibx->over or return no_over_html($ctx); my ($id, $prev); my $next_arg = $ctx->{next_arg} = [ $ctx->{mid}, \$id, \$prev ]; my $smsg = $ctx->{smsg} = $over->next_by_mid(@$next_arg) or return; # undef == 404 # allow user to easily browse the range around this message if # they have ->over $ctx->{-t_max} = $smsg->{ts}; PublicInbox::WwwStream::aresponse($ctx, 200, \&msg_page_i); } # /$INBOX/$MESSAGE_ID/#R sub msg_reply ($$) { my ($ctx, $hdr) = @_; my $se_url = 'https://kernel.org/pub/software/scm/git/docs/git-send-email.html'; my $p_url = 'https://en.wikipedia.org/wiki/Posting_style#Interleaved_style'; my $info = ''; my $ibx = $ctx->{-inbox}; if (my $url = $ibx->{infourl}) { $url = prurl($ctx->{env}, $url); $info = qq(\n List information: $url\n); } my ($arg, $link, $reply_to_all) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); if (ref($arg) eq 'SCALAR') { return '
'.ascii_html($$arg).'
'; } # mailto: link only works if address obfuscation is disabled if ($link) { $link = <In-Reply-To header via mailto: links, try the mailto: link EOF } push @$arg, '/path/to/YOUR_REPLY'; $arg = ascii_html(join(" \\\n ", '', @$arg)); <
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and $reply_to_all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  $p_url
$info
* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email$arg

  $se_url
$link
EOF } sub in_reply_to { my ($hdr) = @_; my $refs = references($hdr); $refs->[-1]; } sub fold_addresses ($) { return $_[0] if length($_[0]) <= COLS; # try to fold on commas after non-word chars before $lim chars, # Try to get the "," preceded by ">" or ")", but avoid folding # on the comma where somebody uses "Lastname, Firstname". # We also try to keep the last and penultimate addresses in # the list on the same line if possible, hence the extra \z # Fall back to folding on spaces at $lim + 1 chars my $lim = COLS - 8; # 8 = "\t" display width my $too_long = $lim + 1; $_[0] =~ s/\s*\z//s; # Email::Simple doesn't strip trailing spaces $_[0] = join("\n\t", ($_[0] =~ /(.{0,$lim}\W(?:,|\z)| .{1,$lim}(?:,|\z)| .{1,$lim}| .{$too_long,}?)(?:\s|\z)/xgo)); } sub _hdr_names_html ($$) { my ($hdr, $field) = @_; my @vals = $hdr->header($field) or return ''; ascii_html(join(', ', PublicInbox::Address::names(join(',', @vals)))); } sub nr_to_s ($$$) { my ($nr, $singular, $plural) = @_; return "0 $plural" if $nr == 0; $nr == 1 ? "$nr $singular" : "$nr $plural"; } # Displays the text of of the message for /$INBOX/$MSGID/[Tt]/ endpoint # this is already inside a
sub eml_entry {
	my ($ctx, $eml) = @_;
	my $smsg = delete $ctx->{smsg};
	my $subj = delete $smsg->{subject};
	my $mid_raw = $smsg->{mid};
	my $id = id_compress($mid_raw, 1);
	my $id_m = 'm'.$id;
	my $root_anchor = $ctx->{root_anchor} || '';
	my $irt;
	my $obfs_ibx = $ctx->{-obfs_ibx};

	$subj = '(no subject)' if $subj eq '';
	my $rv = "* ";
	$subj = ''.ascii_html($subj).'';
	obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx;
	$subj = "$subj" if $root_anchor eq $id_m;
	$rv .= $subj . "\n";
	$rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
	my @tocc;
	my $ds = delete $smsg->{ds}; # for v1 non-Xapian/SQLite users

	# Deleting these fields saves about 400K as we iterate across 1K msgs
	delete @$smsg{qw(ts blob)};

	my $from = _hdr_names_html($eml, 'From');
	obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
	$rv .= "From: $from @ ".fmt_ts($ds)." UTC";
	my $upfx = $ctx->{-upfx};
	my $mhref = $upfx . mid_href($mid_raw) . '/';
	$rv .= qq{ (permalink / };
	$rv .= qq{raw)\n};
	my $to = fold_addresses(_hdr_names_html($eml, 'To'));
	my $cc = fold_addresses(_hdr_names_html($eml, 'Cc'));
	my ($tlen, $clen) = (length($to), length($cc));
	my $to_cc = '';
	if (($tlen + $clen) > COLS) {
		$to_cc .= '  To: '.$to."\n" if $tlen;
		$to_cc .= '  Cc: '.$cc."\n" if $clen;
	} else {
		if ($tlen) {
			$to_cc .= '  To: '.$to;
			$to_cc .= '; +Cc: '.$cc if $clen;
		} else {
			$to_cc .= '  Cc: '.$cc if $clen;
		}
		$to_cc .= "\n";
	}
	obfuscate_addrs($obfs_ibx, $to_cc) if $obfs_ibx;
	$rv .= $to_cc;

	my $mapping = $ctx->{mapping};
	if (!$mapping && (defined($irt) || defined($irt = in_reply_to($eml)))) {
		my $href = $upfx . mid_href($irt) . '/';
		my $html = ascii_html($irt);
		$rv .= qq(In-Reply-To: <$html>\n)
	}
	$rv .= "\n";

	# scan through all parts, looking for displayable text
	$ctx->{mhref} = $mhref;
	$ctx->{obuf} = \$rv;
	$eml->each_part(\&add_text_body, $ctx, 1);
	delete $ctx->{obuf};

	# add the footer
	$rv .= "\n^ ".
		"permalink" .
		" raw" .
		" reply";

	my $hr;
	if (defined(my $pct = $smsg->{pct})) { # used by SearchView.pm
		$rv .= "\t[relevance $pct%]";
		$hr = 1;
	} elsif ($mapping) {
		my $nested = 'nested';
		my $flat = 'flat';
		my $end = '';
		if ($ctx->{flat}) {
			$hr = 1;
			$flat = "$flat";
		} else {
			$nested = "$nested";
		}
		$rv .= "\t[$flat";
		$rv .= "|$nested]";
		$rv .= " $ctx->{s_nr}";
	} else {
		$hr = $ctx->{-hr};
	}

	# do we have more messages? start a new 
 if so
	$rv .= scalar(@{$ctx->{msgs}}) ? '

' : '
' if $hr; $rv; } sub pad_link ($$;$) { my ($mid, $level, $s) = @_; $s ||= '...'; my $id = id_compress($mid, 1); (' 'x19).indent_for($level).th_pfx($level)."($s)\n"; } sub _th_index_lite { my ($mid_raw, $irt, $id, $ctx) = @_; my $rv = ''; my $mapping = $ctx->{mapping} or return $rv; my $pad = ' '; my $mid_map = $mapping->{$mid_raw}; defined $mid_map or return 'public-inbox BUG: '.ascii_html($mid_raw).' not mapped'; my ($attr, $node, $idx, $level) = @$mid_map; my $children = $node->{children}; my $nr_c = scalar @$children; my $nr_s = 0; my $siblings; # delete saves about 200KB on a 1K message thread if (my $refs = delete $node->{references}) { ($$irt) = ($refs =~ m/$MID_EXTRACT\z/o); } my $irt_map = $mapping->{$$irt} if defined $$irt; if (defined $irt_map) { $siblings = $irt_map->[1]->{children}; $nr_s = scalar(@$siblings) - 1; $rv .= $pad . $irt_map->[0]; if ($idx > 0) { my $prev = $siblings->[$idx - 1]; my $pmid = $prev->{mid}; if ($idx > 2) { my $s = ($idx - 1). ' preceding siblings ...'; $rv .= pad_link($pmid, $level, $s); } elsif ($idx == 2) { my $ppmid = $siblings->[0]->{mid}; $rv .= $pad . $mapping->{$ppmid}->[0]; } $rv .= $pad . $mapping->{$pmid}->[0]; } } my $s_s = nr_to_s($nr_s, 'sibling', 'siblings'); my $s_c = nr_to_s($nr_c, 'reply', 'replies'); $attr =~ s!\n\z!\n!s; $attr =~ s! (?:" )?!!s; # no point in dup subject $attr =~ s!]+>([^<]+)!$1!s; # no point linking to self $rv .= "@ $attr"; if ($nr_c) { my $cmid = $children->[0]->{mid}; $rv .= $pad . $mapping->{$cmid}->[0]; if ($nr_c > 2) { my $s = ($nr_c - 1). ' more replies'; $rv .= pad_link($cmid, $level + 1, $s); } elsif (my $cn = $children->[1]) { $rv .= $pad . $mapping->{$cn->{mid}}->[0]; } } my $next = $siblings->[$idx+1] if $siblings && $idx >= 0; if ($next) { my $nmid = $next->{mid}; $rv .= $pad . $mapping->{$nmid}->[0]; my $nnext = $nr_s - $idx; if ($nnext > 2) { my $s = ($nnext - 1).' subsequent siblings'; $rv .= pad_link($nmid, $level, $s); } elsif (my $nn = $siblings->[$idx + 2]) { $rv .= $pad . $mapping->{$nn->{mid}}->[0]; } } $rv .= $pad ."$s_s, $s_c; $ctx->{s_nr}\n"; } # non-recursive thread walker sub walk_thread ($$$) { my ($rootset, $ctx, $cb) = @_; my @q = map { (0, $_, -1) } @$rootset; while (@q) { my ($level, $node, $i) = splice(@q, 0, 3); defined $node or next; $cb->($ctx, $level, $node, $i) or return; ++$level; $i = 0; unshift @q, map { ($level, $_, $i++) } @{$node->{children}}; } } sub pre_thread { # walk_thread callback my ($ctx, $level, $node, $idx) = @_; $ctx->{mapping}->{$node->{mid}} = [ '', $node, $idx, $level ]; skel_dump($ctx, $level, $node); } sub thread_eml_entry { my ($ctx, $eml) = @_; my ($beg, $end) = thread_adj_level($ctx, $ctx->{level}); $beg . '
' . eml_entry($ctx, $eml) . '
' . $end; } sub next_in_queue ($$) { my ($q, $ghost_ok) = @_; while (@$q) { my ($level, $smsg) = splice(@$q, 0, 2); my $cl = $level + 1; unshift @$q, map { ($cl, $_) } @{$smsg->{children}}; return ($level, $smsg) if $ghost_ok || exists($smsg->{blob}); } undef; } sub stream_thread_i { # PublicInbox::WwwStream::getline callback my ($ctx, $eml) = @_; return thread_eml_entry($ctx, $eml) if $eml; return unless exists($ctx->{skel}); my $ghost_ok = $ctx->{nr}++; while (1) { my ($lvl, $smsg) = next_in_queue($ctx->{-queue}, $ghost_ok); if ($smsg) { if (exists $smsg->{blob}) { # next message for cat-file $ctx->{level} = $lvl; if (!$ghost_ok) { # first non-ghost $ctx->{-title_html} = ascii_html($smsg->{subject}); $ctx->zmore($ctx->html_top); } return $smsg; } # buffer the ghost entry and loop $ctx->zmore(ghost_index_entry($ctx, $lvl, $smsg)); } else { # all done $ctx->zmore(join('', thread_adj_level($ctx, 0))); $ctx->zmore(${delete($ctx->{skel})}); return; } } } sub stream_thread ($$) { my ($rootset, $ctx) = @_; $ctx->{-queue} = [ map { (0, $_) } @$rootset ]; PublicInbox::WwwStream::aresponse($ctx, 200, \&stream_thread_i); } # /$INBOX/$MSGID/t/ and /$INBOX/$MSGID/T/ sub thread_html { my ($ctx) = @_; my $mid = $ctx->{mid}; my $ibx = $ctx->{-inbox}; my ($nr, $msgs) = $ibx->over->get_thread($mid); return missing_thread($ctx) if $nr == 0; # link $INBOX_DIR/description text to "index_topics" view around # the newest message in this thread my $t = ts2str($ctx->{-t_max} = max(map { delete $_->{ts} } @$msgs)); my $t_fmt = fmt_ts($ctx->{-t_max}); my $skel = '
';
	$skel .= $nr == 1 ? 'only message in thread' : 'end of thread';
	$skel .= <~$t_fmt UTC | newest]

EOF
	$skel .= "Thread overview: ";
	$skel .= $nr == 1 ? '(only message)' : "$nr+ messages";
	$skel .= " (download: mbox.gz";
	$skel .= " / follow: Atom feed)\n";
	$skel .= "-- links below jump to the message on this page --\n";
	$ctx->{-upfx} = '../../';
	$ctx->{cur_level} = 0;
	$ctx->{skel} = \$skel;
	$ctx->{prev_attr} = '';
	$ctx->{prev_level} = 0;
	$ctx->{root_anchor} = anchor_for($mid);
	$ctx->{mapping} = {};
	$ctx->{s_nr} = ($nr > 1 ? "$nr+ messages" : 'only message')
	               .' in thread';

	my $rootset = thread_results($ctx, $msgs);

	# reduce hash lookups in pre_thread->skel_dump
	$ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
	walk_thread($rootset, $ctx, \&pre_thread);

	$skel .= '
'; return stream_thread($rootset, $ctx) unless $ctx->{flat}; # flat display: lazy load the full message from smsg $ctx->{msgs} = $msgs; $ctx->{-html_tip} = '
';
	PublicInbox::WwwStream::aresponse($ctx, 200, \&thread_html_i);
}

sub thread_html_i { # PublicInbox::WwwStream::getline callback
	my ($ctx, $eml) = @_;
	if ($eml) {
		my $smsg = $ctx->{smsg};
		if (exists $ctx->{-html_tip}) {
			$ctx->{-title_html} = ascii_html($smsg->{subject});
			$ctx->zmore($ctx->html_top);
		}
		return eml_entry($ctx, $eml);
	} else {
		while (my $smsg = shift @{$ctx->{msgs}}) {
			return $smsg if exists($smsg->{blob});
		}
		my $skel = delete($ctx->{skel}) or return; # all done
		$ctx->zmore($$skel);
		undef;
	}
}

sub multipart_text_as_html {
	# ($mime, $ctx) = @_; # each_part may do "$_[0] = undef"

	# scan through all parts, looking for displayable text
	$_[0]->each_part(\&add_text_body, $_[1], 1);
}

sub submsg_hdr ($$) {
	my ($ctx, $eml) = @_;
	my $obfs_ibx = $ctx->{-obfs_ibx};
	my $rv = $ctx->{obuf};
	$$rv .= "\n";
	for my $h (qw(From To Cc Subject Date Message-ID X-Alt-Message-ID)) {
		my @v = $eml->header($h);
		for my $v (@v) {
			obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
			$v = ascii_html($v);
			$$rv .= "$h: $v\n";
		}
	}
}

sub attach_link ($$$$;$) {
	my ($ctx, $ct, $p, $fn, $err) = @_;
	my ($part, $depth, $idx) = @$p;

	# Eml iteration clobbers multipart ->{bdy}, so do not offer
	# downloads for 0-byte multipart attachments
	return unless $part->{bdy};

	my $nl = $idx eq '1' ? '' : "\n"; # like join("\n", ...)
	my $size = bytes::length($part->body);

	# hide attributes normally, unless we want to aid users in
	# spotting MUA problems:
	$ct =~ s/;.*// unless $err;
	$ct = ascii_html($ct);
	my $sfn;
	if (defined $fn && $fn =~ /\A$PublicInbox::Hval::FN\z/o) {
		$sfn = $fn;
	} elsif ($ct eq 'text/plain') {
		$sfn = 'a.txt';
	} else {
		$sfn = 'a.bin';
	}
	my $rv = $ctx->{obuf};
	$$rv .= qq($nl{mhref}$idx-$sfn">);
	if ($err) {
		$$rv .= <header('Content-Description') // $fn // '';
	$desc = ascii_html($desc);
	$$rv .= ($desc eq '') ? "$ts --]" : "$desc --]\n[-- $ts --]";
	$$rv .= "\n";

	submsg_hdr($ctx, $part) if $part->{is_submsg};

	undef;
}

sub add_text_body { # callback for each_part
	my ($p, $ctx) = @_;
	my $upfx = $ctx->{mhref};
	my $ibx = $ctx->{-inbox};
	my $l = $ctx->{-linkify} //= PublicInbox::Linkify->new;
	# $p - from each_part: [ Email::MIME-like, depth, $idx ]
	my ($part, $depth, $idx) = @$p;
	my $ct = $part->content_type || 'text/plain';
	my $fn = $part->filename;
	my ($s, $err) = msg_part_text($part, $ct);
	return attach_link($ctx, $ct, $p, $fn) unless defined $s;

	my $rv = $ctx->{obuf};
	if ($part->{is_submsg}) {
		submsg_hdr($ctx, $part);
		$$rv .= "\n";
	}

	# makes no difference to browsers, and don't screw up filename
	# link generation in diffs with the extra '%0D'
	$s =~ s/\r\n/\n/sg;

	# will be escaped to `•' in HTML
	obfuscate_addrs($ibx, $s, "\x{2022}") if $ibx->{obfuscate};

	# always support diff-highlighting, but we can't linkify hunk
	# headers for solver unless some coderepo are configured:
	my $diff;
	if ($s =~ /^--- [^\n]+\n\+{3} [^\n]+\n@@ /ms) {
		# diffstat anchors do not link across attachments or messages,
		# -apfx is just a stable prefix for making diffstat anchors
		# linkable to the first diff hunk w/o crossing attachments
		$idx =~ tr!.!/!; # compatibility with previous versions
		$ctx->{-apfx} = $upfx . $idx;

		# do attr => filename mappings for diffstats in git diffs:
		$ctx->{-anchors} = {} if $s =~ /^diff --git /sm;
		$diff = 1;
		delete $ctx->{-long_path};
		my $spfx;
		if ($ibx->{-repo_objs}) {
			if (index($upfx, '//') >= 0) { # absolute URL (Atom feeds)
				$spfx = $upfx;
				$spfx =~ s!/([^/]*)/\z!/!;
			} else {
				my $n_slash = $upfx =~ tr!/!/!;
				if ($n_slash == 0) {
					$spfx = '../';
				} elsif ($n_slash == 1) {
					$spfx = '';
				} else { # nslash == 2
					$spfx = '../../';
				}
			}
		}
		$ctx->{-spfx} = $spfx;
	};

	# some editors don't put trailing newlines at the end:
	$s .= "\n" unless $s =~ /\n\z/s;

	# split off quoted and unquoted blocks:
	my @sections = PublicInbox::MsgIter::split_quotes($s);
	undef $s; # free memory
	if (defined($fn) || ($depth > 0 && !$part->{is_submsg}) || $err) {
		# badly-encoded message with $err? tell the world about it!
		attach_link($ctx, $ct, $p, $fn, $err);
		$$rv .= "\n";
	}
	foreach my $cur (@sections) {
		if ($cur =~ /\A>/) {
			# we use a  here to allow users to specify
			# their own color for quoted text
			$$rv .= qq();
			$$rv .= $l->to_html($cur);
			$$rv .= '';
		} elsif ($diff) {
			flush_diff($ctx, \$cur);
		} else {
			# regular lines, OK
			$$rv .= $l->to_html($cur);
		}
		undef $cur; # free memory
	}
}

sub _msg_page_prepare_obuf {
	my ($eml, $ctx) = @_;
	my $over = $ctx->{-inbox}->over;
	my $obfs_ibx = $ctx->{-obfs_ibx};
	my $rv = '';
	my $mids = mids_for_index($eml);
	my $nr = $ctx->{nr}++;
	if ($nr) { # unlikely
		$rv .= '
';
	} else {
		$ctx->{first_hdr} = $eml->header_obj;
		if ($ctx->{smsg}) {
			$rv .=
"
WARNING: multiple messages have this Message-ID\n
"; } $rv .= ""; # anchor for body start } $ctx->{-upfx} = '../' if $over; my @title; # (Subject[0], From[0]) for my $v ($eml->header('From')) { my @n = PublicInbox::Address::names($v); $v = ascii_html($v); $title[1] //= ascii_html(join(', ', @n)); if ($obfs_ibx) { obfuscate_addrs($obfs_ibx, $v); obfuscate_addrs($obfs_ibx, $title[1]); } $rv .= "From: $v\n" if $v ne ''; } foreach my $h (qw(To Cc)) { for my $v ($eml->header($h)) { fold_addresses($v); $v = ascii_html($v); obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; $rv .= "$h: $v\n" if $v ne ''; } } my @subj = $eml->header('Subject'); if (@subj) { my $v = ascii_html(shift @subj); obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; $rv .= 'Subject: '; $rv .= $over ? qq($v\n) : "$v\n"; $title[0] = $v; for $v (@subj) { # multi-Subject message :< $v = ascii_html($v); obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; $rv .= "Subject: $v\n"; } } else { # dummy anchor for thread skeleton at bottom of page $rv .= qq() if $over; $title[0] = '(no subject)'; } for my $v ($eml->header('Date')) { $v = ascii_html($v); obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; # possible :P $rv .= "Date: $v\n"; } if (!$nr) { # first (and only) message, common case $ctx->{-title_html} = join(' - ', @title); $rv = $ctx->html_top . $rv; } if (scalar(@$mids) == 1) { # common case my $mhtml = ascii_html($mids->[0]); $rv .= "Message-ID: <$mhtml> "; $rv .= "(raw)\n"; } else { # X-Alt-Message-ID can happen if a message is injected from # public-inbox-nntpd because of multiple Message-ID headers. my $lnk = PublicInbox::Linkify->new; my $s = ''; for my $h (qw(Message-ID X-Alt-Message-ID)) { $s .= "$h: $_\n" for ($eml->header_raw($h)); } $lnk->linkify_mids('..', \$s, 1); $rv .= $s; } $rv .= _parent_headers($eml, $over); $rv .= "\n"; \$rv; } sub SKEL_EXPAND () { qq(expand[flat) . qq(|nested] ) . qq(mbox.gz ) . qq(Atom feed); } sub thread_skel ($$$) { my ($skel, $ctx, $hdr) = @_; my $mid = mids($hdr)->[0]; my $ibx = $ctx->{-inbox}; my ($nr, $msgs) = $ibx->over->get_thread($mid); my $parent = in_reply_to($hdr); $$skel .= "\nThread overview: "; if ($nr <= 1) { if (defined $parent) { $$skel .= SKEL_EXPAND."\n "; $$skel .= ghost_parent('../', $parent) . "\n"; } else { $$skel .= '[no followups] '.SKEL_EXPAND."\n"; } $ctx->{next_msg} = undef; $ctx->{parent_msg} = $parent; return; } $$skel .= $nr; $$skel .= '+ messages / '.SKEL_EXPAND.qq! top\n!; # nb: mutt only shows the first Subject in the index pane # when multiple Subject: headers are present, so we follow suit: my $subj = $hdr->header('Subject') // ''; $subj = '(no subject)' if $subj eq ''; $ctx->{prev_subj} = [ split(/ /, subject_normalized($subj)) ]; $ctx->{cur} = $mid; $ctx->{prev_attr} = ''; $ctx->{prev_level} = 0; $ctx->{skel} = $skel; # reduce hash lookups in skel_dump $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef; walk_thread(thread_results($ctx, $msgs), $ctx, \&skel_dump); $ctx->{parent_msg} = $parent; } sub _parent_headers { my ($hdr, $over) = @_; my $rv = ''; my @irt = $hdr->header_raw('In-Reply-To'); my $refs; if (@irt) { my $lnk = PublicInbox::Linkify->new; $rv .= "In-Reply-To: $_\n" for @irt; $lnk->linkify_mids('..', \$rv); } else { $refs = references($hdr); my $irt = pop @$refs; if (defined $irt) { my $html = ascii_html($irt); my $href = mid_href($irt); $rv .= "In-Reply-To: <"; $rv .= "$html>\n"; } } # do not display References: if search is present, # we show the thread skeleton at the bottom, instead. return $rv if $over; $refs //= references($hdr); if (@$refs) { @$refs = map { linkify_ref_no_over($_) } @$refs; $rv .= 'References: '. join("\n\t", @$refs) . "\n"; } $rv; } # returns a string buffer sub html_footer { my ($ctx, $hdr) = @_; my $ibx = $ctx->{-inbox}; my $upfx = '../'; my $skel; my $rv = '
';
	if ($ibx->over) {
		my $t = ts2str($ctx->{-t_max});
		my $t_fmt = fmt_ts($ctx->{-t_max});
		$skel .= <~$t_fmt UTC|newest]
EOF

		thread_skel(\$skel, $ctx, $hdr);
		my ($next, $prev);
		my $parent = '       ';
		$next = $prev = '    ';

		if (my $n = $ctx->{next_msg}) {
			$n = mid_href($n);
			$next = "next";
		}
		my $u;
		my $par = $ctx->{parent_msg};
		if ($par) {
			$u = mid_href($par);
			$u = "$upfx$u/";
		}
		if (my $p = $ctx->{prev_msg}) {
			$prev = mid_href($p);
			if ($p && $par && $p eq $par) {
				$prev = "prev parent';
				$parent = '';
			} else {
				$prev = "prev';
				$parent = " parent" if $u;
			}
		} elsif ($u) { # unlikely
			$parent = " parent";
		}
		$rv .= "$next $prev$parent ";
	} else { # unindexed inboxes w/o over
		$skel = qq( latest);
	}
	$rv .= qq(reply);
	$rv .= $skel;
	$rv .= '
'; $rv .= msg_reply($ctx, $hdr); } sub linkify_ref_no_over { my ($mid) = @_; my $href = mid_href($mid); my $html = ascii_html($mid); "<$html>"; } sub anchor_for { my ($msgid) = @_; 'm' . id_compress($msgid, 1); } sub ghost_parent { my ($upfx, $mid) = @_; my $href = mid_href($mid); my $html = ascii_html($mid); qq{[parent not found: <$html>]}; } sub indent_for { my ($level) = @_; $level ? INDENT x ($level - 1) : ''; } sub find_mid_root { my ($ctx, $level, $node, $idx) = @_; ++$ctx->{root_idx} if $level == 0; if ($node->{mid} eq $ctx->{mid}) { $ctx->{found_mid_at} = $ctx->{root_idx}; return 0; # stop iterating } 1; } sub strict_loose_note ($) { my ($nr) = @_; my $msg = " -- strict thread matches above, loose matches on Subject: below --\n"; if ($nr > PublicInbox::Over::DEFAULT_LIMIT()) { $msg .= " -- use mbox.gz link to download all $nr messages --\n"; } $msg; } sub thread_results { my ($ctx, $msgs) = @_; require PublicInbox::SearchThread; my $rootset = PublicInbox::SearchThread::thread($msgs, \&sort_ds, $ctx); # FIXME: `tid' is broken on --reindex, so that needs to be fixed # and preserved in the future. This bug is hidden by `sid' matches # in get_thread, so we never noticed it until now. And even when # reindexing is fixed, we'll keep this code until a SCHEMA_VERSION # bump since reindexing is expensive and users may not do it # loose threading could've returned too many results, # put the root the message we care about at the top: my $mid = $ctx->{mid}; if (defined($mid) && scalar(@$rootset) > 1) { $ctx->{root_idx} = -1; my $nr = scalar @$msgs; walk_thread($rootset, $ctx, \&find_mid_root); my $idx = $ctx->{found_mid_at}; if (defined($idx) && $idx != 0) { my $tip = splice(@$rootset, $idx, 1); @$rootset = reverse @$rootset; unshift @$rootset, $tip; $ctx->{sl_note} = strict_loose_note($nr); } } $rootset } sub missing_thread { my ($ctx) = @_; require PublicInbox::ExtMsg; PublicInbox::ExtMsg::ext_msg($ctx); } sub dedupe_subject { my ($prev_subj, $subj, $val) = @_; my $omit; # '"' denotes identical text omitted my (@prev_pop, @curr_pop); while (@$prev_subj && @$subj && $subj->[-1] eq $prev_subj->[-1]) { push(@prev_pop, pop(@$prev_subj)); push(@curr_pop, pop(@$subj)); $omit //= $val; } pop @$subj if @$subj && $subj->[-1] =~ /^re:\s*/i; if (scalar(@curr_pop) == 1) { $omit = undef; push @$prev_subj, @prev_pop; push @$subj, @curr_pop; } $omit // ''; } sub skel_dump { # walk_thread callback my ($ctx, $level, $smsg) = @_; $smsg->{blob} or return _skel_ghost($ctx, $level, $smsg); my $skel = $ctx->{skel}; my $cur = $ctx->{cur}; my $mid = $smsg->{mid}; if ($level == 0 && $ctx->{skel_dump_roots}++) { $$skel .= delete($ctx->{sl_note}) || ''; } my $f = ascii_html($smsg->{from_name}); my $obfs_ibx = $ctx->{-obfs_ibx}; obfuscate_addrs($obfs_ibx, $f) if $obfs_ibx; my $d = fmt_ts($smsg->{ds}); my $unmatched; # if lazy-loaded by SearchThread::Msg::visible() if (exists $ctx->{searchview}) { if (defined(my $pct = $smsg->{pct})) { $d .= (sprintf(' % 2u', $pct) . '%'); } else { $unmatched = 1; $d .= ' '; } } $d .= ' ' . indent_for($level) . th_pfx($level); my $attr = $f; $ctx->{first_level} ||= $level; if ($attr ne $ctx->{prev_attr} || $ctx->{prev_level} > $level) { $ctx->{prev_attr} = $attr; } $ctx->{prev_level} = $level; if ($cur) { if ($cur eq $mid) { delete $ctx->{cur}; $$skel .= "$d". "$attr [this message]\n"; return 1; } else { $ctx->{prev_msg} = $mid; } } else { $ctx->{next_msg} ||= $mid; } # Subject is never undef, this mail was loaded from # our Xapian which would've resulted in '' if it were # really missing (and Filter rejects empty subjects) my @subj = split(/ /, subject_normalized($smsg->{subject})); # remove common suffixes from the subject if it matches the previous, # so we do not show redundant text at the end. my $prev_subj = $ctx->{prev_subj} || []; $ctx->{prev_subj} = [ @subj ]; my $omit = dedupe_subject($prev_subj, \@subj, '" '); my $end; if (@subj) { my $subj = join(' ', @subj); $subj = ascii_html($subj); obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx; $end = "$subj $omit$f\n" } else { $end = "$f\n"; } my $m; my $id = ''; my $mapping = $unmatched ? undef : $ctx->{mapping}; if ($mapping) { my $map = $mapping->{$mid}; $id = id_compress($mid, 1); $m = '#m'.$id; $map->[0] = "$d$end"; $id = "\nid=r".$id; } else { $m = $ctx->{-upfx}.mid_href($mid).'/'; } $$skel .= $d . "" . $end; 1; } sub _skel_ghost { my ($ctx, $level, $node) = @_; my $mid = $node->{mid}; my $d = ' [not found] '; $d .= ' ' if exists $ctx->{searchview}; $d .= indent_for($level) . th_pfx($level); my $upfx = $ctx->{-upfx}; my $href = $upfx . mid_href($mid) . '/'; my $html = ascii_html($mid); my $mapping = $ctx->{mapping}; my $map = $mapping->{$mid} if $mapping; if ($map) { my $id = id_compress($mid, 1); $map->[0] = $d . qq{<$html>\n}; $d .= qq{<$html>\n}; } else { $d .= qq{<$html>\n}; } ${$ctx->{skel}} .= $d; 1; } sub sort_ds { [ sort { (eval { $a->topmost->{ds} } || 0) <=> (eval { $b->topmost->{ds} } || 0) } @{$_[0]} ]; } # accumulate recent topics if search is supported # returns 200 if done, 404 if not sub acc_topic { # walk_thread callback my ($ctx, $level, $smsg) = @_; my $mid = $smsg->{mid}; my $has_blob = $smsg->{blob} // do { if (my $by_mid = $ctx->{-inbox}->smsg_by_mid($mid)) { %$smsg = (%$smsg, %$by_mid); 1; } }; if ($has_blob) { my $subj = subject_normalized($smsg->{subject}); $subj = '(no subject)' if $subj eq ''; my $ds = $smsg->{ds}; if ($level == 0) { # new, top-level topic my $topic = [ $ds, 1, { $subj => $mid }, $subj ]; $ctx->{-cur_topic} = $topic; push @{$ctx->{order}}, $topic; return 1; } # continue existing topic my $topic = $ctx->{-cur_topic}; # should never be undef $topic->[0] = $ds if $ds > $topic->[0]; $topic->[1]++; # bump N+ message counter my $seen = $topic->[2]; if (scalar(@$topic) == 3) { # parent was a ghost push @$topic, $subj; } elsif (!defined($seen->{$subj})) { push @$topic, $level, $subj; # @extra messages } $seen->{$subj} = $mid; # latest for subject } else { # ghost message return 1 if $level != 0; # ignore child ghosts my $topic = $ctx->{-cur_topic} = [ -666, 0, {} ]; push @{$ctx->{order}}, $topic; } 1; } sub dump_topics { my ($ctx) = @_; my $order = delete $ctx->{order}; # [ ds, subj1, subj2, subj3, ... ] unless ($order) { $ctx->{-html_tip} = '
[No topics in range]
'; return 404; } my @out; my $ibx = $ctx->{-inbox}; my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef; # sort by recency, this allows new posts to "bump" old topics... foreach my $topic (sort { $b->[0] <=> $a->[0] } @$order) { my ($ds, $n, $seen, $top_subj, @extra) = @$topic; @$topic = (); next unless defined $top_subj; # ghost topic my $mid = delete $seen->{$top_subj}; my $href = mid_href($mid); my $prev_subj = [ split(/ /, $top_subj) ]; $top_subj = ascii_html($top_subj); $ds = fmt_ts($ds); # $n isn't the total number of posts on the topic, # just the number of posts in the current results window my $anchor; if ($n == 1) { $n = ''; $anchor = '#u'; # top of only message } else { $n = " ($n+ messages)"; $anchor = '#t'; # thread skeleton } my $mbox = qq(mbox.gz); my $atom = qq(Atom); my $s = "$top_subj\n" . " $ds UTC $n - $mbox / $atom\n"; for (my $i = 0; $i < scalar(@extra); $i += 2) { my $level = $extra[$i]; my $subj = $extra[$i + 1]; # already normalized $mid = delete $seen->{$subj}; my @subj = split(/ /, $subj); my @next_prev = @subj; # full copy my $omit = dedupe_subject($prev_subj, \@subj, ' "'); $prev_subj = \@next_prev; $subj = join(' ', @subj); $subj = ascii_html($subj); obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx; $href = mid_href($mid); $s .= indent_for($level) . TCHILD; $s .= qq($subj$omit\n); } push @out, $s; } $ctx->{-html_tip} = '
' . join("\n", @out) . '
'; 200; } sub str2ts ($) { my ($yyyy, $mon, $dd, $hh, $mm, $ss) = unpack('A4A2A2A2A2A2', $_[0]); timegm($ss || 0, $mm || 0, $hh || 0, $dd, $mon - 1, $yyyy); } sub pagination_footer ($$) { my ($ctx, $latest) = @_; my $next = $ctx->{next_page} || ''; my $prev = $ctx->{prev_page} || ''; if ($prev) { # aligned padding for: 'next (older) | ' $next = $next ? "$next | " : ' | '; $prev .= qq[ | latest]; } "
page: $next$prev
"; } sub paginate_recent ($$) { my ($ctx, $lim) = @_; my $t = $ctx->{qp}->{t} || ''; my $opts = { limit => $lim }; my ($after, $before); # Xapian uses '..' but '-' is perhaps friendier to URL linkifiers # if only $after exists "YYYYMMDD.." because "." could be skipped # if interpreted as an end-of-sentence $t =~ s/\A([0-9]{8,14})-// and $after = str2ts($1); $t =~ /\A([0-9]{8,14})\z/ and $before = str2ts($1); my $ibx = $ctx->{-inbox}; my $msgs = $ibx->recent($opts, $after, $before); my $nr = scalar @$msgs; if ($nr < $lim && defined($after)) { $after = $before = undef; $msgs = $ibx->recent($opts); $nr = scalar @$msgs; } my $more = $nr == $lim; my ($newest, $oldest); if ($nr) { $newest = $msgs->[0]->{ts}; $oldest = $msgs->[-1]->{ts}; # if we only had $after, our SQL query in ->recent ordered if ($newest < $oldest) { ($oldest, $newest) = ($newest, $oldest); $more = 0 if defined($after) && $after < $oldest; } } if (defined($oldest) && $more) { my $s = ts2str($oldest); $ctx->{next_page} = qq[] . 'next (older)'; } if (defined($newest) && (defined($before) || defined($after))) { my $s = ts2str($newest); $ctx->{prev_page} = qq[] . 'prev (newer)'; } $msgs; } # GET /$INBOX - top-level inbox view for indexed inboxes sub index_topics { my ($ctx) = @_; my $msgs = paginate_recent($ctx, 200); # 200 is our window if (@$msgs) { walk_thread(thread_results($ctx, $msgs), $ctx, \&acc_topic); } html_oneshot($ctx, dump_topics($ctx), \pagination_footer($ctx, '.')); } sub thread_adj_level { my ($ctx, $level) = @_; my $max = $ctx->{cur_level}; if ($level <= 0) { return ('', '') if $max == 0; # flat output # reset existing lists my $beg = $max > 1 ? ('' x ($max - 1)) : ''; $ctx->{cur_level} = 0; ("$beg", ''); } elsif ($level == $max) { # continue existing list qw(
  • ); } elsif ($level < $max) { my $beg = $max > 1 ? ('' x ($max - $level)) : ''; $ctx->{cur_level} = $level; ("$beg
  • ", '
  • '); } else { # ($level > $max) # start a new level $ctx->{cur_level} = $level; my $beg = ($max ? '
  • ' : '') . '
    • '; ($beg, '
    • '); } } sub ghost_index_entry { my ($ctx, $level, $node) = @_; my ($beg, $end) = thread_adj_level($ctx, $level); $beg . '
      '. ghost_parent($ctx->{-upfx}, $node->{mid})
      		. '
      ' . $end; } 1; public-inbox-1.6.1/lib/PublicInbox/ViewDiff.pm000066400000000000000000000166201377346120300211520ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ # # used by PublicInbox::View # This adds CSS spans for diff highlighting. # It also generates links for ViewVCS + SolverGit to show # (or reconstruct) blobs. package PublicInbox::ViewDiff; use 5.010_001; use strict; use warnings; use base qw(Exporter); our @EXPORT_OK = qw(flush_diff); use URI::Escape qw(uri_escape_utf8); use PublicInbox::Hval qw(ascii_html to_attr); use PublicInbox::Git qw(git_unquote); sub UNSAFE () { "^A-Za-z0-9\-\._~/" } my $OID_NULL = '0{7,}'; my $OID_BLOB = '[a-f0-9]{7,}'; my $LF = qr!\n!; my $ANY = qr![^\n]!; my $FN = qr!(?:"?[^/\n]+/[^\n]+|/dev/null)!; # cf. git diff.c :: get_compact_summary my $DIFFSTAT_COMMENT = qr/(?: *\((?:new|gone|(?:(?:new|mode) [\+\-][lx]))\))? *\z/s; my $NULL_TO_BLOB = qr/^(index $OID_NULL\.\.)($OID_BLOB)\b/ms; my $BLOB_TO_NULL = qr/^index ($OID_BLOB)(\.\.$OID_NULL)\b/ms; my $BLOB_TO_BLOB = qr/^index ($OID_BLOB)\.\.($OID_BLOB)/ms; my $EXTRACT_DIFFS = qr/( (?: # begin header stuff, don't capture filenames, here, # but instead wait for the --- and +++ lines. (?:^diff\x20--git\x20$FN\x20$FN$LF) # old mode || new mode || copy|rename|deleted|... (?:^[a-z]$ANY+$LF)* )? # end of optional stuff, everything below is required ^index\x20($OID_BLOB)\.\.($OID_BLOB)$ANY*$LF ^---\x20($FN)$LF ^\+{3}\x20($FN)$LF)/msx; my $IS_OID = qr/\A$OID_BLOB\z/s; # link to line numbers in blobs sub diff_hunk ($$$$) { my ($dst, $dctx, $ca, $cb) = @_; my ($oid_a, $oid_b, $spfx) = @$dctx{qw(oid_a oid_b spfx)}; if (defined($spfx) && defined($oid_a) && defined($oid_b)) { my ($n) = ($ca =~ /^-([0-9]+)/); $n = defined($n) ? "#n$n" : ''; $$dst .= qq(@@ {Q}$n">$ca); ($n) = ($cb =~ /^\+([0-9]+)/); $n = defined($n) ? "#n$n" : ''; $$dst .= qq( {Q}$n">$cb @@); } else { $$dst .= "@@ $ca $cb @@"; } } sub oid ($$$) { my ($dctx, $spfx, $oid) = @_; defined($spfx) ? qq({Q}">$oid) : $oid; } # returns true if diffstat anchor written, false otherwise sub anchor0 ($$$$) { my ($dst, $ctx, $fn, $rest) = @_; my $orig = $fn; # normal git diffstat output is impossible to parse reliably # without --numstat, and that isn't the default for format-patch. # So only do best-effort handling of renames for common cases; # which works well in practice. If projects put "=>", or trailing # spaces in filenames, oh well :P $fn =~ s/$DIFFSTAT_COMMENT//; $fn =~ s/{(?:.+) => (.+)}/$1/ or $fn =~ s/.* => (.+)/$1/; $fn = git_unquote($fn); # long filenames will require us to check in anchor1() push(@{$ctx->{-long_path}}, $fn) if $fn =~ s!\A\.\.\./?!!; if (my $attr = to_attr($ctx->{-apfx}.$fn)) { $ctx->{-anchors}->{$attr} = 1; my $spaces = ($orig =~ s/( +)\z//) ? $1 : ''; $$dst .= " " . ascii_html($orig) . '' . $spaces . $ctx->{-linkify}->to_html($rest); return 1; } undef; } # returns "diff --git" anchor destination, undef otherwise sub anchor1 ($$) { my ($ctx, $pb) = @_; my $attr = to_attr($ctx->{-apfx}.$pb) or return; my $ok = delete $ctx->{-anchors}->{$attr}; # unlikely, check the end of long path names we captured, # assume diffstat and diff output follow the same order, # and ignore different ordering (could be malicious input) unless ($ok) { my $fn = shift(@{$ctx->{-long_path}}) or return; $pb =~ /\Q$fn\E\z/s or return; $attr = to_attr($ctx->{-apfx}.$fn) or return; $ok = delete $ctx->{-anchors}->{$attr} or return; } $ok ? "diff --git" : undef } sub diff_header ($$$) { my ($x, $ctx, $top) = @_; my (undef, undef, $pa, $pb) = splice(@$top, 0, 4); # ignore oid_{a,b} my $spfx = $ctx->{-spfx}; my $dctx = { spfx => $spfx }; # get rid of leading "a/" or "b/" (or whatever --{src,dst}-prefix are) $pa = (split('/', git_unquote($pa), 2))[1] if $pa ne '/dev/null'; $pb = (split('/', git_unquote($pb), 2))[1] if $pb ne '/dev/null'; if ($pa eq $pb && $pb ne '/dev/null') { $dctx->{Q} = "?b=".uri_escape_utf8($pb, UNSAFE); } else { my @q; if ($pb ne '/dev/null') { push @q, 'b='.uri_escape_utf8($pb, UNSAFE); } if ($pa ne '/dev/null') { push @q, 'a='.uri_escape_utf8($pa, UNSAFE); } $dctx->{Q} = '?'.join('&', @q); } # linkify early and all at once, since we know the following # subst ops on $$x won't need further escaping: $$x = $ctx->{-linkify}->to_html($$x); # no need to capture oid_a and oid_b on add/delete, # we just linkify OIDs directly via s///e in conditional if (($$x =~ s/$NULL_TO_BLOB/$1 . oid($dctx, $spfx, $2)/e) || ($$x =~ s/$BLOB_TO_NULL/ 'index ' . oid($dctx, $spfx, $1) . $2/e)) { } elsif ($$x =~ $BLOB_TO_BLOB) { # modification-only, not add/delete: # linkify hunk headers later using oid_a and oid_b @$dctx{qw(oid_a oid_b)} = ($1, $2); } else { warn "BUG? <$$x> had no ^index line"; } $$x =~ s!^diff --git!anchor1($ctx, $pb) // 'diff --git'!ems; my $dst = $ctx->{obuf}; $$dst .= qq(); $$dst .= $$x; $$dst .= ''; $dctx; } sub diff_before_or_after ($$) { my ($ctx, $x) = @_; my $linkify = $ctx->{-linkify}; my $dst = $ctx->{obuf}; my $anchors = exists($ctx->{-anchors}) ? 1 : 0; for my $y (split(/(^---\n)/sm, $$x)) { if ($y =~ /\A---\n\z/s) { $$dst .= "---\n"; # all HTML is "\r\n" => "\n" $anchors |= 2; } elsif ($anchors == 3 && $y =~ /^ [0-9]+ files? changed, /sm) { # ok, looks like a diffstat, go line-by-line: for my $l (split(/^/m, $y)) { if ($l =~ /^ (.+)( +\| .*\z)/s) { anchor0($dst, $ctx, $1, $2) and next; } $$dst .= $linkify->to_html($l); } } else { # commit message, notes, etc $$dst .= $linkify->to_html($y); } } } # callers must do CRLF => LF conversion before calling this sub flush_diff ($$) { my ($ctx, $cur) = @_; my @top = split($EXTRACT_DIFFS, $$cur); undef $$cur; # free memory my $linkify = $ctx->{-linkify}; my $dst = $ctx->{obuf}; my $dctx; # {}, keys: Q, oid_a, oid_b while (defined(my $x = shift @top)) { if (scalar(@top) >= 4 && $top[1] =~ $IS_OID && $top[0] =~ $IS_OID) { $dctx = diff_header(\$x, $ctx, \@top); } elsif ($dctx) { my $after = ''; # Quiet "Complex regular subexpression recursion limit" # warning. Perl will truncate matches upon hitting # that limit, giving us more (and shorter) scalars than # would be ideal, but otherwise it's harmless. # # We could replace the `+' metacharacter with `{1,100}' # to limit the matches ourselves to 100, but we can # let Perl do it for us, quietly. no warnings 'regexp'; for my $s (split(/((?:(?:^\+[^\n]*\n)+)| (?:(?:^-[^\n]*\n)+)| (?:^@@ [^\n]+\n))/xsm, $x)) { if (!defined($dctx)) { $after .= $s; } elsif ($s =~ s/\A@@ (\S+) (\S+) @@//) { $$dst .= qq(); diff_hunk($dst, $dctx, $1, $2); $$dst .= $linkify->to_html($s); $$dst .= ''; } elsif ($s =~ /\A\+/) { $$dst .= qq(); $$dst .= $linkify->to_html($s); $$dst .= ''; } elsif ($s =~ /\A-- $/sm) { # email sig starts $dctx = undef; $after .= $s; } elsif ($s =~ /\A-/) { $$dst .= qq(); $$dst .= $linkify->to_html($s); $$dst .= ''; } else { $$dst .= $linkify->to_html($s); } } diff_before_or_after($ctx, \$after) unless $dctx; } else { diff_before_or_after($ctx, \$x); } } } 1; public-inbox-1.6.1/lib/PublicInbox/ViewVCS.pm000066400000000000000000000145661377346120300207440ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ # show any VCS object, similar to "git show" # FIXME: we only show blobs for now # # This can use a "solver" to reconstruct blobs based on git # patches (with abbreviated OIDs in the header). However, the # abbreviated OIDs must match exactly what's in the original # email (unless a normal code repo already has the blob). # # In other words, we can only reliably reconstruct blobs based # on links generated by ViewDiff (and only if the emailed # patches apply 100% cleanly to published blobs). package PublicInbox::ViewVCS; use strict; use warnings; use bytes (); # only for bytes::length use PublicInbox::SolverGit; use PublicInbox::WwwStream qw(html_oneshot); use PublicInbox::Linkify; use PublicInbox::Tmpfile; use PublicInbox::Hval qw(ascii_html to_filename); my $hl = eval { require PublicInbox::HlMod; PublicInbox::HlMod->new; }; my %QP_MAP = ( A => 'oid_a', a => 'path_a', b => 'path_b' ); our $MAX_SIZE = 1024 * 1024; # TODO: configurable my $BIN_DETECT = 8000; # same as git sub html_page ($$$) { my ($ctx, $code, $strref) = @_; my $wcb = delete $ctx->{-wcb}; $ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/" my $res = html_oneshot($ctx, $code, $strref); $wcb ? $wcb->($res) : $res; } sub stream_blob_parse_hdr { # {parse_hdr} for Qspawn my ($r, $bref, $ctx) = @_; my ($res, $logref) = delete @$ctx{qw(-res -logref)}; my ($git, $oid, $type, $size, $di) = @$res; my @cl = ('Content-Length', $size); if (!defined $r) { # error html_page($ctx, 500, $logref); } elsif (index($$bref, "\0") >= 0) { [200, [qw(Content-Type application/octet-stream), @cl] ]; } else { my $n = bytes::length($$bref); if ($n >= $BIN_DETECT || $n == $size) { return [200, [ 'Content-Type', 'text/plain; charset=UTF-8', @cl ] ]; } if ($r == 0) { warn "premature EOF on $oid $$logref\n"; return html_page($ctx, 500, $logref); } @$ctx{qw(-res -logref)} = ($res, $logref); undef; # bref keeps growing } } sub stream_large_blob ($$$$) { my ($ctx, $res, $logref, $fn) = @_; $ctx->{-logref} = $logref; $ctx->{-res} = $res; my ($git, $oid, $type, $size, $di) = @$res; my $cmd = ['git', "--git-dir=$git->{git_dir}", 'cat-file', $type, $oid]; my $qsp = PublicInbox::Qspawn->new($cmd); my $env = $ctx->{env}; $env->{'qspawn.wcb'} = delete $ctx->{-wcb}; $qsp->psgi_return($env, undef, \&stream_blob_parse_hdr, $ctx); } sub show_other_result ($$) { my ($bref, $ctx) = @_; my ($qsp, $logref) = delete @$ctx{qw(-qsp -logref)}; if (my $err = $qsp->{err}) { utf8::decode($$err); $$logref .= "git show error: $err"; return html_page($ctx, 500, $logref); } my $l = PublicInbox::Linkify->new; utf8::decode($$bref); $$bref = '
      '. $l->to_html($$bref);
      	$$bref .= '

      ' . $$logref; html_page($ctx, 200, $bref); } sub show_other ($$$$) { my ($ctx, $res, $logref, $fn) = @_; my ($git, $oid, $type, $size) = @$res; if ($size > $MAX_SIZE) { $$logref = "$oid is too big to show\n" . $$logref; return html_page($ctx, 200, $logref); } my $cmd = ['git', "--git-dir=$git->{git_dir}", qw(show --encoding=UTF-8 --no-color --no-abbrev), $oid ]; my $qsp = PublicInbox::Qspawn->new($cmd); my $env = $ctx->{env}; $ctx->{-qsp} = $qsp; $ctx->{-logref} = $logref; $qsp->psgi_qx($env, undef, \&show_other_result, $ctx); } # user_cb for SolverGit, called as: user_cb->($result_or_error, $uarg) sub solve_result { my ($res, $ctx) = @_; my ($log, $hints, $fn) = delete @$ctx{qw(log hints fn)}; unless (seek($log, 0, 0)) { $ctx->{env}->{'psgi.errors'}->print("seek(log): $!\n"); return html_page($ctx, 500, \'seek error'); } $log = do { local $/; <$log> }; my $ref = ref($res); my $l = PublicInbox::Linkify->new; $log = '
      debug log:

      ' .
      		$l->to_html($log) . '
      '; $res or return html_page($ctx, 404, \$log); $ref eq 'ARRAY' or return html_page($ctx, 500, \$log); my ($git, $oid, $type, $size, $di) = @$res; return show_other($ctx, $res, \$log, $fn) if $type ne 'blob'; my $path = to_filename($di->{path_b} // $hints->{path_b} // 'blob'); my $raw_link = "(raw)"; if ($size > $MAX_SIZE) { return stream_large_blob($ctx, $res, \$log, $fn) if defined $fn; $log = "
      Too big to show, download available\n" .
      			"$oid $type $size bytes $raw_link
      " . $log; return html_page($ctx, 200, \$log); } my $blob = $git->cat_file($oid); if (!$blob) { # WTF? my $e = "Failed to retrieve generated blob ($oid)"; $ctx->{env}->{'psgi.errors'}->print("$e ($git->{git_dir})\n"); $log = "
      $e
      " . $log; return html_page($ctx, 500, \$log); } my $bin = index(substr($$blob, 0, $BIN_DETECT), "\0") >= 0; if (defined $fn) { my $h = [ 'Content-Length', $size, 'Content-Type' ]; push(@$h, ($bin ? 'application/octet-stream' : 'text/plain')); return delete($ctx->{-wcb})->([200, $h, [ $$blob ]]); } if ($bin) { $log = "
      $oid $type $size bytes (binary)" .
      			" $raw_link
      " . $log; return html_page($ctx, 200, \$log); } # TODO: detect + convert to ensure validity utf8::decode($$blob); my $nl = ($$blob =~ s/\r?\n/\n/sg); my $pad = length($nl); $l->linkify_1($$blob); my $ok = $hl->do_hl($blob, $path) if $hl; if ($ok) { $blob = $ok; } else { $$blob = ascii_html($$blob); } # using some of the same CSS class names and ids as cgit $log = "
      $oid $type $size bytes $raw_link
      " . "
      ". "
      " . join('', map {
      			sprintf("% ${pad}u\n", $_)
      		} (1..$nl)) . '
      ' . '
       
      '. # pad for non-CSS users "" . $l->linkify_2($$blob) . '
  • ' . $log; html_page($ctx, 200, \$log); } # GET /$INBOX/$GIT_OBJECT_ID/s/ # GET /$INBOX/$GIT_OBJECT_ID/s/$FILENAME sub show ($$;$) { my ($ctx, $oid_b, $fn) = @_; my $qp = $ctx->{qp}; my $hints = $ctx->{hints} = {}; while (my ($from, $to) = each %QP_MAP) { defined(my $v = $qp->{$from}) or next; $hints->{$to} = $v if $v ne ''; } $ctx->{'log'} = tmpfile("solve.$oid_b"); $ctx->{fn} = $fn; my $solver = PublicInbox::SolverGit->new($ctx->{-inbox}, \&solve_result, $ctx); # PSGI server will call this immediately and give us a callback (-wcb) sub { $ctx->{-wcb} = $_[0]; # HTTP write callback $solver->solve($ctx->{env}, $ctx->{log}, $oid_b, $hints); }; } 1; public-inbox-1.6.1/lib/PublicInbox/WWW.pm000066400000000000000000000451651377346120300201410ustar00rootroot00000000000000# Copyright (C) 2014-2020 all contributors # License: AGPL-3.0+ # # Main web interface for mailing list archives # # We focus on the lowest common denominators here: # - targeted at text-only console browsers (w3m, links, etc..) # - Only basic HTML, CSS only for line-wrapping
     text content for GUIs
    #   and diff/syntax-highlighting (optional)
    # - No JavaScript, graphics or icons allowed.
    # - Must not rely on static content
    # - UTF-8 is only for user-content, 7-bit US-ASCII for us
    package PublicInbox::WWW;
    use 5.010_001;
    use strict;
    use warnings;
    use bytes (); # only for bytes::length
    use PublicInbox::Config;
    use PublicInbox::Hval;
    use URI::Escape qw(uri_unescape);
    use PublicInbox::MID qw(mid_escape);
    use PublicInbox::GitHTTPBackend;
    use PublicInbox::UserContent;
    use PublicInbox::WwwStatic qw(r path_info_raw);
    use PublicInbox::Eml;
    
    # TODO: consider a routing tree now that we have more endpoints:
    our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
    our $MID_RE = qr!([^/]+)!;
    our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
    our $ATTACH_RE = qr!([0-9][0-9\.]*)-($PublicInbox::Hval::FN)!;
    our $OID_RE = qr![a-f0-9]{7,}!;
    
    sub new {
    	my ($class, $pi_config) = @_;
    	$pi_config ||= PublicInbox::Config->new;
    	bless { pi_config => $pi_config }, $class;
    }
    
    # backwards compatibility, do not use
    sub run {
    	my ($req, $method) = @_;
    	PublicInbox::WWW->new->call($req->env);
    }
    
    sub call {
    	my ($self, $env) = @_;
    	my $ctx = { env => $env, www => $self };
    
    	# we don't care about multi-value
    	%{$ctx->{qp}} = map {
    		utf8::decode($_);
    		tr/+/ /;
    		my ($k, $v) = split('=', $_, 2);
    		$v = uri_unescape($v // '');
    		# none of the keys we care about will need escaping
    		$k => $v;
    	} split(/[&;]+/, $env->{QUERY_STRING});
    
    	my $path_info = path_info_raw($env);
    	my $method = $env->{REQUEST_METHOD};
    
    	if ($method eq 'POST') {
    		if ($path_info =~ m!$INBOX_RE/(?:(?:git/)?([0-9]+)(?:\.git)?/)?
    					(git-upload-pack)\z!x) {
    			my ($epoch, $path) = ($2, $3);
    			return invalid_inbox($ctx, $1) ||
    				serve_git($ctx, $epoch, $path);
    		} elsif ($path_info =~ m!$INBOX_RE/(\w+)\.sql\.gz\z!o) {
    			return get_altid_dump($ctx, $1, $2);
    		} elsif ($path_info =~ m!$INBOX_RE/!o) {
    			return invalid_inbox($ctx, $1) || mbox_results($ctx);
    		}
    	}
    	elsif ($method !~ /\A(?:GET|HEAD)\z/) {
    		return r(405);
    	}
    
    	# top-level indices and feeds
    	if ($path_info eq '/') {
    		require PublicInbox::WwwListing;
    		PublicInbox::WwwListing->response($ctx);
    	} elsif ($path_info eq '/manifest.js.gz') {
    		require PublicInbox::ManifestJsGz;
    		PublicInbox::ManifestJsGz->response($ctx);
    	} elsif ($path_info =~ m!$INBOX_RE\z!o) {
    		invalid_inbox($ctx, $1) || r301($ctx, $1);
    	} elsif ($path_info =~ m!$INBOX_RE(?:/|/index\.html)?\z!o) {
    		invalid_inbox($ctx, $1) || get_index($ctx);
    	} elsif ($path_info =~ m!$INBOX_RE/(?:atom\.xml|new\.atom)\z!o) {
    		invalid_inbox($ctx, $1) || get_atom($ctx);
    	} elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) {
    		invalid_inbox($ctx, $1) || get_new($ctx);
    	} elsif ($path_info =~ m!$INBOX_RE/description\z!o) {
    		get_description($ctx, $1);
    	} elsif ($path_info =~ m!$INBOX_RE/(?:(?:git/)?([0-9]+)(?:\.git)?/)?
    				($PublicInbox::GitHTTPBackend::ANY)\z!ox) {
    		my ($epoch, $path) = ($2, $3);
    		invalid_inbox($ctx, $1) || serve_git($ctx, $epoch, $path);
    	} elsif ($path_info =~ m!$INBOX_RE/([a-zA-Z0-9_\-]+).mbox\.gz\z!o) {
    		serve_mbox_range($ctx, $1, $2);
    	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) {
    		msg_page($ctx, $1, $2, $3);
    
    	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$ATTACH_RE\z!o) {
    		my ($idx, $fn) = ($3, $4);
    		invalid_inbox_mid($ctx, $1, $2) || get_attach($ctx, $idx, $fn);
    	# in case people leave off the trailing slash:
    	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/(T|t)\z!o) {
    		my ($inbox, $mid_ue, $suffix) = ($1, $2, $3);
    		$suffix .= $suffix =~ /\A[tT]\z/ ? '/#u' : '/';
    		r301($ctx, $inbox, $mid_ue, $suffix);
    
    	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/R/?\z!o) {
    		my ($inbox, $mid_ue) = ($1, $2);
    		r301($ctx, $inbox, $mid_ue, '#R');
    
    	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/f/?\z!o) {
    		r301($ctx, $1, $2);
    	} elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) {
    		get_text($ctx, $1, $2);
    	} elsif ($path_info =~ m!$INBOX_RE/([a-zA-Z0-9_\-\.]+)\.css\z!o) {
    		get_css($ctx, $1, $2);
    	} elsif ($path_info =~ m!$INBOX_RE/manifest\.js\.gz\z!o) {
    		get_inbox_manifest($ctx, $1, $2);
    	} elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) {
    		get_vcs_object($ctx, $1, $2);
    	} elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/
    				($PublicInbox::Hval::FN)\z!ox) {
    		get_vcs_object($ctx, $1, $2, $3);
    	} elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) {
    		r301($ctx, $1, $2, 's/');
    	} elsif ($path_info =~ m!$INBOX_RE/(\w+)\.sql\.gz\z!o) {
    		get_altid_dump($ctx, $1, $2);
    	# convenience redirects order matters
    	} elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) {
    		r301($ctx, $1, $2);
    
    	} else {
    		legacy_redirects($ctx, $path_info);
    	}
    }
    
    # for CoW-friendliness, MOOOOO!  Even for single-process setups,
    # we want to get all immortal allocations done early to avoid heap
    # fragmentation since common allocators favor a large contiguous heap.
    sub preload {
    	my ($self) = @_;
    
    	# populate caches used by Encode internally, since emails
    	# may show up with any encoding.
    	require Encode;
    	Encode::find_encoding($_) for Encode->encodings(':all');
    
    	require PublicInbox::ExtMsg;
    	require PublicInbox::Feed;
    	require PublicInbox::View;
    	require PublicInbox::SearchThread;
    	require PublicInbox::Eml;
    	require PublicInbox::Mbox;
    	require PublicInbox::ViewVCS;
    	require PublicInbox::WwwText;
    	require PublicInbox::WwwAttach;
    	eval {
    		require PublicInbox::Search;
    		PublicInbox::Search::load_xapian();
    	};
    	for (qw(SearchView MboxGz WwwAltId)) {
    		eval "require PublicInbox::$_;";
    	}
    	if (ref($self)) {
    		my $pi_config = $self->{pi_config};
    		if (defined($pi_config->{'publicinbox.cgitrc'})) {
    			$pi_config->limiter('-cgit');
    		}
    		$self->cgit;
    		$self->stylesheets_prepare($_) for ('', '../', '../../');
    		$self->news_www;
    		$pi_config->each_inbox(\&preload_inbox);
    	}
    }
    
    sub preload_inbox {
    	my $ibx = shift;
    	$ibx->altid_map;
    	$ibx->cloneurl;
    	$ibx->description;
    	$ibx->base_url;
    }
    
    # private functions below
    
    sub r404 {
    	my ($ctx) = @_;
    	if ($ctx && $ctx->{mid}) {
    		require PublicInbox::ExtMsg;
    		return PublicInbox::ExtMsg::ext_msg($ctx);
    	}
    	r(404);
    }
    
    sub news_cgit_fallback ($) {
    	my ($ctx) = @_;
    	my $www = $ctx->{www};
    	my $env = $ctx->{env};
    	my $res = $www->news_www->call($env);
    	$res->[0] == 404 ? $www->cgit->call($env) : $res;
    }
    
    # returns undef if valid, array ref response if invalid
    sub invalid_inbox ($$) {
    	my ($ctx, $inbox) = @_;
    	my $ibx = $ctx->{www}->{pi_config}->lookup_name($inbox);
    	if (defined $ibx) {
    		$ctx->{-inbox} = $ibx;
    		return;
    	}
    
    	# sometimes linkifiers (not ours!) screw up automatic link
    	# generation and link things intended for nntp:// to https?://,
    	# so try to infer links and redirect them to the appropriate
    	# list URL.
    	news_cgit_fallback($ctx);
    }
    
    # returns undef if valid, array ref response if invalid
    sub invalid_inbox_mid {
    	my ($ctx, $inbox, $mid_ue) = @_;
    	my $ret = invalid_inbox($ctx, $inbox);
    	return $ret if $ret;
    
    	my $mid = $ctx->{mid} = uri_unescape($mid_ue);
    	my $ibx = $ctx->{-inbox};
    	if ($mid =~ m!\A([a-f0-9]{2})([a-f0-9]{38})\z!) {
    		my ($x2, $x38) = ($1, $2);
    		# this is horrifically wasteful for legacy URLs:
    		my $str = $ctx->{-inbox}->msg_by_path("$x2/$x38") or return;
    		my $s = PublicInbox::Eml->new($str);
    		$mid = PublicInbox::MID::mid_clean($s->header_raw('Message-ID'));
    		return r301($ctx, $inbox, mid_escape($mid));
    	}
    	undef;
    }
    
    # /$INBOX/new.atom                     -> Atom feed, includes replies
    sub get_atom {
    	my ($ctx) = @_;
    	require PublicInbox::Feed;
    	PublicInbox::Feed::generate($ctx);
    }
    
    # /$INBOX/new.html			-> HTML only
    sub get_new {
    	my ($ctx) = @_;
    	require PublicInbox::Feed;
    	PublicInbox::Feed::new_html($ctx);
    }
    
    # /$INBOX/?r=$GIT_COMMIT                 -> HTML only
    sub get_index {
    	my ($ctx) = @_;
    	require PublicInbox::Feed;
    	if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) {
    		require PublicInbox::SearchView;
    		PublicInbox::SearchView::sres_top_html($ctx);
    	} else {
    		PublicInbox::Feed::generate_html_index($ctx);
    	}
    }
    
    # /$INBOX/$MESSAGE_ID/raw                    -> raw mbox
    sub get_mid_txt {
    	my ($ctx) = @_;
    	require PublicInbox::Mbox;
    	PublicInbox::Mbox::emit_raw($ctx) || r404($ctx);
    }
    
    # /$INBOX/$MESSAGE_ID/                   -> HTML content (short quotes)
    sub get_mid_html {
    	my ($ctx) = @_;
    	require PublicInbox::View;
    	PublicInbox::View::msg_page($ctx) || r404($ctx);
    }
    
    # /$INBOX/$MESSAGE_ID/t/
    sub get_thread {
    	my ($ctx, $flat) = @_;
    	$ctx->{-inbox}->over or return need($ctx, 'Overview');
    	$ctx->{flat} = $flat;
    	require PublicInbox::View;
    	PublicInbox::View::thread_html($ctx);
    }
    
    # /$INBOX/_/text/$KEY/
    # /$INBOX/_/text/$KEY/raw
    # KEY may contain slashes
    sub get_text {
    	my ($ctx, $inbox, $key) = @_;
    	my $r404 = invalid_inbox($ctx, $inbox);
    	return $r404 if $r404;
    
    	require PublicInbox::WwwText;
    	PublicInbox::WwwText::get_text($ctx, $key);
    }
    
    # show git objects (blobs and commits)
    # /$INBOX/$GIT_OBJECT_ID/s/
    # /$INBOX/$GIT_OBJECT_ID/s/$FILENAME
    sub get_vcs_object ($$$;$) {
    	my ($ctx, $inbox, $oid, $filename) = @_;
    	my $r404 = invalid_inbox($ctx, $inbox);
    	return $r404 if $r404;
    	require PublicInbox::ViewVCS;
    	PublicInbox::ViewVCS::show($ctx, $oid, $filename);
    }
    
    sub get_altid_dump {
    	my ($ctx, $inbox, $altid_pfx) =@_;
    	my $r404 = invalid_inbox($ctx, $inbox);
    	return $r404 if $r404;
    	eval { require PublicInbox::WwwAltId } or return need($ctx, 'sqlite3');
    	PublicInbox::WwwAltId::sqldump($ctx, $altid_pfx);
    }
    
    sub need {
    	my ($ctx, $extra) = @_;
    	require PublicInbox::WwwStream;
    	PublicInbox::WwwStream::html_oneshot($ctx, 501, \<$extra is not available for this public-inbox
    Return to index
    EOF } # /$INBOX/$MESSAGE_ID/t.mbox -> thread as mbox # /$INBOX/$MESSAGE_ID/t.mbox.gz -> thread as gzipped mbox # note: I'm not a big fan of other compression formats since they're # significantly more expensive on CPU than gzip and less-widely available, # especially on older systems. Stick to zlib since that's what git uses. sub get_thread_mbox { my ($ctx, $sfx) = @_; my $over = $ctx->{-inbox}->over or return need($ctx, 'Overview'); require PublicInbox::Mbox; PublicInbox::Mbox::thread_mbox($ctx, $over, $sfx); } # /$INBOX/$MESSAGE_ID/t.atom -> thread as Atom feed sub get_thread_atom { my ($ctx) = @_; $ctx->{-inbox}->over or return need($ctx, 'Overview'); require PublicInbox::Feed; PublicInbox::Feed::generate_thread_atom($ctx); } sub legacy_redirects { my ($ctx, $path_info) = @_; # single-message pages if ($path_info =~ m!$INBOX_RE/m/(\S+)/\z!o) { r301($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/m/(\S+)/raw\z!o) { r301($ctx, $1, $2, 'raw'); } elsif ($path_info =~ m!$INBOX_RE/f/(\S+)/\z!o) { r301($ctx, $1, $2); # thread display } elsif ($path_info =~ m!$INBOX_RE/t/(\S+)/\z!o) { r301($ctx, $1, $2, 't/#u'); } elsif ($path_info =~ m!$INBOX_RE/t/(\S+)/mbox(\.gz)?\z!o) { r301($ctx, $1, $2, "t.mbox$3"); # even older legacy redirects } elsif ($path_info =~ m!$INBOX_RE/m/(\S+)\.html\z!o) { r301($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/t/(\S+)\.html\z!o) { r301($ctx, $1, $2, 't/#u'); } elsif ($path_info =~ m!$INBOX_RE/f/(\S+)\.html\z!o) { r301($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/(?:m|f)/(\S+)\.txt\z!o) { r301($ctx, $1, $2, 'raw'); } elsif ($path_info =~ m!$INBOX_RE/t/(\S+)(\.mbox(?:\.gz)?)\z!o) { r301($ctx, $1, $2, "t$3"); # legacy convenience redirects, order still matters } elsif ($path_info =~ m!$INBOX_RE/m/(\S+)\z!o) { r301($ctx, $1, $2); } elsif ($path_info =~ m!$INBOX_RE/t/(\S+)\z!o) { r301($ctx, $1, $2, 't/#u'); } elsif ($path_info =~ m!$INBOX_RE/f/(\S+)\z!o) { r301($ctx, $1, $2); # some Message-IDs have slashes in them and the HTTP server # may try to be clever and unescape them :< } elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/$END_RE\z!o) { msg_page($ctx, $1, $2, $3); # in case people leave off the trailing slash: } elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/(T|t)\z!o) { r301($ctx, $1, $2, $3 eq 't' ? 't/#u' : $3); } elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/f\z!o) { r301($ctx, $1, $2); } else { news_cgit_fallback($ctx); } } sub r301 { my ($ctx, $inbox, $mid_ue, $suffix) = @_; my $ibx = $ctx->{-inbox}; unless ($ibx) { my $r404 = invalid_inbox($ctx, $inbox); return $r404 if $r404; $ibx = $ctx->{-inbox}; } my $url = $ibx->base_url($ctx->{env}); my $qs = $ctx->{env}->{QUERY_STRING}; if (defined $mid_ue) { # common, and much nicer as '@' than '%40': $mid_ue =~ s/%40/@/g; $url .= $mid_ue . '/'; } $url .= $suffix if (defined $suffix); $url .= "?$qs" if $qs ne ''; [ 301, [ Location => $url, 'Content-Type' => 'text/plain' ], [ "Redirecting to $url\n" ] ] } sub msg_page { my ($ctx, $inbox, $mid_ue, $e) = @_; my $ret; $ret = invalid_inbox_mid($ctx, $inbox, $mid_ue) and return $ret; '' eq $e and return get_mid_html($ctx); 'T/' eq $e and return get_thread($ctx, 1); 't/' eq $e and return get_thread($ctx); 't.atom' eq $e and return get_thread_atom($ctx); 't.mbox' eq $e and return get_thread_mbox($ctx); 't.mbox.gz' eq $e and return get_thread_mbox($ctx, '.gz'); 'raw' eq $e and return get_mid_txt($ctx); # legacy, but no redirect for compatibility: 'f/' eq $e and return get_mid_html($ctx); r404($ctx); } sub serve_git { my ($ctx, $epoch, $path) = @_; my $env = $ctx->{env}; my $ibx = $ctx->{-inbox}; my $git = defined $epoch ? $ibx->git_epoch($epoch) : $ibx->git; $git ? PublicInbox::GitHTTPBackend::serve($env, $git, $path) : r404(); } sub mbox_results { my ($ctx) = @_; if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) { $ctx->{-inbox}->search or return need($ctx, 'search'); require PublicInbox::SearchView; return PublicInbox::SearchView::mbox_results($ctx); } r404(); } sub serve_mbox_range { my ($ctx, $inbox, $range) = @_; invalid_inbox($ctx, $inbox) || eval { require PublicInbox::Mbox; PublicInbox::Mbox::emit_range($ctx, $range); } } sub news_www { my ($self) = @_; $self->{news_www} ||= do { require PublicInbox::NewsWWW; PublicInbox::NewsWWW->new($self->{pi_config}); } } sub cgit { my ($self) = @_; $self->{cgit} ||= do { my $pi_config = $self->{pi_config}; if (defined($pi_config->{'publicinbox.cgitrc'})) { require PublicInbox::Cgit; PublicInbox::Cgit->new($pi_config); } else { require Plack::Util; Plack::Util::inline_object(call => sub { r404() }); } } } # GET $INBOX/manifest.js.gz sub get_inbox_manifest ($$$) { my ($ctx, $inbox, $key) = @_; my $r404 = invalid_inbox($ctx, $inbox); return $r404 if $r404; require PublicInbox::ManifestJsGz; PublicInbox::ManifestJsGz::per_inbox($ctx); } sub get_attach { my ($ctx, $idx, $fn) = @_; require PublicInbox::WwwAttach; PublicInbox::WwwAttach::get_attach($ctx, $idx, $fn); } # User-generated content (UGC) may have excessively long lines # and screw up rendering on some browsers, so we use pre-wrap. # # We also force everything to the same scaled font-size because GUI # browsers (tested both Firefox and surf (webkit)) uses a larger font # for the Search
    element than the rest of the page. Font size # uniformity is important to people who rely on gigantic fonts. # Finally, we use monospace to ensure the Search field and button # has the same size and spacing as everything else which is #
    -formatted anyways.
    our $STYLE = 'pre{white-space:pre-wrap}*{font-size:100%;font-family:monospace}';
    
    sub stylesheets_prepare ($$) {
    	my ($self, $upfx) = @_;
    	my $mini = eval {
    		require CSS::Minifier;
    		sub { CSS::Minifier::minify(input => $_[0]) };
    	} || eval {
    		require CSS::Minifier::XS;
    		sub { CSS::Minifier::XS::minify($_[0]) };
    	} || sub { $_[0] };
    
    	my $css_map = {};
    	my $stylesheets = $self->{pi_config}->{css} || [];
    	my $links = [];
    	my $inline_ok = 1;
    
    	foreach my $s (@$stylesheets) {
    		my $attr = {};
    		local $_ = $s;
    		foreach my $k (qw(media title href)) {
    			if (s/\s*$k='([^']+)'// || s/\s*$k=(\S+)//) {
    				$attr->{$k} = $1;
    			}
    		}
    
    		if (defined $attr->{href}) {
    			$inline_ok = 0;
    		} else {
    			my $fn = $_;
    			my ($key) = (m!([^/]+?)(?:\.css)?\z!i);
    			if ($key !~ /\A[a-zA-Z0-9_\-\.]+\z/) {
    				warn "ignoring $fn, non-ASCII word character\n";
    				next;
    			}
    			open(my $fh, '<', $fn) or do {
    				warn "failed to open $fn: $!\n";
    				next;
    			};
    			my $ctime = 0;
    			my $local = do { local $/; <$fh> };
    			if ($local =~ /\S/) {
    				$ctime = sprintf('%x',(stat($fh))[10]);
    				$local = $mini->($local);
    			}
    
    			# do not let BOFHs override userContent.css:
    			if ($local =~ /!\s*important\b/i) {
    				warn "ignoring $fn since it uses `!important'\n";
    				next;
    			}
    
    			$css_map->{$key} = $local;
    			$attr->{href} = "$upfx$key.css?$ctime";
    			if (defined($attr->{title})) {
    				$inline_ok = 0;
    			} elsif (($attr->{media}||'screen') eq 'screen') {
    				$attr->{-inline} = $local;
    			}
    		}
    		push @$links, $attr;
    	}
    
    	my $buf = "';
    
    	if (@$links) {
    		foreach my $attr (@$links) {
    			delete $attr->{-inline};
    			$buf .= "{"-style-$upfx"} = $buf;
    	} else {
    		$self->{-style_inline} = $buf;
    	}
    	$self->{-css_map} = $css_map;
    }
    
    # returns an HTML fragment with 
    EOF
    
    $STYLE =~ s/^\s*//gm;
    $STYLE =~ tr/\n//d;
    
    sub r ($;$) {
    	my ($code, $msg) = @_;
    	$msg ||= status_message($code);
    	[ $code, [ qw(Content-Type text/plain), 'Content-Length', length($msg),
    		@NO_CACHE ],
    	  [ $msg ] ]
    }
    
    sub getline_response ($$$$$) {
    	my ($env, $in, $off, $len, $path) = @_;
    	my $r = bless {}, __PACKAGE__;
    	if ($env->{'pi-httpd.async'}) { # public-inbox-httpd-only mode
    		$env->{'psgix.no-compress'} = 1; # do not chunk response
    		%$r = ( bypass => [$in, $off, $len, $env->{'psgix.io'}] );
    	} else {
    		%$r = ( in => $in, off => $off, len => $len, path => $path );
    	}
    	$r;
    }
    
    sub setup_range {
    	my ($env, $in, $h, $beg, $end, $size) = @_;
    	my $code = 200;
    	my $len = $size;
    	if ($beg eq '') {
    		if ($end ne '') { # "bytes=-$end" => last N bytes
    			$beg = $size - $end;
    			$beg = 0 if $beg < 0;
    			$end = $size - 1;
    			$code = 206;
    		} else {
    			$code = 416;
    		}
    	} else {
    		if ($beg > $size) {
    			$code = 416;
    		} elsif ($end eq '' || $end >= $size) {
    			$end = $size - 1;
    			$code = 206;
    		} elsif ($end < $size) {
    			$code = 206;
    		} else {
    			$code = 416;
    		}
    	}
    	if ($code == 206) {
    		$len = $end - $beg + 1;
    		if ($len <= 0) {
    			$code = 416;
    		} else {
    			push @$h, qw(Accept-Ranges bytes Content-Range);
    			push @$h, "bytes $beg-$end/$size";
    
    			# FIXME: Plack::Middleware::Deflater bug?
    			$env->{'psgix.no-compress'} = 1;
    		}
    	}
    	if ($code == 416) {
    		push @$h, 'Content-Range', "bytes */$size";
    		return [ 416, $h, [] ];
    	}
    	($code, $beg, $len);
    }
    
    # returns a PSGI arrayref response iff .gz and non-.gz mtimes match
    sub try_gzip_static ($$$$) {
    	my ($env, $h, $path, $type) = @_;
    	return unless ($env->{HTTP_ACCEPT_ENCODING} // '') =~ /\bgzip\b/i;
    	my $mtime;
    	return unless -f $path && defined(($mtime = (stat(_))[9]));
    	my $gz = "$path.gz";
    	return unless -f $gz && (stat(_))[9] == $mtime;
    	my $res = response($env, $h, $gz, $type);
    	return if ($res->[0] > 300 || $res->[0] < 200);
    	push @{$res->[1]}, qw(Cache-Control no-transform
    				Content-Encoding gzip
    				Vary Accept-Encoding);
    	$res;
    }
    
    sub response ($$$;$) {
    	my ($env, $h, $path, $type) = @_;
    	$type //= Plack::MIME->mime_type($path) // 'application/octet-stream';
    	if ($path !~ /\.gz\z/i) {
    		if (my $res = try_gzip_static($env, $h, $path, $type)) {
    			return $res;
    		}
    	}
    
    	my $in;
    	if ($env->{REQUEST_METHOD} eq 'HEAD') {
    		return r(404) unless -f $path && -r _; # in case it's a FIFO :P
    	} else { # GET, callers should've already filtered out other methods
    		if (!sysopen($in, $path, O_RDONLY|O_NONBLOCK)) {
    			return r(404) if $! == ENOENT || $! == ENOTDIR;
    			return r(403) if $! == EACCES;
    			return r(500);
    		}
    		return r(404) unless -f $in;
    	}
    	my $size = -s _; # bare "_" reuses "struct stat" from "-f" above
    	my $mtime = time2str((stat(_))[9]);
    
    	if (my $ims = $env->{HTTP_IF_MODIFIED_SINCE}) {
    		return [ 304, [], [] ] if $mtime eq $ims;
    	}
    
    	my $len = $size;
    	my $code = 200;
    	push @$h, 'Content-Type', $type;
    	my $off = 0;
    	if (($env->{HTTP_RANGE} || '') =~ /\bbytes=([0-9]*)-([0-9]*)\z/) {
    		($code, $off, $len) = setup_range($env, $in, $h, $1, $2, $size);
    		return $code if ref($code);
    	}
    	push @$h, 'Content-Length', $len, 'Last-Modified', $mtime;
    	[ $code, $h, $in ? getline_response($env, $in, $off, $len, $path) : [] ]
    }
    
    # called by PSGI servers on each response chunk:
    sub getline {
    	my ($self) = @_;
    
    	# avoid buffering, by becoming the buffer! (public-inbox-httpd)
    	if (my $tmpio = delete $self->{bypass}) {
    		my $http = pop @$tmpio; # PublicInbox::HTTP
    		push @{$http->{wbuf}}, $tmpio; # [ $in, $off, $len ]
    		$http->flush_write;
    		return; # undef, EOF
    	}
    
    	# generic PSGI runs this:
    	my $len = $self->{len} or return; # undef, tells server we're done
    	my $n = 8192;
    	$n = $len if $len < $n;
    	sysseek($self->{in}, $self->{off}, SEEK_SET) or
    			die "sysseek ($self->{path}): $!";
    	my $r = sysread($self->{in}, my $buf, $n);
    	if (defined $r && $r > 0) { # success!
    		$self->{len} = $len - $r;
    		$self->{off} += $r;
    		return $buf;
    	}
    	my $m = defined $r ? "EOF with $len bytes left" : "read error: $!";
    	die "$self->{path} $m, dropping client socket\n";
    }
    
    sub close {} # noop, called by PSGI server, just let everything go out-of-scope
    
    # OO interface for use as a Plack app
    sub new {
    	my ($class, %opt) = @_;
    	my $index = $opt{'index'} // [ 'index.html' ];
    	$index = [ $index ] if defined($index) && ref($index) ne 'ARRAY';
    	$index = undef if scalar(@$index) == 0;
    	my $style = $opt{style};
    	if (defined $style) {
    		$style = \$style unless ref($style);
    	}
    	my $docroot = $opt{docroot};
    	die "`docroot' not set" unless defined($docroot) && $docroot ne '';
    	bless {
    		docroot => $docroot,
    		index => $index,
    		autoindex => $opt{autoindex},
    		style => $style // \$STYLE,
    	}, $class;
    }
    
    # PATH_INFO is decoded, and we want the undecoded original
    my %path_re_cache;
    sub path_info_raw ($) {
    	my ($env) = @_;
    	my $sn = $env->{SCRIPT_NAME};
    	my $re = $path_re_cache{$sn} ||= do {
    		$sn = '/'.$sn unless index($sn, '/') == 0;
    		$sn =~ s!/\z!!;
    		qr!\A(?:https?://[^/]+)?\Q$sn\E(/[^\?\#]+)!;
    	};
    	$env->{REQUEST_URI} =~ $re ? $1 : $env->{PATH_INFO};
    }
    
    sub redirect_slash ($) {
    	my ($env) = @_;
    	my $url = $env->{'psgi.url_scheme'} . '://';
    	my $host_port = $env->{HTTP_HOST} //
    		"$env->{SERVER_NAME}:$env->{SERVER_PORT}";
    	$url .= $host_port . path_info_raw($env) . '/';
    	my $body = "Redirecting to $url\n";
    	[ 302, [ qw(Content-Type text/plain), 'Location', $url,
    		'Content-Length', length($body) ], [ $body ] ]
    }
    
    sub human_size ($) {
    	my ($size) = @_;
    	my $suffix = '';
    	for my $s (qw(K M G T P)) {
    		last if $size < 1024;
    		$size /= 1024;
    		if ($size <= 1024) {
    			$suffix = $s;
    			last;
    		}
    	}
    	sprintf('%lu', $size).$suffix;
    }
    
    # by default, this returns "index.html" if it exists for a given directory
    # It'll generate a directory listing, (autoindex).
    # May be disabled by setting autoindex => 0
    sub dir_response ($$$) {
    	my ($self, $env, $fs_path) = @_;
    	if (my $index = $self->{'index'}) { # serve index.html or similar
    		for my $html (@$index) {
    			my $p = $fs_path . $html;
    			my $res = response($env, [], $p);
    			return $res if $res->[0] != 404;
    		}
    	}
    	return r(404) unless $self->{autoindex};
    	opendir(my $dh, $fs_path) or do {
    		return r(404) if ($! == ENOENT || $! == ENOTDIR);
    		return r(403) if $! == EACCES;
    		return r(500);
    	};
    	my @entries = grep(!/\A\./, readdir($dh));
    	$dh = undef;
    	my (%dirs, %other, %want_gz);
    	my $path_info = $env->{PATH_INFO};
    	push @entries, '..' if $path_info ne '/';
    	for my $base (@entries) {
    		my $href = ascii_html(uri_escape_utf8($base));
    		my $name = ascii_html($base);
    		my @st = stat($fs_path . $base) or next; # unlikely
    		my ($gzipped, $uncompressed, $hsize);
    		my $entry = '';
    		my $mtime = $st[9];
    		if (-d _) {
    			$href .= '/';
    			$name .= '/';
    			$hsize = '-';
    			$dirs{"$base\0$mtime"} = \$entry;
    		} elsif (-f _) {
    			$other{"$base\0$mtime"} = \$entry;
    			if ($base !~ /\.gz\z/i) {
    				$want_gz{"$base.gz\0$mtime"} = undef;
    			}
    			$hsize = human_size($st[7]);
    		} else {
    			next;
    		}
    		# 54 = 80 - (SP length(strftime(%Y-%m-%d %k:%M)) SP human_size)
    		$hsize = sprintf('% 8s', $hsize);
    		my $pad = 54 - length($name);
    		$pad = 1 if $pad <= 0;
    		$entry .= qq($name) . (' ' x $pad);
    		$mtime = strftime('%Y-%m-%d %k:%M', gmtime($mtime));
    		$entry .= $mtime . $hsize;
    	}
    
    	# filter out '.gz' files as long as the mtime matches the
    	# uncompressed version
    	delete(@other{keys %want_gz});
    	@entries = ((map { ${$dirs{$_}} } sort keys %dirs),
    			(map { ${$other{$_}} } sort keys %other));
    
    	my $path_info_html = ascii_html($path_info);
    	my $h = [qw(Content-Type text/html Content-Length), undef];
    	my $gzf = gzf_maybe($h, $env);
    	$gzf->zmore("Index of $path_info_html" .
    		${$self->{style}} .
    		"
    Index of $path_info_html

    \n");
    	$gzf->zmore(join("\n", @entries));
    	my $out = $gzf->zflush("

    \n"); $h->[3] = bytes::length($out); [ 200, $h, [ $out ] ] } sub call { # PSGI app endpoint my ($self, $env) = @_; return r(405) if $env->{REQUEST_METHOD} !~ /\A(?:GET|HEAD)\z/; my $path_info = $env->{PATH_INFO}; return r(403) if index($path_info, "\0") >= 0; my (@parts) = split(m!/+!, $path_info, -1); return r(403) if grep(/\A(?:\.\.)\z/, @parts) || $parts[0] ne ''; my $fs_path = join('/', $self->{docroot}, @parts); return dir_response($self, $env, $fs_path) if $parts[-1] eq ''; my $res = response($env, [], $fs_path); $res->[0] == 404 && -d $fs_path ? redirect_slash($env) : $res; } 1; public-inbox-1.6.1/lib/PublicInbox/WwwStream.pm000066400000000000000000000146421377346120300214110ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ # # HTML body stream for which yields getline+close methods for # generic PSGI servers and callbacks for public-inbox-httpd. # # See PublicInbox::GzipFilter parent class for more info. package PublicInbox::WwwStream; use strict; use parent qw(Exporter PublicInbox::GzipFilter); our @EXPORT_OK = qw(html_oneshot); use bytes (); # length use PublicInbox::Hval qw(ascii_html prurl ts2str); our $TOR_URL = 'https://www.torproject.org/'; our $CODE_URL = 'https://public-inbox.org/public-inbox.git'; sub base_url ($) { my $ctx = shift; my $base_url = $ctx->{-inbox}->base_url($ctx->{env}); chop $base_url; # no trailing slash for clone $base_url; } sub init { my ($ctx, $cb) = @_; $ctx->{cb} = $cb; $ctx->{base_url} = base_url($ctx); bless $ctx, __PACKAGE__; } sub async_eml { # for async_blob_cb my ($ctx, $eml) = @_; $ctx->{http_out}->write($ctx->translate($ctx->{cb}->($ctx, $eml))); } sub html_top ($) { my ($ctx) = @_; my $ibx = $ctx->{-inbox}; my $desc = ascii_html($ibx->description); my $title = delete($ctx->{-title_html}) // $desc; my $upfx = $ctx->{-upfx} || ''; my $help = $upfx.'_/text/help'; my $color = $upfx.'_/text/color'; my $atom = $ctx->{-atom} || $upfx.'new.atom'; my $top = "$desc"; if (my $t_max = $ctx->{-t_max}) { $t_max = ts2str($t_max); $top = qq($top); # we had some kind of query, link to /$INBOX/?t=YYYYMMDDhhmmss } elsif ($ctx->{qp}->{t}) { $top = qq($top); } my $links = qq(help / ). qq(color / ). qq(mirror / ). qq(Atom feed); if ($ibx->search) { my $q_val = delete($ctx->{-q_value_html}) // ''; $q_val = qq(\nvalue="$q_val") if $q_val ne ''; # XXX gross, for SearchView.pm my $extra = delete($ctx->{-extra_form_html}) // ''; my $action = $upfx eq '' ? './' : $upfx; $top = qq{
    $top} .
    			  qq{\n} .
    			  $extra .
    			  qq{} .
    			  ' ' . $links .
    			  q{
    } } else { $top = '
    ' . $top . "\n" . $links . '
    '; } "$title" . qq() . $ctx->{www}->style($upfx) . ''. $top . (delete($ctx->{-html_tip}) // ''); } sub coderepos ($) { my ($ctx) = @_; my $ibx = $ctx->{-inbox}; my @ret; if (defined(my $cr = $ibx->{coderepo})) { my $cfg = $ctx->{www}->{pi_config}; my $env = $ctx->{env}; for my $cr_name (@$cr) { my $urls = $cfg->{"coderepo.$cr_name.cgiturl"}; if ($urls) { $ret[0] //= <$u) } sub _html_end { my ($ctx) = @_; my $urls = <This inbox may be cloned and mirrored by anyone: EOF my $ibx = $ctx->{-inbox}; my $desc = ascii_html($ibx->description); my @urls; my $http = $ctx->{base_url}; my $max = $ibx->max_git_epoch; my $dir = (split(m!/!, $http))[-1]; my %seen = ($http => 1); if (defined($max)) { # v2 for my $i (0..$max) { # old epochs my be deleted: -d "$ibx->{inboxdir}/git/$i.git" or next; my $url = "$http/$i"; $seen{$url} = 1; push @urls, "$url $dir/git/$i.git"; } my $nr = scalar(@urls); if ($nr > 1) { $urls .= "\n\t# this inbox consists of $nr epochs:"; $urls[0] .= "\t# oldest"; $urls[-1] .= "\t# newest"; } } else { # v1 push @urls, $http; } # FIXME: epoch splits can be different in other repositories, # use the "cloneurl" file as-is for now: foreach my $u (@{$ibx->cloneurl}) { next if $seen{$u}++; push @urls, $u =~ /\Ahttps?:/ ? qq($u) : $u; } $urls .= "\n" . join('', map { "\tgit clone --mirror $_\n" } @urls); my $addrs = $ibx->{address}; $addrs = join(' ', @$addrs) if ref($addrs) eq 'ARRAY'; my $v = defined $max ? '-V2' : '-V1'; $urls .= <{name} $dir/ $http \\ $addrs public-inbox-index $dir EOF my $cfg_link = ($ctx->{-upfx} // '').'_/text/config/raw'; $urls .= <config snippet for mirrors. EOF my @nntp = map { qq($_) } @{$ibx->nntp_url}; if (@nntp) { $urls .= @nntp == 1 ? 'Newsgroup' : 'Newsgroups are'; $urls .= ' available over NNTP:'; $urls .= "\n\t" . join("\n\t", @nntp) . "\n"; } if ($urls =~ m!\b[^:]+://\w+\.onion/!) { $urls .= " note: .onion URLs require Tor: "; $urls .= qq[$TOR_URL]; } '
    '.join("\n\n",
    		$desc,
    		$urls,
    		coderepos($ctx),
    		code_footer($ctx->{env})
    	).'
    '; } # callback for HTTP.pm (and any other PSGI servers) sub getline { my ($ctx) = @_; my $cb = $ctx->{cb} or return; while (defined(my $x = $cb->($ctx))) { # x = smsg or scalar non-ref if (ref($x)) { # smsg my $eml = $ctx->{-inbox}->smsg_eml($x) or next; $ctx->{smsg} = $x; return $ctx->translate($cb->($ctx, $eml)); } else { # scalar return $ctx->translate($x); } } delete $ctx->{cb}; $ctx->zflush(_html_end($ctx)); } sub html_oneshot ($$;$) { my ($ctx, $code, $sref) = @_; my $res_hdr = [ 'Content-Type' => 'text/html; charset=UTF-8', 'Content-Length' => undef ]; bless $ctx, __PACKAGE__; $ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($res_hdr, $ctx->{env}); $ctx->{base_url} //= do { $ctx->zmore(html_top($ctx)); base_url($ctx); }; $ctx->zmore($$sref) if $sref; my $bdy = $ctx->zflush(_html_end($ctx)); $res_hdr->[3] = bytes::length($bdy); [ $code, $res_hdr, [ $bdy ] ] } sub async_next ($) { my ($http) = @_; # PublicInbox::HTTP my $ctx = $http->{forward} or return; eval { if (my $smsg = $ctx->{smsg} = $ctx->{cb}->($ctx)) { $ctx->smsg_blob($smsg); } else { $ctx->{http_out}->write( $ctx->translate(_html_end($ctx))); $ctx->close; # GzipFilter->close } }; warn "E: $@" if $@; } sub aresponse { my ($ctx, $code, $cb) = @_; my $res_hdr = [ 'Content-Type' => 'text/html; charset=UTF-8' ]; init($ctx, $cb); $ctx->psgi_response($code, $res_hdr); } 1; public-inbox-1.6.1/lib/PublicInbox/WwwText.pm000066400000000000000000000224551377346120300211030ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ # used for displaying help texts and other non-mail content package PublicInbox::WwwText; use strict; use warnings; use bytes (); # only for bytes::length use PublicInbox::Linkify; use PublicInbox::WwwStream; use PublicInbox::Hval qw(ascii_html); use URI::Escape qw(uri_escape_utf8); use PublicInbox::GzipFilter qw(gzf_maybe); our $QP_URL = 'https://xapian.org/docs/queryparser.html'; our $WIKI_URL = 'https://en.wikipedia.org/wiki'; my $hl = eval { require PublicInbox::HlMod; PublicInbox::HlMod->new }; # /$INBOX/_/text/$KEY/ # KEY may contain slashes # For now, "help" is the only supported $KEY sub get_text { my ($ctx, $key) = @_; my $code = 200; $key = 'help' if !defined $key; # this 302s to _/text/help/ # get the raw text the same way we get mboxrds my $raw = ($key =~ s!/raw\z!!); my $have_tslash = ($key =~ s!/\z!!) if !$raw; my $txt = ''; my $hdr = [ 'Content-Type', 'text/plain', 'Content-Length', undef ]; if (!_default_text($ctx, $key, $hdr, \$txt)) { $code = 404; $txt = "404 Not Found ($key)\n"; } my $env = $ctx->{env}; if ($raw) { if ($code == 200) { my $gzf = gzf_maybe($hdr, $env); $txt = $gzf->translate($txt); $txt .= $gzf->zflush; } $hdr->[3] = bytes::length($txt); return [ $code, $hdr, [ $txt ] ] } # enforce trailing slash for "wget -r" compatibility if (!$have_tslash && $code == 200) { my $url = $ctx->{-inbox}->base_url($env); $url .= "_/text/$key/"; return [ 302, [ 'Content-Type', 'text/plain', 'Location', $url ], [ "Redirecting to $url\n" ] ]; } # Follow git commit message conventions, # first line is the Subject/title my ($title) = ($txt =~ /\A([^\n]*)/s); $ctx->{-title_html} = ascii_html($title); my $nslash = ($key =~ tr!/!/!); $ctx->{-upfx} = '../../../' . ('../' x $nslash); my $l = PublicInbox::Linkify->new; $l->linkify_1($txt); if ($hl) { $hl->do_hl_text(\$txt); } else { $txt = ascii_html($txt); } $txt = '
    ' . $l->linkify_2($txt) . '
    '; PublicInbox::WwwStream::html_oneshot($ctx, $code, \$txt); } sub _srch_prefix ($$) { my ($srch, $txt) = @_; my $pad = 0; my $htxt = ''; my $help = $srch->help; my $i; for ($i = 0; $i < @$help; $i += 2) { my $pfx = $help->[$i]; my $n = length($pfx); $pad = $n if $n > $pad; $htxt .= $pfx . "\0"; $htxt .= $help->[$i + 1]; $htxt .= "\f\n"; } $pad += 2; my $padding = ' ' x ($pad + 8); $htxt =~ s/^/$padding/gms; $htxt =~ s/^$padding(\S+)\0/" $1". (' ' x ($pad - length($1)))/egms; $htxt =~ s/\f\n/\n/gs; $$txt .= $htxt; 1; } sub _colors_help ($$) { my ($ctx, $txt) = @_; my $ibx = $ctx->{-inbox}; my $env = $ctx->{env}; my $base_url = $ibx->base_url($env); $$txt .= "color customization for $base_url\n"; $$txt .= <{-inbox}; push @$hdr, 'Content-Disposition', 'inline; filename=inbox.config'; my $name = dq_escape($ibx->{name}); my $inboxdir = '/path/to/top-level-inbox'; my $base_url = $ibx->base_url($ctx->{env}); $$txt .= <{$k}) or next; $$txt .= "\t$k = $_\n" for @$v; } if (my $altid = $ibx->{altid}) { my $altid_map = $ibx->altid_map; $$txt .= <{$k}) or next; $$txt .= "\t$k = $v\n"; } $$txt .= "\tnntpmirror = $_\n" for (@{$ibx->nntp_url}); # note: this doesn't preserve cgitrc layout, since we parse cgitrc # and drop the original structure if (defined(my $cr = $ibx->{coderepo})) { $$txt .= "\tcoderepo = $_\n" for @$cr; $$txt .= <<'EOF'; ; `coderepo' entries allows blob reconstruction via patch emails if ; the inbox is indexed with Xapian. `@@ @@' ; line number ranges in `[PATCH]' emails link to /$INBOX_NAME/$OID/s/, ; an HTTP endpoint which reconstructs git blobs via git-apply(1). EOF my $pi_config = $ctx->{www}->{pi_config}; for my $cr_name (@$cr) { my $urls = $pi_config->{"coderepo.$cr_name.cgiturl"}; my $path = "/path/to/$cr_name"; $cr_name = dq_escape($cr_name); $$txt .= qq([coderepo "$cr_name"]\n); if ($urls && scalar(@$urls)) { $$txt .= "\t; "; $$txt .= join(" ||\n\t;\t", map {; my $dst = $path; if ($path !~ m![a-z0-9_/\.\-]!i) { $dst = '"'.dq_escape($dst).'"'; } qq(git clone $_ $dst); } @$urls); $$txt .= "\n"; } $$txt .= "\tdir = $path\n"; $$txt .= "\tcgiturl = https://example.com/"; $$txt .= uri_escape_utf8($cr_name, URI_PATH)."\n"; } } 1; } sub _default_text ($$$$) { my ($ctx, $key, $hdr, $txt) = @_; return _colors_help($ctx, $txt) if $key eq 'color'; return inbox_config($ctx, $hdr, $txt) if $key eq 'config'; return if $key ne 'help'; # TODO more keys? my $ibx = $ctx->{-inbox}; my $base_url = $ibx->base_url($ctx->{env}); $$txt .= "public-inbox help for $base_url\n"; $$txt .= <') into the URL. Forward slash ('/') characters in the Message-IDs need to be escaped as "%2F" (without quotes). Thus, it is possible to retrieve any message by its Message-ID by going to: $base_url/ (without the '<' or '>') Message-IDs are described at: $WIKI_URL/Message-ID EOF # n.b. we use the Xapian DB for any regeneratable, # order-of-arrival-independent data. my $srch = $ibx->search; if ($srch) { $$txt .= <over; if ($over) { $$txt .= </T/#u Loads the thread belonging to the given in flat chronological order. The "#u" anchor focuses the browser on the given . * $base_url/t/#u Loads the thread belonging to the given in threaded order with nesting. For deep threads, this requires a wide display or horizontal scrolling. Both of these HTML endpoints are suitable for offline reading using the thread overview at the bottom of each page. Users of feed readers may follow a particular thread using: * $base_url/t.atom Which loads the thread in Atom Syndication Standard described at Wikipedia and RFC4287: $WIKI_URL/Atom_(standard) https://tools.ietf.org/html/rfc4287 Atom Threading Extensions (RFC4685) is supported: https://tools.ietf.org/html/rfc4685 Finally, the gzipped mbox for a thread is available for downloading and importing into your favorite mail client: * $base_url/t.mbox.gz We use the mboxrd variant of the mbox format described at: $WIKI_URL/Mbox EOF } # $over $$txt .= < # License: AGPL-3.0+ package PublicInbox::Xapcmd; use strict; use PublicInbox::Spawn qw(which popen_rd nodatacow_dir); use PublicInbox::Admin qw(setup_signals); use PublicInbox::Over; use PublicInbox::SearchIdx; use File::Temp 0.19 (); # ->newdir use File::Path qw(remove_tree); use File::Basename qw(dirname); use POSIX qw(WNOHANG); # support testing with dev versions of Xapian which installs # commands with a version number suffix (e.g. "xapian-compact-1.5") our $XAPIAN_COMPACT = $ENV{XAPIAN_COMPACT} || 'xapian-compact'; our @COMPACT_OPT = qw(jobs|j=i quiet|q blocksize|b=s no-full|n fuller|F); sub commit_changes ($$$$) { my ($ibx, $im, $tmp, $opt) = @_; my $reshard = $opt->{reshard}; $SIG{INT} or die 'BUG: $SIG{INT} not handled'; my @old_shard; my $over_chg; while (my ($old, $newdir) = each %$tmp) { next if $old eq ''; # no invalid paths my @st = stat($old); if (!@st && !defined($opt->{reshard})) { die "failed to stat($old): $!"; } my $new = $newdir->dirname if defined($newdir); my $over = "$old/over.sqlite3"; if (-f $over) { # only for v1, v2 over is untouched defined $new or die "BUG: $over exists when culling v2"; $over = PublicInbox::Over->new($over); my $tmp_over = "$new/over.sqlite3"; $over->dbh->sqlite_backup_to_file($tmp_over); $over = undef; $over_chg = 1; } if (!defined($new)) { # culled shard push @old_shard, $old; next; } if (@st) { chmod($st[2] & 07777, $new) or die "chmod $old: $!\n"; rename($old, "$new/old") or die "rename $old => $new/old: $!\n"; } rename($new, $old) or die "rename $new => $old: $!\n"; if (@st) { my $prev = "$old/old"; remove_tree($prev) or die "failed to remove $prev: $!\n"; } } # trigger ->check_inodes in read-only daemons syswrite($im->{lockfh}, '.') if $over_chg; remove_tree(@old_shard); $tmp = undef; if (!$opt->{-coarse_lock}) { $opt->{-skip_lock} = 1; if ($im->can('count_shards')) { my $pr = $opt->{-progress}; my $n = $im->count_shards; if (defined $reshard && $n != $reshard) { die "BUG: counted $n shards after resharding to $reshard"; } my $prev = $im->{shards}; if ($pr && $prev != $n) { $pr->("shard count changed: $prev => $n\n"); $im->{shards} = $n; } } my $env = $opt->{-idx_env}; local %ENV = (%ENV, %$env) if $env; PublicInbox::Admin::index_inbox($ibx, $im, $opt); } } sub cb_spawn { my ($cb, $args, $opt) = @_; # $cb = cpdb() or compact() defined(my $pid = fork) or die "fork: $!"; return $pid if $pid > 0; $cb->($args, $opt); POSIX::_exit(0); } sub runnable_or_die ($) { my ($exe) = @_; which($exe) or die "$exe not found in PATH\n"; } sub prepare_reindex ($$$) { my ($ibx, $im, $opt) = @_; if ($ibx->version == 1) { my $dir = $ibx->search->xdir(1); my $xdb = $PublicInbox::Search::X{Database}->new($dir); if (my $lc = $xdb->get_metadata('last_commit')) { $opt->{reindex}->{from} = $lc; } } else { # v2 my $max; $im->git_dir_latest(\$max) or return; my $from = $opt->{reindex}->{from}; my $mm = $ibx->mm; my $v = PublicInbox::Search::SCHEMA_VERSION(); foreach my $i (0..$max) { $from->[$i] = $mm->last_commit_xap($v, $i); } } } sub same_fs_or_die ($$) { my ($x, $y) = @_; return if ((stat($x))[0] == (stat($y))[0]); # 0 - st_dev die "$x and $y reside on different filesystems\n"; } sub kill_pids { my ($sig, $pids) = @_; kill($sig, keys %$pids); # pids may be empty } sub process_queue { my ($queue, $cb, $opt) = @_; my $max = $opt->{jobs} // scalar(@$queue); if ($max <= 1) { while (defined(my $args = shift @$queue)) { $cb->($args, $opt); } return; } # run in parallel: my %pids; local %SIG = %SIG; setup_signals(\&kill_pids, \%pids); while (@$queue) { while (scalar(keys(%pids)) < $max && scalar(@$queue)) { my $args = shift @$queue; $pids{cb_spawn($cb, $args, $opt)} = $args; } my $flags = 0; while (scalar keys %pids) { my $pid = waitpid(-1, $flags) or last; last if $pid < 0; my $args = delete $pids{$pid}; if ($args) { die join(' ', @$args)." failed: $?\n" if $?; } else { warn "unknown PID($pid) reaped: $?\n"; } $flags = WNOHANG if scalar(@$queue); } } } sub prepare_run { my ($ibx, $opt) = @_; my $tmp = {}; # old shard dir => File::Temp->newdir object or undef my @queue; # ([old//src,newdir]) - list of args for cpdb() or compact() my $old; if (my $srch = $ibx->search) { $old = $srch->xdir(1); -d $old or die "$old does not exist\n"; } my $reshard = $opt->{reshard}; if (defined $reshard && $reshard <= 0) { die "--reshard must be a positive number\n"; } # we want temporary directories to be as deep as possible, # so v2 shards can keep "xap$SCHEMA_VERSION" on a separate FS. if ($old && $ibx->version == 1) { if (defined $reshard) { warn "--reshard=$reshard ignored for v1 $ibx->{inboxdir}\n"; } my $dir = dirname($old); same_fs_or_die($dir, $old); my $v = PublicInbox::Search::SCHEMA_VERSION(); my $wip = File::Temp->newdir("xapian$v-XXXXXXXX", DIR => $dir); $tmp->{$old} = $wip; nodatacow_dir($wip->dirname); push @queue, [ $old, $wip ]; } elsif ($old) { opendir my $dh, $old or die "Failed to opendir $old: $!\n"; my @old_shards; while (defined(my $dn = readdir($dh))) { if ($dn =~ /\A[0-9]+\z/) { push @old_shards, $dn; } elsif ($dn eq '.' || $dn eq '..') { } elsif ($dn =~ /\Aover\.sqlite3/) { } else { warn "W: skipping unknown dir: $old/$dn\n" } } die "No Xapian shards found in $old\n" unless @old_shards; my ($src, $max_shard); if (!defined($reshard) || $reshard == scalar(@old_shards)) { # 1:1 copy $max_shard = scalar(@old_shards) - 1; } else { # M:N copy $max_shard = $reshard - 1; $src = [ map { "$old/$_" } @old_shards ]; } foreach my $dn (0..$max_shard) { my $tmpl = "$dn-XXXXXXXX"; my $wip = File::Temp->newdir($tmpl, DIR => $old); same_fs_or_die($old, $wip->dirname); my $cur = "$old/$dn"; push @queue, [ $src // $cur , $wip ]; nodatacow_dir($wip->dirname); $tmp->{$cur} = $wip; } # mark old shards to be unlinked if ($src) { $tmp->{$_} ||= undef for @$src; } } ($tmp, \@queue); } sub check_compact () { runnable_or_die($XAPIAN_COMPACT) } sub _run { my ($ibx, $cb, $opt) = @_; my $im = $ibx->importer(0); $im->lock_acquire; my ($tmp, $queue) = prepare_run($ibx, $opt); # fine-grained locking if we prepare for reindex if (!$opt->{-coarse_lock}) { prepare_reindex($ibx, $im, $opt); $im->lock_release; } $ibx->cleanup; process_queue($queue, $cb, $opt); $im->lock_acquire if !$opt->{-coarse_lock}; commit_changes($ibx, $im, $tmp, $opt); } sub run { my ($ibx, $task, $opt) = @_; # task = 'cpdb' or 'compact' my $cb = \&$task; PublicInbox::Admin::progress_prepare($opt ||= {}); defined(my $dir = $ibx->{inboxdir}) or die "no inboxdir defined\n"; -d $dir or die "inboxdir=$dir does not exist\n"; check_compact() if $opt->{compact} && $ibx->search; if (!$opt->{-coarse_lock}) { # per-epoch ranges for v2 # v1:{ from => $OID }, v2:{ from => [ $OID, $OID, $OID ] } } $opt->{reindex} = { from => $ibx->version == 1 ? '' : [] }; PublicInbox::SearchIdx::load_xapian_writable(); } local %SIG = %SIG; setup_signals(); $ibx->umask_prepare; $ibx->with_umask(\&_run, $ibx, $cb, $opt); } sub cpdb_retryable ($$) { my ($src, $pfx) = @_; if (ref($@) =~ /\bDatabaseModifiedError\b/) { warn "$pfx Xapian DB modified, reopening and retrying\n"; $src->reopen; return 1; } if ($@) { warn "$pfx E: ", ref($@), "\n"; die; } 0; } sub progress_pfx ($) { my ($wip) = @_; # tempdir v2: ([0-9])+-XXXXXXXX my @p = split('/', $wip); # return "xap15/0" for v2, or "xapian15" for v1: ($p[-1] =~ /\A([0-9]+)/) ? "$p[-2]/$1" : $p[-1]; } sub kill_compact { # setup_signals callback my ($sig, $pidref) = @_; kill($sig, $$pidref) if defined($$pidref); } # xapian-compact wrapper sub compact ($$) { my ($args, $opt) = @_; my ($src, $newdir) = @$args; my $dst = ref($newdir) ? $newdir->dirname : $newdir; my $pfx = $opt->{-progress_pfx} ||= progress_pfx($src); my $pr = $opt->{-progress}; my $rdr = {}; foreach my $fd (0..2) { defined(my $dfd = $opt->{$fd}) or next; $rdr->{$fd} = $dfd; } # we rely on --no-renumber to keep docids synched to NNTP my $cmd = [ $XAPIAN_COMPACT, '--no-renumber' ]; for my $sw (qw(no-full fuller)) { push @$cmd, "--$sw" if $opt->{$sw}; } for my $sw (qw(blocksize)) { defined(my $v = $opt->{$sw}) or next; push @$cmd, "--$sw", $v; } $pr->("$pfx `".join(' ', @$cmd)."'\n") if $pr; push @$cmd, $src, $dst; my ($rd, $pid); local %SIG = %SIG; setup_signals(\&kill_compact, \$pid); ($rd, $pid) = popen_rd($cmd, undef, $rdr); while (<$rd>) { if ($pr) { s/\r/\r$pfx /g; $pr->("$pfx $_"); } } waitpid($pid, 0); die "@$cmd failed: \$?=$?\n" if $?; } sub cpdb_loop ($$$;$$) { my ($src, $dst, $pr_data, $cur_shard, $reshard) = @_; my ($pr, $fmt, $nr, $pfx); if ($pr_data) { $pr = $pr_data->{pr}; $fmt = $pr_data->{fmt}; $nr = \($pr_data->{nr}); $pfx = $pr_data->{pfx}; } my ($it, $end); do { eval { $it = $src->postlist_begin(''); $end = $src->postlist_end(''); }; } while (cpdb_retryable($src, $pfx)); do { eval { for (; $it != $end; $it++) { my $docid = $it->get_docid; if (defined $reshard) { my $dst_shard = $docid % $reshard; next if $dst_shard != $cur_shard; } my $doc = $src->get_document($docid); $dst->replace_document($docid, $doc); if ($pr_data && !(++$$nr & 1023)) { $pr->(sprintf($fmt, $$nr)); } } # unlike copydatabase(1), we don't copy spelling # and synonym data (or other user metadata) since # the Perl APIs don't expose iterators for them # (and public-inbox does not use those features) }; } while (cpdb_retryable($src, $pfx)); } # Like copydatabase(1), this is horribly slow; and it doesn't seem due # to the overhead of Perl. sub cpdb ($$) { my ($args, $opt) = @_; my ($old, $newdir) = @$args; my $new = $newdir->dirname; my ($src, $cur_shard); my $reshard; PublicInbox::SearchIdx::load_xapian_writable() or die; my $XapianDatabase = $PublicInbox::Search::X{Database}; if (ref($old) eq 'ARRAY') { ($cur_shard) = ($new =~ m!xap[0-9]+/([0-9]+)\b!); defined $cur_shard or die "BUG: could not extract shard # from $new"; $reshard = $opt->{reshard}; defined $reshard or die 'BUG: got array src w/o --reshard'; # resharding, M:N copy means have full read access foreach (@$old) { if ($src) { my $sub = $XapianDatabase->new($_); $src->add_database($sub); } else { $src = $XapianDatabase->new($_); } } } else { $src = $XapianDatabase->new($old); } my ($tmp, $ft); local %SIG = %SIG; if ($opt->{compact}) { my $dir = dirname($new); same_fs_or_die($dir, $new); $ft = File::Temp->newdir("$new.compact-XXXXXX", DIR => $dir); setup_signals(); $tmp = $ft->dirname; nodatacow_dir($tmp); } else { $tmp = $new; } # like copydatabase(1), be sure we don't overwrite anything in case # of other bugs: my $flag = eval($PublicInbox::Search::Xap.'::DB_CREATE()'); die if $@; my $XapianWritableDatabase = $PublicInbox::Search::X{WritableDatabase}; $flag |= $PublicInbox::SearchIdx::DB_NO_SYNC if !$opt->{fsync}; my $dst = $XapianWritableDatabase->new($tmp, $flag); my $pr = $opt->{-progress}; my $pfx = $opt->{-progress_pfx} = progress_pfx($new); my $pr_data = { pr => $pr, pfx => $pfx, nr => 0 } if $pr; do { eval { # update the only metadata key for v1: my $lc = $src->get_metadata('last_commit'); $dst->set_metadata('last_commit', $lc) if $lc; # only the first xapian shard (0) gets 'indexlevel' if ($new =~ m!(?:xapian[0-9]+|xap[0-9]+/0)\b!) { my $l = $src->get_metadata('indexlevel'); if ($l eq 'medium') { $dst->set_metadata('indexlevel', $l); } } if ($pr_data) { my $tot = $src->get_doccount; # we can only estimate when resharding, # because removed spam causes slight imbalance my $est = ''; if (defined $cur_shard && $reshard > 1) { $tot = int($tot/$reshard); $est = 'around '; } my $fmt = "$pfx % ".length($tot)."u/$tot\n"; $pr->("$pfx copying $est$tot documents\n"); $pr_data->{fmt} = $fmt; $pr_data->{total} = $tot; } }; } while (cpdb_retryable($src, $pfx)); if (defined $reshard) { # we rely on document IDs matching NNTP article number, # so we can't have the Xapian sharding DB support rewriting # document IDs. Thus we iterate through each shard # individually. $src = undef; foreach (@$old) { my $old = $XapianDatabase->new($_); cpdb_loop($old, $dst, $pr_data, $cur_shard, $reshard); } } else { cpdb_loop($src, $dst, $pr_data); } $pr->(sprintf($pr_data->{fmt}, $pr_data->{nr})) if $pr; return unless $opt->{compact}; $src = $dst = undef; # flushes and closes # this is probably the best place to do xapian-compact # since $dst isn't readable by HTTP or NNTP clients, yet: compact([ $tmp, $new ], $opt); remove_tree($tmp) or die "failed to remove $tmp: $!\n"; } 1; public-inbox-1.6.1/sa_config/000077500000000000000000000000001377346120300160505ustar00rootroot00000000000000public-inbox-1.6.1/sa_config/Makefile000066400000000000000000000007371377346120300175170ustar00rootroot00000000000000INSTALL = install all:: @cat README ROOT_FILES = etc/spamassassin/public-inbox.pre install-root: @mkdir -p /etc/spamassassin for f in $(ROOT_FILES); do $(INSTALL) -m 0644 root/$$f /$$f; done diff-root: for f in $(ROOT_FILES); do diff -u root/$$f /$$f; done USER_FILES = .spamassassin/user_prefs install-user: @mkdir -p ~/.spamassassin/ for f in $(USER_FILES); do $(INSTALL) -m 0644 user/$$f ~/$$f; done diff-user: for f in $(USER_FILES); do diff -u user/$$f ~/$$f; done public-inbox-1.6.1/sa_config/README000066400000000000000000000012721377346120300167320ustar00rootroot00000000000000SpamAssassin configs for public-inbox.org ----------------------------------------- root/ - files for system-wide use (plugins, rule definitions, new rules should have a zero score which should be overridden) user/ - per-user config (keep as much in here as possible) These files go into the users home directory All files in these example directory are CC0: To the extent possible under law, Eric Wong has waived all copyright and related or neighboring rights to these examples. Make targets ------------ install-root - install system-wide files (run as root) install-user - install user-specific files (run as the user public-inbox runs as) public-inbox-1.6.1/sa_config/root/000077500000000000000000000000001377346120300170335ustar00rootroot00000000000000public-inbox-1.6.1/sa_config/root/etc/000077500000000000000000000000001377346120300176065ustar00rootroot00000000000000public-inbox-1.6.1/sa_config/root/etc/spamassassin/000077500000000000000000000000001377346120300223135ustar00rootroot00000000000000public-inbox-1.6.1/sa_config/root/etc/spamassassin/public-inbox.pre000066400000000000000000000006631377346120300254230ustar00rootroot00000000000000# public-inbox.org uses the Debian spamd installation + init and sets # CRON=1 in /etc/default/spamassassin for automatic rule updates # compile rules to C, sa-compile(1) must be run as the appropriate user # (debian-spamd on Debian). sa-compile(1) will also be run by the cronjob loadplugin Mail::SpamAssassin::Plugin::Rule2XSBody # for ok_languages in user_prefs loadplugin Mail::SpamAssassin::Plugin::TextCat allow_user_rules 1 public-inbox-1.6.1/sa_config/user/000077500000000000000000000000001377346120300170265ustar00rootroot00000000000000public-inbox-1.6.1/sa_config/user/.spamassassin/000077500000000000000000000000001377346120300216115ustar00rootroot00000000000000public-inbox-1.6.1/sa_config/user/.spamassassin/user_prefs000066400000000000000000000070601377346120300237140ustar00rootroot00000000000000# raise or lower as needed required_score 3.0 # do not mess with the original message body, only notify in headers clear_report_template clear_unsafe_report_template report_safe 0 # we do not use nor support this on NFS lock_method flock # do not throw off Bayes bayes_ignore_header X-Bogosity bayes_ignore_header X-Spam-Flag bayes_ignore_header X-Spam-Status bayes_ignore_header X-Spam-Report # English-only for all lists on public-inbox.org ok_locales en # we have "loadplugin Mail::SpamAssassin::Plugin::TextCat" in a *.pre file ok_languages en # uncomment the following for importing archives: # dns_available no # skip_rbl_checks 1 # skip_uribl_checks 1 # manual rules (some stolen from debian) # these require "allow_user_rules 1" in the system-wide config rawbody LOCAL_VIEWINBROWSER /View this message in a browser/ describe LOCAL_VIEWINBROWSER "HTML mail not welcome" score LOCAL_VIEWINBROWSER 2.8 body MEETUPSECURELY /meetupsecurely\.com/i describe MEETUPSECURELY "site used by spammers" score MEETUPSECURELY 2.8 body HELLOMYDEAR /hello my dear/i describe HELLOMYDEAR "spam phrase" score HELLOMYDEAR 2.8 body JUSTAMAILBOX /I'm just a mailbox used for sending notifications/ describe JUSTAMAILBOX "autoreply phrase" score JUSTAMAILBOX 5.0 # hello foo header PI_HELLO subject =~ /^hello \w+/i describe PI_HELLO "Hello foo" score PI_HELLO 3 # no delivery header PI_DNOT subject =~ /delivery\s+(?:status\s+)?notification/i describe PI_DNOT "delivery notification" score PI_DNOT 3 # no delivery header PI_PARCEL subject =~ /could not deliver your parcel/ describe PI_PARCEL "delivery notification" score PI_PARCEL 3 # notice to appear header PI_DNOTICE subject =~ /notice to appear/i describe PI_DNOTICE "notice to appear" score PI_DNOTICE 3 full ZIPFILE /\b(?:file)?name\=.*\.zip\b/i describe ZIPFILE zipfile attachment score ZIPFILE 1.5 header PI_NUM_WORD_SUBJ subject =~ /^\d+ [a-z0-9_-]+$/ describe PI_NUM_WORD_SUBJ "number and single word subject" score PI_NUM_WORD_SUBJ 0.5 header PI_NUM_ONLY_SUBJ subject =~ /^\d+$/ describe PI_NUM_ONLY_SUBJ "number only subject" score PI_NUM_ONLY_SUBJ 0.5 header PI_IMPORTANCE_HIGH importance =~ /^high$/ describe PI_IMPORTANCE_HIGH "importance: high header" score PI_IMPORTANCE_HIGH 0.5 meta PI_ZIPFILE_NUM_WORD_SUBJ PI_NUM_WORD_SUBJ && ZIPFILE describe PI_ZIPFILE_NUM_WORD_SUBJ "common spam/virus pattern" score PI_ZIPFILE_NUM_WORD_SUBJ 3 meta PI_ZIPFILE_NUM_ONLY_SUBJ PI_NUM_ONLY_SUBJ && ZIPFILE describe PI_ZIPFILE_NUM_ONLY_SUBJ "common spam/virus pattern" score PI_ZIPFILE_NUM_ONLY_SUBJ 3 full DOCFILE /\b(?:file)?name\=.*\.doc\b/i describe DOCFILE doc attachment score DOCFILE 3 score BAYES_999 3 score BAYES_05 -1.5 score BAYES_00 -3 # trust paid whitelist services? never score RCVD_IN_RP_SAFE 0.0 score RCVD_IN_RP_CERTIFIED 0.0 # this depends on PublicInbox::SaPlugin::ListMirror: # header LIST_MIRROR_RECEIVED eval:check_list_mirror_received() # describe LIST_MIRROR_RECEIVED Received does not match expected # score LIST_MIRROR_RECEIVED 10 # # this depends on PublicInbox::SaPlugin::ListMirror: # header LIST_MIRROR_BCC eval:check_list_mirror_bcc() # describe LIST_MIRROR_BCC Mailing list was Bcc-ed # score LIST_MIRROR_BCC 10 # list_mirror X-Mailing-List git@vger.kernel.org *.kernel.org git@vger.kernel.org # DNSWL scores is a bit too powerful by default (-5 adjustment!) # default score RCVD_IN_DNSWL_LOW 0 -0.7 0 -0.7 # default score RCVD_IN_DNSWL_MED 0 -2.3 0 -2.3 # default score RCVD_IN_DNSWL_HI 0 -5 0 -5 score RCVD_IN_DNSWL_MED 0 -0.1 0 -0.1 score RCVD_IN_DNSWL_HI 0 -0.2 0 -0.2 public-inbox-1.6.1/script/000077500000000000000000000000001377346120300154245ustar00rootroot00000000000000public-inbox-1.6.1/script/public-inbox-compact000077500000000000000000000022351377346120300213730ustar00rootroot00000000000000#!perl -w # Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $opt = { compact => 1, -coarse_lock => 1 }; my $help = <{help}) { print $help; exit 0 }; require PublicInbox::Admin; PublicInbox::Admin::require_or_die('-index'); PublicInbox::Admin::progress_prepare($opt); require PublicInbox::InboxWritable; require PublicInbox::Xapcmd; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); unless (@ibxs) { print STDERR $help; exit 1 } foreach (@ibxs) { my $ibx = PublicInbox::InboxWritable->new($_); PublicInbox::Xapcmd::run($ibx, 'compact', $opt); } public-inbox-1.6.1/script/public-inbox-convert000077500000000000000000000141431377346120300214260ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $help = < 1, # index defaults: quiet => -1, compact => 0, maxsize => undef, fsync => 1, reindex => 1, # we always reindex }; GetOptions($opt, qw(jobs|j=i index! help|h), # index options qw(verbose|v+ rethread compact|c+ fsync|sync! indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s sequential_shard|sequential-shard|seq-shard )) or die $help; if ($opt->{help}) { print $help; exit 0 }; my $old_dir = shift(@ARGV) // ''; my $new_dir = shift(@ARGV) // ''; die $help if (scalar(@ARGV) || $new_dir eq '' || $old_dir eq ''); die "$new_dir exists\n" if -d $new_dir; die "$old_dir not a directory\n" unless -d $old_dir; require Cwd; Cwd->import('abs_path'); require PublicInbox::Config; require PublicInbox::InboxWritable; my $abs = abs_path($old_dir); die "failed to resolve $old_dir: $!\n" if (!defined($abs)); my $cfg = PublicInbox::Config->new; my $old; $cfg->each_inbox(sub { $old = $_[0] if abs_path($_[0]->{inboxdir}) eq $old_dir; }); if ($old) { $old = PublicInbox::InboxWritable->new($old); } else { warn "W: $old_dir not configured in " . PublicInbox::Config::default_file() . "\n"; $old = PublicInbox::InboxWritable->new({ inboxdir => $old_dir, name => 'ignored', -primary_address => 'old@example.com', address => [ 'old@example.com' ], }); } die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2; require File::Spec; require PublicInbox::Admin; my $detected = PublicInbox::Admin::detect_indexlevel($old); $old->{indexlevel} //= $detected; my $env; if ($opt->{'index'}) { my $mods = {}; PublicInbox::Admin::scan_ibx_modules($mods, $old); PublicInbox::Admin::require_or_die(keys %$mods); PublicInbox::Admin::progress_prepare($opt); $env = PublicInbox::Admin::index_prepare($opt, $cfg); } local %ENV = (%$env, %ENV) if $env; my $new = { %$old }; $new->{inboxdir} = File::Spec->canonpath($new_dir); $new->{version} = 2; $new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} }); $new->{-no_fsync} = 1 if !$opt->{fsync}; my $v2w; $old->umask_prepare; sub link_or_copy ($$) { my ($src, $dst) = @_; link($src, $dst) and return; $!{EXDEV} or warn "link $src, $dst failed: $!, trying cp\n"; require File::Copy; # preserves permissions: File::Copy::cp($src, $dst) or die "cp $src, $dst failed: $!\n"; } $old->with_umask(sub { my $old_cfg = "$old->{inboxdir}/config"; local $ENV{GIT_CONFIG} = $old_cfg; my $new_cfg = "$new->{inboxdir}/all.git/config"; $v2w = $new->importer(1); $v2w->init_inbox(delete $opt->{jobs}); unlink $new_cfg; link_or_copy($old_cfg, $new_cfg); if (my $alt = $new->{altid}) { require PublicInbox::AltId; foreach my $i (0..$#$alt) { my $src = PublicInbox::AltId->new($old, $alt->[$i], 0); $src = $src->mm_alt or next; $src = $src->{dbh}->sqlite_db_filename; my $dst = PublicInbox::AltId->new($new, $alt->[$i], 1); $dst->mm_alt->{dbh}->sqlite_backup_from_file($src); } } my $desc = "$old->{inboxdir}/description"; link_or_copy($desc, "$new->{inboxdir}/description") if -e $desc; my $clone = "$old->{inboxdir}/cloneurl"; if (-e $clone) { warn <<""; $clone may not be valid after migrating to v2, not copying } }); my $state = ''; my $head = $old->{ref_head} || 'HEAD'; my ($rd, $pid) = $old->git->popen(qw(fast-export --use-done-feature), $head); $v2w->idx_init($opt); my $im = $v2w->importer; my ($r, $w) = $im->gfi_start; my $h = '[0-9a-f]'; my %D; my $last; while (<$rd>) { if ($_ eq "blob\n") { $state = 'blob'; } elsif (/^commit /) { $state = 'commit'; } elsif (/^data ([0-9]+)/) { my $len = $1; print $w $_ or $im->wfail; while ($len) { my $n = read($rd, my $tmp, $len) or die "read: $!"; warn "$n != $len\n" if $n != $len; $len -= $n; print $w $tmp or $im->wfail; } next; } elsif ($state eq 'commit') { if (m{^M 100644 :([0-9]+) (${h}{2}/${h}{38})}o) { my ($mark, $path) = ($1, $2); $D{$path} = $mark; if ($last && $last ne 'm') { print $w "D $last\n" or $im->wfail; } print $w "M 100644 :$mark m\n" or $im->wfail; $last = 'm'; next; } if (m{^D (${h}{2}/${h}{38})}o) { my $mark = delete $D{$1}; defined $mark or die "undeleted path: $1\n"; if ($last && $last ne 'd') { print $w "D $last\n" or $im->wfail; } print $w "M 100644 :$mark d\n" or $im->wfail; $last = 'd'; next; } } last if $_ eq "done\n"; print $w $_ or $im->wfail; } close $rd or die "close fast-export: $!\n"; waitpid($pid, 0) or die "waitpid failed: $!\n"; $? == 0 or die "fast-export failed: $?\n"; $r = $w = undef; # v2w->done does the actual close and error checking $v2w->done; if (my $old_mm = $old->mm) { $old->cleanup; $old_mm = $old_mm->{dbh}->sqlite_db_filename; # we want to trigger a reindex, not a from scratch index if # we're reusing the msgmap from an existing v1 installation. $v2w->idx_init($opt); $v2w->{mm}->{dbh}->sqlite_backup_from_file($old_mm); my $epoch0 = PublicInbox::Git->new($v2w->git_init(0)); chop(my $cmt = $epoch0->qx(qw(rev-parse --verify), $head)); $v2w->last_epoch_commit(0, $cmt); } $v2w->index_sync($opt) if delete $opt->{'index'}; $v2w->done; public-inbox-1.6.1/script/public-inbox-edit000077500000000000000000000165231377346120300206770ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ # # Used for editing messages in a public-inbox. # Supports v2 inboxes only, for now. use strict; use warnings; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use PublicInbox::AdminEdit; use File::Temp 0.19 (); # 0.19 for TMPDIR use PublicInbox::ContentHash qw(content_hash); use PublicInbox::MID qw(mid_clean mids); PublicInbox::Admin::check_require('-index'); use PublicInbox::Eml; use PublicInbox::InboxWritable qw(eml_from_path); use PublicInbox::Import; my $help = <<'EOF'; usage: public-inbox-edit -m MESSAGE-ID [--all] [INBOX_DIRS] destructively edit messages in a public inbox options: --all edit all configured inboxes -m MESSAGE-ID edit the message with a given Message-ID -F FILE edit the message matching the contents of FILE --force forcibly edit even if Message-ID is ambiguous --raw do not perform "From " line escaping See public-inbox-edit(1) man page for full documentation. EOF my $opt = { verbose => 1, all => 0, -min_inbox_version => 2, raw => 0 }; my @opt = qw(mid|m=s file|F=s raw); GetOptions($opt, @PublicInbox::AdminEdit::OPT, @opt) or die $help; if ($opt->{help}) { print $help; exit 0 }; my $cfg = PublicInbox::Config->new; my $editor = $ENV{MAIL_EDITOR}; # e.g. "mutt -f" unless (defined $editor) { my $k = 'publicinbox.mailEditor'; $editor = $cfg->{lc($k)} if $cfg; unless (defined $editor) { warn "\`$k' not configured, trying \`git var GIT_EDITOR'\n"; chomp($editor = `git var GIT_EDITOR`); warn "Will use $editor to edit mail\n"; } } my $mid = $opt->{mid}; my $file = $opt->{file}; if (defined $mid && defined $file) { die "the --mid and --file options are mutually exclusive\n"; } my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); PublicInbox::AdminEdit::check_editable(\@ibxs); my $found = {}; # chash => [ [ibx, smsg] [, [ibx, smsg] ] ] sub find_mid ($$$) { my ($found, $mid, $ibxs) = @_; foreach my $ibx (@$ibxs) { my $over = $ibx->over; my ($id, $prev); while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) { my $ref = $ibx->msg_by_smsg($smsg); my $mime = PublicInbox::Eml->new($ref); my $chash = content_hash($mime); my $tuple = [ $ibx, $smsg ]; push @{$found->{$chash} ||= []}, $tuple } PublicInbox::InboxWritable::cleanup($ibx); } $found; } sub show_cmd ($$) { my ($ibx, $smsg) = @_; " GIT_DIR=$ibx->{inboxdir}/all.git \\\n git show $smsg->{blob}\n"; } sub show_found ($) { my ($found) = @_; foreach my $to_edit (values %$found) { foreach my $tuple (@$to_edit) { my ($ibx, $smsg) = @$tuple; warn show_cmd($ibx, $smsg); } } } if (defined($mid)) { $mid = mid_clean($mid); find_mid($found, $mid, \@ibxs); my $nr = scalar(keys %$found); die "No message found for <$mid>\n" unless $nr; if ($nr > 1) { warn <<""; Multiple messages with different content found matching <$mid>: show_found($found); die "Use --force to edit all of them\n" if !$opt->{force}; warn "Will edit all of them\n"; } } else { my $eml = eml_from_path($file) or die "open($file) failed: $!"; my $mids = mids($eml); find_mid($found, $_, \@ibxs) for (@$mids); # populates $found my $chash = content_hash($eml); my $to_edit = $found->{$chash}; unless ($to_edit) { my $nr = scalar(keys %$found); if ($nr > 0) { warn <<""; $nr matches to Message-ID(s) in $file, but none matched content Partial matches below: show_found($found); } elsif ($nr == 0) { $mids = join('', map { " <$_>\n" } @$mids); warn <<""; No matching messages found matching Message-ID(s) in $file $mids } exit 1; } $found = { $chash => $to_edit }; } my %tmpopt = ( TEMPLATE => 'public-inbox-edit-XXXXXX', TMPDIR => 1, SUFFIX => $opt->{raw} ? '.eml' : '.mbox', ); foreach my $to_edit (values %$found) { my $edit_fh = File::Temp->new(%tmpopt); $edit_fh->autoflush(1); my $edit_fn = $edit_fh->filename; my ($ibx, $smsg) = @{$to_edit->[0]}; my $old_raw = $ibx->msg_by_smsg($smsg); PublicInbox::InboxWritable::cleanup($ibx); my $tmp = $$old_raw; if (!$opt->{raw}) { my $oid = $smsg->{blob}; print $edit_fh "From mboxrd\@$oid Thu Jan 1 00:00:00 1970\n" or die "failed to write From_ line: $!"; $tmp =~ s/^(>*From )/>$1/gm; } print $edit_fh $tmp or die "failed to write tempfile for editing: $!"; # run the editor, respecting spaces/quote retry_edit: if (system(qw(sh -c), $editor.' "$@"', $editor, $edit_fn)) { if (!(-t STDIN) && !$opt->{force}) { die "E: $editor failed: $?\n"; } print STDERR "$editor failed, "; print STDERR "continuing as forced\n" if $opt->{force}; while (!$opt->{force}) { print STDERR "(r)etry, (c)ontinue, (q)uit?\n"; chomp(my $op = || ''); $op = lc($op); goto retry_edit if $op eq 'r'; if ($op eq 'q') { # n.b. we'll lose the exit signal, here, # oh well; "q" is user-specified anyways. exit($? >> 8); } last if $op eq 'c'; # continuing print STDERR "\`$op' not recognized\n"; } } # reread the edited file, not using $edit_fh since $EDITOR may # rename/relink $edit_fn open my $new_fh, '<', $edit_fn or die "can't read edited file ($edit_fn): $!\n"; my $new_raw = do { local $/; <$new_fh> }; if (!$opt->{raw}) { # get rid of the From we added $new_raw =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; # check if user forgot to purge (in mutt) after editing if ($new_raw =~ /^From /sm) { if (-t STDIN) { print STDERR <<''; Extra "From " lines detected in new mbox. Did you forget to purge the original message from the mbox after editing? while (1) { print STDERR <<""; (y)es to re-edit, (n)o to continue chomp(my $op = || ''); $op = lc($op); goto retry_edit if $op eq 'y'; last if $op eq 'n'; # continuing print STDERR "\`$op' not recognized\n"; } } else { # non-interactive path # unlikely to happen, as extra From lines are # only a common mistake (for me) with # interactive use warn <<""; W: possible message boundary splitting error } } # unescape what we escaped: $new_raw =~ s/^>(>*From )/$1/gm; } my $new_mime = PublicInbox::Eml->new(\$new_raw); my $old_mime = PublicInbox::Eml->new($old_raw); # make sure we don't compare unwanted headers, since mutt adds # Content-Length, Status, and Lines headers: PublicInbox::Import::drop_unwanted_headers($new_mime); PublicInbox::Import::drop_unwanted_headers($old_mime); # allow changing Received: and maybe other headers which can # contain sensitive info. my $nhdr = $new_mime->header_obj->as_string; my $ohdr = $old_mime->header_obj->as_string; if (($nhdr eq $ohdr) && (content_hash($new_mime) eq content_hash($old_mime))) { warn "No change detected to:\n", show_cmd($ibx, $smsg); next unless $opt->{verbose}; # should we consider this machine-parseable? PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, []); next; } foreach my $tuple (@$to_edit) { $ibx = PublicInbox::InboxWritable->new($tuple->[0]); $smsg = $tuple->[1]; my $im = $ibx->importer(0); my $commits = $im->replace($old_mime, $new_mime); $im->done; unless ($commits) { warn "Failed to replace:\n", show_cmd($ibx, $smsg); next; } next unless $opt->{verbose}; # should we consider this machine-parseable? PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, $commits); } } public-inbox-1.6.1/script/public-inbox-httpd000077500000000000000000000022631377346120300210710ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ # # Standalone HTTP server for public-inbox. use strict; use PublicInbox::Daemon; BEGIN { for (qw(Plack::Builder Plack::Util)) { eval("require $_") or die "E: Plack is required for $0\n"; } Plack::Builder->import; require PublicInbox::HTTP; require PublicInbox::HTTPD; } my %httpds; my $app; my $refresh = sub { if (@ARGV) { eval { $app = Plack::Util::load_psgi(@ARGV) }; if ($@) { die $@, "$0 runs in /, command-line paths must be absolute\n"; } } else { require PublicInbox::WWW; my $www = PublicInbox::WWW->new; $www->preload; $app = builder { eval { enable 'ReverseProxy' }; $@ and warn "Plack::Middleware::ReverseProxy missing,\n", "URL generation for redirects may be wrong if behind a reverse proxy\n"; enable 'Head'; sub { $www->call(@_) }; }; } }; PublicInbox::Daemon::run('0.0.0.0:8080', $refresh, sub ($$$) { # post_accept my ($client, $addr, $srv) = @_; my $fd = fileno($srv); my $h = $httpds{$fd} ||= PublicInbox::HTTPD->new($srv, $app); PublicInbox::HTTP->new($client, $addr, $h), }); public-inbox-1.6.1/script/public-inbox-imapd000077500000000000000000000007771377346120300210500ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ # # Standalone read-only IMAP server for public-inbox. use strict; use PublicInbox::Daemon; use PublicInbox::IMAPdeflate; # loads PublicInbox::IMAP use PublicInbox::IMAPD; my $imapd = PublicInbox::IMAPD->new; PublicInbox::Daemon::run('0.0.0.0:143', sub { $imapd->refresh_groups(@_) }, # refresh sub ($$$) { PublicInbox::IMAP->new($_[0], $imapd) }, # post_accept $imapd); public-inbox-1.6.1/script/public-inbox-index000077500000000000000000000075051377346120300210610ustar00rootroot00000000000000#!perl -w # Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ # Basic tool to create a Xapian search index for a public-inbox. # Usage with libeatmydata # highly recommended: eatmydata public-inbox-index INBOX_DIR use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $help = < -1, compact => 0, max_size => undef, fsync => 1 }; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune fsync|sync! xapian_only|xapian-only indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s sequential_shard|seq-shard|sequential-shard skip-docdata all help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; if ($opt->{xapian_only} && !$opt->{reindex}) { die "--xapian-only requires --reindex\n"; } # require lazily to speed up --help require PublicInbox::Admin; PublicInbox::Admin::require_or_die('-index'); my $cfg = PublicInbox::Config->new; # Config is loaded by Admin my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); PublicInbox::Admin::require_or_die('-index'); unless (@ibxs) { print STDERR $help; exit 1 } my $mods = {}; foreach my $ibx (@ibxs) { # detect_indexlevel may also set $ibx->{-skip_docdata} my $detected = PublicInbox::Admin::detect_indexlevel($ibx); # XXX: users can shoot themselves in the foot, with opt->{indexlevel} $ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ? 'full' : $detected); PublicInbox::Admin::scan_ibx_modules($mods, $ibx); } # "Search::Xapian" includes SWIG "Xapian", too: $opt->{compact} = 0 if !$mods->{'Search::Xapian'}; PublicInbox::Admin::require_or_die(keys %$mods); my $env = PublicInbox::Admin::index_prepare($opt, $cfg); local %ENV = (%ENV, %$env) if $env; require PublicInbox::InboxWritable; PublicInbox::Xapcmd::check_compact() if $opt->{compact}; PublicInbox::Admin::progress_prepare($opt); for my $ibx (@ibxs) { $ibx = PublicInbox::InboxWritable->new($ibx); if ($opt->{compact} >= 2) { PublicInbox::Xapcmd::run($ibx, 'compact', $opt->{compact_opt}); } $ibx->{-no_fsync} = 1 if !$opt->{fsync}; $ibx->{-skip_docdata} //= $opt->{'skip-docdata'}; my $ibx_opt = $opt; if (defined(my $s = $ibx->{lc('indexSequentialShard')})) { defined(my $v = $cfg->git_bool($s)) or die <{name}.indexSequentialShard not boolean EOL $ibx_opt = { %$opt, sequential_shard => $v }; } PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt); if (my $copt = $opt->{compact_opt}) { local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard}; PublicInbox::Xapcmd::run($ibx, 'compact', $copt); } } public-inbox-1.6.1/script/public-inbox-init000077500000000000000000000153661377346120300207210ustar00rootroot00000000000000#!perl -w # Copyright (C) 2014-2020 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; use Fcntl qw(:DEFAULT); my $help = < \$version, 'L|index-level|indexlevel=s' => \$indexlevel, 'S|skip|skip-epoch=i' => \$skip_epoch, 'skip-artnum=i' => \$skip_artnum, 'j|jobs=i' => \$jobs, 'ng|newsgroup=s' => \$ng, 'skip-docdata' => \$skip_docdata, 'help|h' => \$show_help, ); my $usage_cb = sub { print STDERR $help; exit 1; }; GetOptions(%opts) or $usage_cb->(); if ($show_help) { print $help; exit 0 }; PublicInbox::Admin::indexlevel_ok_or_die($indexlevel) if defined $indexlevel; my $name = shift @ARGV or $usage_cb->(); my $inboxdir = shift @ARGV or $usage_cb->(); my $http_url = shift @ARGV or $usage_cb->(); my (@address) = @ARGV; @address or $usage_cb->(); $ng =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]! and die "--newsgroup `$ng' is not valid\n"; ($ng =~ m!\A\.! || $ng =~ m!\.\z!) and die "--newsgroup `$ng' must not start or end with `.'\n"; require PublicInbox::Config; my $pi_config = PublicInbox::Config->default_file; require File::Basename; my $dir = File::Basename::dirname($pi_config); require File::Path; File::Path::mkpath($dir); # will croak on fatal errors # first, we grab a flock to prevent simultaneous public-inbox-init # processes from trampling over each other, or exiting with 255 on # O_EXCL failure below. This gets unlocked automatically on exit: require PublicInbox::Lock; my $lock_obj = { lock_path => "$pi_config.flock" }; PublicInbox::Lock::lock_acquire($lock_obj); # git-config will operate on this (and rename on success): require File::Temp; my $fh = File::Temp->new(TEMPLATE => 'pi-init-XXXXXXXX', DIR => $dir); # Now, we grab another lock to use git-config(1) locking, so it won't # wait on the lock, unlike some of our internal flock()-based locks. # This is to prevent direct git-config(1) usage from clobbering our # changes. my $lockfile = "$pi_config.lock"; my $lockfh; sysopen($lockfh, $lockfile, O_RDWR|O_CREAT|O_EXCL) or do { warn "could not open config file: $lockfile: $!\n"; exit(255); }; my $auto_unlink = UnlinkMe->new($lockfile); my ($perm, %seen); if (-e $pi_config) { open(my $oh, '<', $pi_config) or die "unable to read $pi_config: $!\n"; my @st = stat($oh); $perm = $st[2]; defined $perm or die "(f)stat failed on $pi_config: $!\n"; chmod($perm & 07777, $fh) or die "(f)chmod failed on future $pi_config: $!\n"; my $old; { local $/; $old = <$oh>; } print $fh $old or die "failed to write: $!\n"; close $oh or die "failed to close $pi_config: $!\n"; # yes, this conflict checking is racy if multiple instances of this # script are run by the same $PI_DIR my $cfg = PublicInbox::Config->new; my $conflict; foreach my $addr (@address) { my $found = $cfg->lookup($addr); if ($found) { if ($found->{name} ne $name) { print STDERR "`$addr' already defined for ", "`$found->{name}',\n", "does not match intend `$name'\n"; $conflict = 1; } else { $seen{lc($addr)} = 1; } } } exit(1) if $conflict; my $ibx = $cfg->lookup_name($name); $indexlevel //= $ibx->{indexlevel} if $ibx; } my $pi_config_tmp = $fh->filename; close($fh) or die "failed to close $pi_config_tmp: $!\n"; my $pfx = "publicinbox.$name"; my @x = (qw/git config/, "--file=$pi_config_tmp"); require File::Spec; $inboxdir = File::Spec->canonpath($inboxdir); die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s; if (-f "$inboxdir/inbox.lock") { if (!defined $version) { $version = 2; } elsif ($version != 2) { die "$inboxdir is a -V2 inbox, -V$version specified\n" } } elsif (-d "$inboxdir/objects") { if (!defined $version) { $version = 1; } elsif ($version != 1) { die "$inboxdir is a -V1 inbox, -V$version specified\n" } } $version = 1 unless defined $version; if ($version == 1 && defined $skip_epoch) { die "--skip-epoch is only supported for -V2 inboxes\n"; } my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => $name, version => $version, -primary_address => $address[0], indexlevel => $indexlevel, }); my $creat_opt = {}; if (defined $jobs) { die "--jobs is only supported for -V2 inboxes\n" if $version == 1; die "--jobs=$jobs must be >= 1\n" if $jobs <= 0; $creat_opt->{nproc} = $jobs; } require PublicInbox::InboxWritable; $ibx = PublicInbox::InboxWritable->new($ibx, $creat_opt); if ($skip_docdata) { $ibx->{indexlevel} //= 'full'; # ensure init_inbox writes xdb $ibx->{indexlevel} eq 'basic' and die "--skip-docdata ignored with --indexlevel=basic\n"; $ibx->{-skip_docdata} = $skip_docdata; } $ibx->init_inbox(0, $skip_epoch, $skip_artnum); require Cwd; my $tmp = Cwd::abs_path($inboxdir); defined($tmp) or die "failed to resolve $inboxdir: $!\n"; $inboxdir = $tmp; die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s; # needed for git prior to v2.1.0 umask(0077) if defined $perm; foreach my $addr (@address) { next if $seen{lc($addr)}; PublicInbox::Import::run_die([@x, "--add", "$pfx.address", $addr]); } PublicInbox::Import::run_die([@x, "$pfx.url", $http_url]); PublicInbox::Import::run_die([@x, "$pfx.inboxdir", $inboxdir]); if (defined($indexlevel)) { PublicInbox::Import::run_die([@x, "$pfx.indexlevel", $indexlevel]); } PublicInbox::Import::run_die([@x, "$pfx.newsgroup", $ng]) if $ng ne ''; # needed for git prior to v2.1.0 if (defined $perm) { chmod($perm & 07777, $pi_config_tmp) or die "(f)chmod failed on future $pi_config: $!\n"; } rename $pi_config_tmp, $pi_config or die "failed to rename `$pi_config_tmp' to `$pi_config': $!\n"; $auto_unlink->DESTROY; package UnlinkMe; use strict; sub new { my ($klass, $file) = @_; bless { file => $file }, $klass; } sub DESTROY { my $f = delete($_[0]->{file}); unlink($f) if defined($f); } 1; public-inbox-1.6.1/script/public-inbox-learn000077500000000000000000000067001377346120300210470ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2014-2020 all contributors # License: AGPL-3.0+ # # Used for training spam (via SpamAssassin) and removing messages from a # public-inbox my $help = < 0); GetOptions(\%opt, qw(all help|h)) or die $help; my $train = shift or die $help; if ($train !~ /\A(?:ham|spam|rm)\z/) { die "`$train' not recognized.\n$help"; } die "--all only works with `rm'\n" if $opt{all} && $train ne 'rm'; my $spamc = PublicInbox::Spamcheck::Spamc->new; my $pi_config = PublicInbox::Config->new; my $err; my $mime = PublicInbox::Eml->new(do{ local $/; my $data = ; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; if ($train ne 'rm') { eval { if ($train eq 'ham') { $spamc->hamlearn(\$data); } elsif ($train eq 'spam') { $spamc->spamlearn(\$data); } die "spamc failed with: $?\n" if $?; }; $err = $@; } \$data }); sub remove_or_add ($$$$) { my ($ibx, $train, $mime, $addr) = @_; # We do not touch GIT_COMMITTER_* env here so we can track # who trained the message. $ibx->{name} = $ENV{GIT_COMMITTER_NAME} // $ibx->{name}; $ibx->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} // $addr; $ibx = PublicInbox::InboxWritable->new($ibx); my $im = $ibx->importer(0); if ($train eq "rm") { # This needs to be idempotent, as my inotify trainer # may train for each cross-posted message, and this # script already learns for every list in # ~/.public-inbox/config $im->remove($mime, $train); } elsif ($train eq "ham") { # no checking for spam here, we assume the message has # been reviewed by a human at this point: PublicInbox::MDA->set_list_headers($mime, $ibx); # Ham messages are trained when they're marked into # a SEEN state, so this is idempotent: $im->add($mime); } $im->done; } # spam is removed from all known inboxes since it is often Bcc:-ed if ($train eq 'spam' || ($train eq 'rm' && $opt{all})) { $pi_config->each_inbox(sub { my ($ibx) = @_; $ibx = PublicInbox::InboxWritable->new($ibx); my $im = $ibx->importer(0); $im->remove($mime, $train); $im->done; }); } else { require PublicInbox::MDA; # get all recipients my %dests; # address => for ($mime->header('Cc'), $mime->header('To')) { foreach my $addr (PublicInbox::Address::emails($_)) { $addr = lc($addr); $dests{$addr} //= $pi_config->lookup($addr) // 0; } } # n.b. message may be cross-posted to multiple public-inboxes my %seen; while (my ($addr, $ibx) = each %dests) { next unless ref($ibx); # $ibx may be 0 next if $seen{"$ibx"}++; remove_or_add($ibx, $train, $mime, $addr); } my $dests = PublicInbox::MDA->inboxes_for_list_id($pi_config, $mime); for my $ibx (@$dests) { next if $seen{"$ibx"}++; remove_or_add($ibx, $train, $mime, $ibx->{-primary_address}); } } if ($err) { warn $err; exit 1; } public-inbox-1.6.1/script/public-inbox-mda000077500000000000000000000101051377346120300205010ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2013-2020 all contributors # License: AGPL-3.0+ # # Mail delivery agent for public-inbox, run from your MTA upon mail delivery my $help = < \$precheck, 'help|h' => \$show_help) or do { print STDERR $help; exit 1 }; my $do_exit = sub { my ($code) = shift; $emm = $ems = undef; # trigger DESTROY exit $code; }; use PublicInbox::Eml; use PublicInbox::MDA; use PublicInbox::Config; use PublicInbox::Emergency; use PublicInbox::Filter::Base; use PublicInbox::InboxWritable; use PublicInbox::Spamcheck; # n.b: hopefully we can setup the emergency path without bailing due to # user error, we really want to setup the emergency destination ASAP # in case there's bugs in our code or user error. my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/"; $ems = PublicInbox::Emergency->new($emergency); my $str = do { local $/; }; $str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; $ems->prepare(\$str); my $eml = PublicInbox::Eml->new(\$str); my $config = PublicInbox::Config->new; my $key = 'publicinboxmda.spamcheck'; my $default = 'PublicInbox::Spamcheck::Spamc'; my $spamc = PublicInbox::Spamcheck::get($config, $key, $default); my $dests = []; my $recipient = $ENV{ORIGINAL_RECIPIENT}; if (defined $recipient) { my $ibx = $config->lookup($recipient); # first check push @$dests, $ibx if $ibx; } if (!scalar(@$dests)) { $dests = PublicInbox::MDA->inboxes_for_list_id($config, $eml); if (!scalar(@$dests) && !defined($recipient)) { die "ORIGINAL_RECIPIENT not defined in ENV\n"; } scalar(@$dests) or $do_exit->(67); # EX_NOUSER 5.1.1 user unknown } my $err; @$dests = grep { my $ibx = PublicInbox::InboxWritable->new($_); eval { $ibx->assert_usable_dir }; if ($@) { warn $@; $err = 1; 0; # pre-check, MDA has stricter rules than an importer might; } elsif ($precheck) { !!PublicInbox::MDA->precheck($eml, $ibx->{address}); } else { 1; } } @$dests; $do_exit->(67) if $err && scalar(@$dests) == 0; $eml = undef; my $spam_ok; if ($spamc) { $str = ''; $spam_ok = $spamc->spamcheck($ems->fh, \$str); # update the emergency dump with the new message: $emm = PublicInbox::Emergency->new($emergency); $emm->prepare(\$str); $ems = $ems->abort; } else { # no spam checking configured: $spam_ok = 1; $emm = $ems; my $fh = $emm->fh; read($fh, $str, -s $fh); } $do_exit->(0) unless $spam_ok; # -mda defaults to the strict base filter which we won't use anywhere else sub mda_filter_adjust ($) { my ($ibx) = @_; my $fcfg = $ibx->{filter} || ''; if ($fcfg eq '') { $ibx->{filter} = 'PublicInbox::Filter::Base'; } elsif ($fcfg eq 'scrub') { # legacy alias, undocumented, remove? $ibx->{filter} = 'PublicInbox::Filter::Mirror'; } } my @rejects; for my $ibx (@$dests) { mda_filter_adjust($ibx); my $filter = $ibx->filter; my $mime = PublicInbox::Eml->new($str); my $ret = $filter->delivery($mime); if (ref($ret) && ($ret->isa('PublicInbox::Eml') || $ret->isa('Email::MIME'))) { # filter altered message $mime = $ret; } elsif ($ret == PublicInbox::Filter::Base::IGNORE) { next; # nothing, keep looping } elsif ($ret == PublicInbox::Filter::Base::REJECT) { push @rejects, $filter->err; next; } PublicInbox::MDA->set_list_headers($mime, $ibx); my $im = $ibx->importer(0); if (defined $im->add($mime)) { # ->abort is idempotent, no emergency if a single # destination succeeds $emm->abort; } else { # v1-only my $mid = $mime->header_raw('Message-ID'); # this message is similar to what ssoma-mda shows: print STDERR "CONFLICT: Message-ID: $mid exists\n"; } $im->done; } if (scalar(@rejects) && scalar(@rejects) == scalar(@$dests)) { $! = 65; # EX_DATAERR 5.6.0 data format error die join("\n", @rejects, ''); } $do_exit->(0); public-inbox-1.6.1/script/public-inbox-nntpd000077500000000000000000000010151377346120300210630ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ # # Standalone NNTP server for public-inbox. use strict; use warnings; use PublicInbox::Daemon; use PublicInbox::NNTPdeflate; # loads PublicInbox::NNTP use PublicInbox::NNTPD; my $nntpd = PublicInbox::NNTPD->new; PublicInbox::Daemon::run('0.0.0.0:119', sub { $nntpd->refresh_groups }, # refresh sub ($$$) { PublicInbox::NNTP->new($_[0], $nntpd) }, # post_accept $nntpd); public-inbox-1.6.1/script/public-inbox-purge000077500000000000000000000035211377346120300210660ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ # # Used for purging messages entirely from a public-inbox. Currently # supports v2 inboxes only, for now. use strict; use warnings; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use PublicInbox::AdminEdit; PublicInbox::Admin::check_require('-index'); use PublicInbox::Filter::Base qw(REJECT); use PublicInbox::Eml; require PublicInbox::V2Writable; my $help = < 1, all => 0, -min_inbox_version => 2 }; GetOptions($opt, @PublicInbox::AdminEdit::OPT) or die $help; if ($opt->{help}) { print $help; exit 0 }; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); PublicInbox::AdminEdit::check_editable(\@ibxs); my $data = do { local $/; }; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; my $n_purged = 0; foreach my $ibx (@ibxs) { my $mime = PublicInbox::Eml->new($data); my $v2w = PublicInbox::V2Writable->new($ibx, 0); my $commits = $v2w->purge($mime) || []; if (my $scrub = $ibx->filter($v2w)) { my $scrubbed = $scrub->scrub($mime, 1); if ($scrubbed && $scrubbed != REJECT()) { my $scrub_commits = $v2w->purge($scrubbed); push @$commits, @$scrub_commits if $scrub_commits; } } $v2w->done; if ($opt->{verbose}) { # should we consider this machine-parseable? PublicInbox::AdminEdit::show_rewrites(\*STDOUT, $ibx, $commits); } $n_purged += scalar @$commits; } # behave like "rm -f" exit(0) if ($opt->{force} || $n_purged); warn "Not found\n" if $opt->{verbose}; exit(1); public-inbox-1.6.1/script/public-inbox-watch000077500000000000000000000035131377346120300210530ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ my $help = <autoflush use PublicInbox::Watch; use PublicInbox::Config; use PublicInbox::DS; use PublicInbox::Sigfd; use PublicInbox::Syscall qw($SFD_NONBLOCK); my $do_scan = 1; GetOptions('scan!' => \$do_scan, # undocumented, testing only 'help|h' => \(my $show_help)) or do { print STDERR $help; exit 1 }; if ($show_help) { print $help; exit 0 }; my $oldset = PublicInbox::Sigfd::block_signals(); STDOUT->autoflush(1); STDERR->autoflush(1); local $0 = $0; # local since this script may be eval-ed my $watch = PublicInbox::Watch->new(PublicInbox::Config->new); my $reload = sub { my $prev = $watch or return; # SIGQUIT issued $watch->quit; $watch = PublicInbox::Watch->new(PublicInbox::Config->new); if ($watch) { warn("I: reloaded\n"); } else { warn("E: reloading failed\n"); $watch = $prev; } }; if ($watch) { my $scan = sub { return if !$watch; warn "I: scanning\n"; $watch->trigger_scan('full'); }; my $quit = sub { $watch->quit if $watch; $watch = undef; $0 .= ' quitting'; }; my $sig = { HUP => $reload, USR1 => $scan, CHLD => \&PublicInbox::DS::enqueue_reap, }; $sig->{QUIT} = $sig->{TERM} = $sig->{INT} = $quit; # --no-scan is only intended for testing atm, undocumented. PublicInbox::DS::requeue($scan) if $do_scan; my $sigfd = PublicInbox::Sigfd->new($sig, $SFD_NONBLOCK); local %SIG = (%SIG, %$sig) if !$sigfd; if (!$sigfd) { PublicInbox::Sigfd::sig_setmask($oldset); PublicInbox::DS->SetLoopTimeout(1000); } $watch->watch($sig, $oldset) while ($watch); } public-inbox-1.6.1/script/public-inbox-xcpdb000077500000000000000000000043301377346120300210430ustar00rootroot00000000000000#!perl -w # Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $help = < -1, compact => 0, fsync => 1 }; GetOptions($opt, qw( fsync|sync! compact|c reshard|R=i max_size|max-size=s batch_size|batch-size=s sequential_shard|seq-shard|sequential-shard jobs|j=i quiet|q verbose|v blocksize|b=s no-full|n fuller|F all help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; use PublicInbox::Admin; PublicInbox::Admin::require_or_die('-search'); require PublicInbox::Config; my $cfg = PublicInbox::Config->new; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg) or die $help; my $idx_env = PublicInbox::Admin::index_prepare($opt, $cfg); # we only set XAPIAN_FLUSH_THRESHOLD for index, since cpdb doesn't # know sizes, only doccounts $opt->{-idx_env} = $idx_env; if ($opt->{sequential_shard} && ($opt->{jobs} // 1) > 1) { warn "W: --jobs=$opt->{jobs} ignored with --sequential-shard\n"; $opt->{jobs} = 0; } require PublicInbox::InboxWritable; require PublicInbox::Xapcmd; foreach (@ibxs) { my $ibx = PublicInbox::InboxWritable->new($_); # we rely on --no-renumber to keep docids synched for NNTP PublicInbox::Xapcmd::run($ibx, 'cpdb', $opt); } public-inbox-1.6.1/script/public-inbox.cgi000077500000000000000000000015071377346120300205110ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2014-2020 all contributors # License: AGPL-3.0+ or later # # Enables using PublicInbox::WWW as a CGI script use strict; BEGIN { for (qw(Plack::Builder Plack::Handler::CGI)) { eval("require $_") or die "E: Plack is required for $0\n"; } Plack::Builder->import; require PublicInbox::WWW; PublicInbox::WWW->preload if $ENV{MOD_PERL}; } my $www = PublicInbox::WWW->new; my $app = builder { # Enable to ensure redirects and Atom feed URLs are generated # properly when running behind a reverse proxy server which # sets the X-Forwarded-Proto request header. # See Plack::Middleware::ReverseProxy documentation for details # enable 'ReverseProxy'; enable 'Head'; sub { $www->call(@_) }; }; Plack::Handler::CGI->new->run($app); public-inbox-1.6.1/scripts/000077500000000000000000000000001377346120300156075ustar00rootroot00000000000000public-inbox-1.6.1/scripts/README000066400000000000000000000004061377346120300164670ustar00rootroot00000000000000This directory contains informal scripts and random tools used in the development of public-inbox. Some only exist only for historical purposes, and some may not work anymore. See the "script/" directory (not "scripts/") for supported and documented commands. public-inbox-1.6.1/scripts/dc-dlvr000077500000000000000000000033251377346120300170730ustar00rootroot00000000000000#!/bin/sh # Copyright (C) 2008-2020 all contributors # License: GPL-3.0+ # This is installed as /etc/dc-dcvr on my system # to use with postfix main.cf: mailbox_command = /etc/dc-dlvr "$EXTENSION" DELIVER=/usr/lib/dovecot/deliver CLAMDSCAN=clamdscan # change if your spamc/spamd listens elsewhere spamc='spamc' # allow plus addressing to train spam filters, $1 is the $EXTENSION # which may be "trainspam" or "trainham". Only allow spam training # when $CLIENT_ADDRESS is empty (local client) case $1,$CLIENT_ADDRESS in trainspam,) exec $spamc -L spam > /dev/null 2>&1 ;; trainham,) exec $spamc -L ham > /dev/null 2>&1 ;; esac TMPMSG=$(mktemp -t dc-dlvr.orig.$USER.XXXXXX || exit 1) CDMSG=$(mktemp -t dc-dlvr.orig.$USER.XXXXXX || exit 1) rm_list="$TMPMSG $CDMSG" cat >$CDMSG $CLAMDSCAN --quiet - <$CDMSG if test $? -eq 1 then $DELIVER -m INBOX.spam <$CDMSG exec rm -f $rm_list fi # pre-filter, for infrequently read lists which do their own spam filtering: if test -r ~/.dc-dlvr.pre then set -e mv -f $CDMSG $TMPMSG DEFAULT_INBOX=$(. ~/.dc-dlvr.pre) case $DEFAULT_INBOX in '') exec rm -f $rm_list ;; INBOX) ;; # do nothing *) $DELIVER -m $DEFAULT_INBOX <$TMPMSG exec rm -f $rm_list ;; esac PREMSG=$(mktemp -t dc-dlvr.orig.$USER.XXXXXX || exit 1) rm_list="$rm_list $PREMSG" set +e mv -f $TMPMSG $PREMSG $spamc -E --headers <$PREMSG >$TMPMSG else $spamc -E --headers <$CDMSG >$TMPMSG fi err=$? # normal delivery set -e case $err in 1) $DELIVER -m INBOX.spam <$TMPMSG ;; *) # users may override normal delivery and have it go elsewhere if test -r ~/.dc-dlvr.rc then . ~/.dc-dlvr.rc else $DELIVER -m INBOX <$TMPMSG fi ;; esac exec rm -f $rm_list public-inbox-1.6.1/scripts/dc-dlvr.pre000066400000000000000000000007351377346120300176570ustar00rootroot00000000000000# Copyright (C) 2014, Eric Wong # License: AGPL-3.0+ # sourced by /etc/dc-dlvr in ~$PI_USER/.dc-dlvr.rc, this just exits, # aborting /etc/dc-dlvr export PATH=/usr/local/bin:/usr/bin:/bin trap 'err=$?; set +e; test $err -eq 0 || rm -f $TMPMSG; exit $err' EXIT case $1,$CLIENT_ADDRESS in pispam,) exec public-inbox-learn spam <$TMPMSG ;; piham,) exec public-inbox-learn ham <$TMPMSG ;; esac exec public-inbox-mda <$TMPMSG public-inbox-1.6.1/scripts/dupe-finder000066400000000000000000000025521377346120300177400ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ # # ad-hoc tool for finding duplicates, unstable! use strict; use warnings; use PublicInbox::Inbox; use PublicInbox::Over; use PublicInbox::Search; use PublicInbox::Config; my $repo = shift; my $ibx; if (index($repo, '@') > 0) { $ibx = PublicInbox::Config->new->lookup($repo); } elsif (-d $repo) { $ibx = { inboxdir => $repo, address => 'unnamed@example.com' }; $ibx = PublicInbox::Inbox->new($ibx); } else { $ibx = PublicInbox::Config->new->lookup_name($repo); } $ibx or die "No inbox"; $ibx->search or die "search not available for inbox"; my $over = $ibx->over; my $dbh = $over->dbh; sub emit ($) { my ($nums) = @_; foreach my $n (@$nums) { my $smsg = $over->get_art($n) or next; print STDERR "$n $smsg->{blob} $smsg->{mid}\n"; my $msg = $ibx->msg_by_smsg($smsg) or next; print "From $smsg->{blob}\@$n Thu Jan 1 00:00:00 1970\n"; $$msg =~ s/^(>*From )/>$1/gm; print $$msg, "\n"; } } my $sth = $dbh->prepare(<<''); SELECT id,num FROM id2num WHERE num > 0 ORDER BY id $sth->execute; my $prev_id = -1; my ($id, $num, @nums); while (1) { ($id, $num) = $sth->fetchrow_array; defined $id or last; if ($prev_id != $id) { emit(\@nums) if scalar(@nums) > 1; @nums = (); } $prev_id = $id; push @nums, $num; } public-inbox-1.6.1/scripts/edit-sa-prefs000077500000000000000000000015741377346120300202070ustar00rootroot00000000000000#!/bin/sh # Copyright (C) 2014, Eric Wong # License: GPLv3 or later # edit and atomically update ~/.spamasassin/user_prefs safely set -e cd ~/.spamassassin cp user_prefs user_prefs.edit.$$ # don't care if we clobber old files # non-blocking lock if ! ln user_prefs.edit.$$ user_prefs.edit then rm user_prefs.edit.$$ echo >&2 "we are already editing user_prefs.edit" exit 1 fi rm user_prefs.edit.$$ ${VISUAL-vi} user_prefs.edit if diff -u user_prefs user_prefs.edit then rm -f user_prefs.edit echo 'no changes' exit 0 fi # check until we're good or $EDITOR fails while ! spamassassin -p user_prefs.edit --lint do echo >&2 "respawning editor, press Enter to continue" read ignored_var ${VISUAL-vi} user_prefs.edit done # atomically replace user_prefs mv user_prefs.edit user_prefs echo '~/.spamassassin/user_prefs updated' public-inbox-1.6.1/scripts/import_maildir000077500000000000000000000027611377346120300205560ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2014, Eric Wong and all contributors # License: AGPL-3.0+ # # Script to import a Maildir into a public-inbox =begin usage export GIT_DIR=/path/to/your/repo.git export GIT_AUTHOR_EMAIL='list@example.com' export GIT_AUTHOR_NAME='list name' ./import_maildir /path/to/maildir/ =cut use strict; use warnings; use Date::Parse qw/str2time/; use PublicInbox::Eml; use PublicInbox::Git; use PublicInbox::Import; sub usage { "Usage:\n".join('', grep(/\t/, `head -n 24 $0`)) } my $dir = shift @ARGV or die usage(); my $git_dir = `git rev-parse --git-dir`; chomp $git_dir; foreach my $sub (qw(cur new tmp)) { -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n"; } my @msgs; foreach my $sub (qw(cur new)) { foreach my $fn (glob("$dir/$sub/*")) { open my $fh, '<', $fn or next; my $s = PublicInbox::Eml->new(do { local $/; <$fh> }); my $date = $s->header('Date'); my $t = eval { str2time($date) }; defined $t or next; my @fn = split(m!/!, $fn); push @msgs, [ $t, "$sub/" . pop @fn, $date ]; } } my $git = PublicInbox::Git->new($git_dir); chomp(my $name = `git config user.name`); chomp(my $email = `git config user.email`); my $im = PublicInbox::Import->new($git, $name, $email); @msgs = sort { $b->[0] <=> $a->[0] } @msgs; while (my $ary = pop @msgs) { my $fn = "$dir/$ary->[1]"; open my $fh, '<', $fn or next; my $mime = PublicInbox::Eml->new(do { local $/; <$fh> }); $im->add($mime); } $im->done; 1; public-inbox-1.6.1/scripts/import_slrnspool000077500000000000000000000040341377346120300211630ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ # # Incremental (or one-shot) importer of a slrnpull news spool =begin usage export ORIGINAL_RECIPIENT=address@example.com public-inbox-init $INBOX $GIT_DIR $HTTP_URL $ORIGINAL_RECIPIENT ./import_slrnspool SLRNPULL_ROOT/news/foo/bar =cut use strict; use warnings; use PublicInbox::Config; use PublicInbox::Eml; use PublicInbox::Import; use PublicInbox::Git; sub usage { "Usage:\n".join('',grep(/\t/, `head -n 10 $0`)) } my $exit = 0; my $sighandler = sub { $exit = 1 }; $SIG{INT} = $sighandler; $SIG{TERM} = $sighandler; my $spool = shift @ARGV or die usage(); my $recipient = $ENV{ORIGINAL_RECIPIENT}; defined $recipient or die usage(); my $config = PublicInbox::Config->new; my $ibx = $config->lookup($recipient); my $git = $ibx->git; my $im; if ($ibx->version == 2) { require PublicInbox::V2Writable; $im = PublicInbox::V2Writable->new($ibx); $im->{parallel} = 0; # pointless to be parallel for a single message } else { $im = PublicInbox::Import->new($git, $ibx->{name}, $ibx->{-primary_address}); } $ibx->{filter} ||= 'PublicInbox::Filter::Gmane'; my $filter = $ibx->filter; sub key { "publicinbox.$ibx->{name}.importslrnspoolstate"; } sub get_min { my $f = PublicInbox::Config->default_file; my $out = $git->qx('config', "--file=$f", key($ibx)); $out ||= 0; chomp $out; $out =~ /\A[0-9]+\z/ and return $out; 0; } sub set_min { my ($num) = @_; my $f = PublicInbox::Config->default_file; my @cmd = (qw/git config/, "--file=$f", key($ibx), $num); system(@cmd) == 0 or die join(' ', @cmd). " failed: $?\n"; } my $n = get_min(); my $ok; my $max_gap = 200000; my $max = $n + $max_gap; $spool =~ s!/+\z!!; for (; $exit == 0 && $n < $max; $n++) { my $fn = "$spool/$n"; open(my $fh, '<', $fn) or next; $max = $n + $max_gap; print STDERR $fn, "\n"; my $mime = PublicInbox::Eml->new(do { local $/; <$fh> }); $filter->scrub($mime); $im->add($mime); $ok = $n + 1; set_min($ok); } $im->done; public-inbox-1.6.1/scripts/import_vger_from_mbox000066400000000000000000000024101377346120300221340ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; use PublicInbox::InboxWritable; my $usage = "usage: $0 NAME EMAIL DIR \$dry_run, 'V|version=i' => \$version, 'F|format=s' => \$variant, 'filter=s' => \$filter, ); GetOptions(%opts) or die $usage; if ($variant ne 'mboxrd' && $variant ne 'mboxo') { die "Unsupported mbox variant: $variant\n"; } my $name = shift or die $usage; # git my $email = shift or die $usage; # git@vger.kernel.org my $inboxdir = shift or die $usage; # /path/to/v2/repo my $ibx = { inboxdir => $inboxdir, name => $name, version => $version, address => [ $email ], filter => $filter, }; $ibx = PublicInbox::Inbox->new($ibx); unless ($dry_run) { if ($version >= 2) { require PublicInbox::V2Writable; PublicInbox::V2Writable->new($ibx, 1)->init_inbox(0); } else { system(qw(git init --bare -q), $inboxdir) == 0 or die; } } $ibx = PublicInbox::InboxWritable->new($ibx); binmode STDIN; $ibx->import_mbox(\*STDIN, $variant); public-inbox-1.6.1/scripts/report-spam000077500000000000000000000032621377346120300200110ustar00rootroot00000000000000#!/bin/sh # Copyright (C) 2008-2014, Eric Wong # License: GPLv3 or later # Usage: report-spam /path/to/message/in/maildir # This is intended for use with incron or similar systems. # my incrontab(5) looks like this: # /path/to/maildir/.INBOX.good/cur IN_MOVED_TO /path/to/report-spam $@/$# # /path/to/maildir/.INBOX.spam/cur IN_MOVED_TO /path/to/report-spam $@/$# # gigantic emails tend not to be spam (but they suck anyways...) bytes=$(stat -c %s $1) if test $bytes -gt 512000 then exit fi # Only tested with the /usr/sbin/sendmail which ships with postfix # *** Why not call spamc directly in this script? *** # I route this through my MTA so it gets queued properly. # incrond has no concurrency limits and will fork a new process on # every single event, which sucks with rename storms when a client # commits folder changes. The sendmail executable exits quickly and # queues up the message for training. This should also ensure fairness # to newly arriving mail. Instead of installing/configuring # another queueing system, I reuse the queue in the MTA. # See scripts/dc-dlvr for corresponding trainspam/trainham handlers, # which are for my personal bayes training, and scripts/dc-dlvr.pre # for the pispam/piham handlers for training emails going to public-inbox DO_SENDMAIL='/usr/sbin/sendmail -oi' PI_USER=pi case $1 in *[/.]spam/cur/*) # non-new messages in spam get trained $DO_SENDMAIL $PI_USER+pispam <$1 exec $DO_SENDMAIL $USER+trainspam <$1 ;; *:2,*S*) # otherwise, seen messages only case $1 in *:2,*T*) exit 0 ;; # ignore trashed messages esac $DO_SENDMAIL $PI_USER+piham <$1 exec $DO_SENDMAIL $USER+trainham <$1 ;; esac public-inbox-1.6.1/scripts/slrnspool2maildir000077500000000000000000000026551377346120300212240ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2013-2020 all contributors # License: AGPL-3.0+ # # One-off script to convert an slrnpull news spool to Maildir =begin usage ./slrnspool2maildir SLRNPULL_ROOT/news/foo/bar /path/to/maildir/ =cut use strict; use warnings; use Email::Filter; use Email::LocalDelivery; use File::Glob qw(bsd_glob GLOB_NOSORT); sub usage { "Usage:\n".join('',grep(/\t/, `head -n 12 $0`)) } my $spool = shift @ARGV or die usage(); my $dir = shift @ARGV or die usage(); -d $dir or die "$dir is not a directory\n"; $dir .= '/' unless $dir =~ m!/\z!; foreach my $sub (qw(cur new tmp)) { my $nd = "$dir/$sub"; -d $nd and next; mkdir $nd or die "mkdir $nd failed: $!\n"; } foreach my $n (grep(/\d+\z/, bsd_glob("$spool/*", GLOB_NOSORT))) { if (open my $fh, '<', $n) { my $f = Email::Filter->new(data => do { local $/; <$fh> }); my $s = $f->simple; # gmane rewrites Received headers, which increases spamminess # Some older archives set Original-To foreach my $x (qw(Received To)) { my @h = $s->header("Original-$x"); if (@h) { $s->header_set($x, @h); $s->header_set("Original-$x"); } } # triggers for the SA HEADER_SPAM rule foreach my $drop (qw(Approved)) { $s->header_set($drop) } # appears to be an old gmane bug: $s->header_set('connect()'); $f->exit(0); $f->accept($dir); } else { warn "Failed to open $n: $!\n"; } } public-inbox-1.6.1/scripts/ssoma-replay000077500000000000000000000064141377346120300201560ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ # # A work-in-progress, but one day I hope this script is no longer # necessary and users will all pull from public-inboxes instead # of having mail pushed to them via mlmmj. # # This is for use with ssoma, using "command:" delivery mechanism # (as opposed to normal Maildir or mbox). # It assumes mlmmj-process is in /usr/bin (mlmmj requires absolute paths) # and assumes FOO@domain.example.com has web archives available at: # https://domain.example.com/FOO/ # # The goal here is _anybody_ can setup a mirror of any public-inbox # repository and run their own mlmmj instance to replay traffic. =begin usage with ssoma: NAME=meta URL=https://public-inbox.org/meta/ ssoma add $NAME $URL "command:/path/to/ssoma-replay -L /path/to/spool/$NAME" ; $GIT_DIR/ssoma.state should have something like the following target: ; (where GIT_DIR is ~/.ssoma/meta.git/ in the above example) [target "local"] command = /path/to/ssoma-replay -L /path/to/spool/meta =cut use strict; use Email::Simple; use URI::Escape qw/uri_escape_utf8/; use File::Temp qw/tempfile/; my ($fh, $filename) = tempfile('ssoma-replay-XXXXXXXX', TMPDIR => 1); my $msg = Email::Simple->new(do { local $/; }); select $fh; # Note: the archive URL makes assumptions about where the # archive is hosted. It is currently true of all the domains # hosted by me. my $header_obj = $msg->header_obj; my $body = $msg->body; my $list_id = $header_obj->header('List-Id'); my ($archive_url, $user, $domain); if (defined $list_id) { # due to a bug in old versions of public-inbox, was used # as the list-Id instead of as recommended in RFC2919 ($user, $domain) = ($list_id =~ /<([^\.@]+)[\.@](.+)>/g); if (defined $domain) { $archive_url = "https://$domain/$user/"; my $mid = $header_obj->header('Message-Id'); if ($mid =~ /<[ \t]*([^>]+)?[ \t]*>/s) { $mid = $1; } $mid = uri_escape_utf8($mid, '^A-Za-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@'); $header_obj->header_set('List-Archive', "<$archive_url>"); foreach my $h (qw(Help Unsubscribe Subscribe Owner)) { my $lch = lc $h; my $v = ""; $header_obj->header_set("List-$h", $v); } $header_obj->header_set('List-Post', ""); # RFC 5064 $header_obj->header_set('Archived-At', "<$archive_url$mid/>"); $header_obj->header_set('X-Archived-At'); } } print $header_obj->as_string, $msg->crlf, $body; # don't break inline signatures goto out if ($body =~ /^-----BEGIN PGP SIG.+-----/sm); # try not to break dkim/dmarc/spf crap, either foreach (qw(domainkey-signature dkim-signature authentication-results)) { goto out if defined $header_obj->header($_); } my $ct = $header_obj->header('Content-Type'); if (!defined($ct) || $ct =~ m{\A\s*text/plain\b}i) { print "\n" unless $body =~ /\n\z/s; defined $archive_url or goto out; # Do not add a space after '--' as is standard for user-generated # signatures, we want to preserve the "-- \n" in original user sigs # for mail software which splits on that. print "--\n", "unsubscribe: $user+unsubscribe\@$domain\n", "archive: $archive_url\n"; } out: $| = 1; exec '/usr/bin/mlmmj-process', @ARGV, '-m', $filename; public-inbox-1.6.1/scripts/xhdr-num2mid000077500000000000000000000030641377346120300200560ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ # Useful for mapping article IDs from existing NNTP servers to MIDs use strict; use warnings; use Net::NNTP; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $usage = "usage: NNTPSERVER=news.example.org $0 GROUP [FIRST_NUM]\n"; my ($msgmap, $mm); my %opts = ( '--msgmap=s' => \$msgmap ); GetOptions(%opts) or die "bad command-line args\n$usage"; if ($msgmap) { require PublicInbox::Msgmap; require PublicInbox::MID; # mid_clean $mm = PublicInbox::Msgmap->new_file($msgmap, 1); } my $group = shift or die $usage; my $nntp = Net::NNTP->new($ENV{NNTPSERVER} || '127.0.0.1'); my ($num, $first, $last) = $nntp->group($group); die "Invalid group\n" if !(defined $num && defined $first && defined $last); my $arg_first = shift; if (defined $arg_first) { $arg_first =~ /\A[0-9]+\z/ or die $usage; $first = $arg_first; } elsif ($mm) { my $last_article = $mm->meta_accessor('last_article'); $first = $last_article + 1 if defined $last_article; } my $batch = 1000; my $i; for ($i = $first; $i < $last; $i += $batch) { my $j = $i + $batch - 1; $j = $last if $j > $last; my $num2mid = $nntp->xhdr('Message-ID', "$i-$j"); $mm->{dbh}->begin_work if $mm; for my $n ($i..$j) { defined(my $mid = $num2mid->{$n}) or next; print "$n $mid\n"; if ($mm) { $mid = PublicInbox::MID::mid_clean($mid); $mm->mid_set($n, $mid); } } if ($mm) { $mm->meta_accessor('last_article', $j); $mm->{dbh}->commit; } } public-inbox-1.6.1/t/000077500000000000000000000000001377346120300143635ustar00rootroot00000000000000public-inbox-1.6.1/t/.gitconfig000066400000000000000000000002021377346120300163270ustar00rootroot00000000000000; this becomes ~/.gitconfig for tests where we use ; "$ENV{HOME} = '/path/to/worktree/t'" in tests [gc] writeCommitGraph = false public-inbox-1.6.1/t/address.t000066400000000000000000000033241377346120300161770ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use_ok 'PublicInbox::Address'; sub test_pkg { my ($pkg) = @_; my $emails = \&{"${pkg}::emails"}; my $names = \&{"${pkg}::names"}; is_deeply([qw(e@example.com e@example.org)], [$emails->('User , e@example.org')], 'address extraction works as expected'); is_deeply(['user@example.com'], [$emails->('')], 'comment after domain accepted before >'); my @names = $names->( 'User , e@e, "John A. Doe" , , (xyz), '. 'U Ser (do not use)'); is_deeply(\@names, ['User', 'e', 'John A. Doe', 'x', 'xyz', 'U Ser'], 'name extraction works as expected'); @names = $names->('"user@example.com" '); is_deeply(['user'], \@names, 'address-as-name extraction works as expected'); { my $backwards = 'u@example.com (John Q. Public)'; @names = $names->($backwards); is_deeply(\@names, ['John Q. Public'], 'backwards name OK'); my @emails = $emails->($backwards); is_deeply(\@emails, ['u@example.com'], 'backwards emails OK'); } @names = $names->('"Quote Unneeded" '); is_deeply(['Quote Unneeded'], \@names, 'extra quotes dropped'); my @emails = $emails->('Local User '); is_deeply([], \@emails , 'no address for local address'); @names = $emails->('Local User '); is_deeply([], \@names, 'no address, no name'); } test_pkg('PublicInbox::Address'); SKIP: { if ($INC{'PublicInbox/AddressPP.pm'}) { skip 'Email::Address::XS missing', 8; } use_ok 'PublicInbox::AddressPP'; test_pkg('PublicInbox::AddressPP'); } done_testing; public-inbox-1.6.1/t/admin.t000066400000000000000000000061551377346120300156470ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Import; use_ok 'PublicInbox::Admin', qw(resolve_repo_dir); my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/v1"; my $v2_dir = "$tmpdir/v2"; my ($res, $err, $v); PublicInbox::Import::init_bare($git_dir); # v1 is(resolve_repo_dir($git_dir), $git_dir, 'top-level GIT_DIR resolved'); is(resolve_repo_dir("$git_dir/objects"), $git_dir, 'GIT_DIR/objects resolved'); ok(chdir($git_dir), 'chdir GIT_DIR works'); is(resolve_repo_dir(), $git_dir, 'resolve_repo_dir works in GIT_DIR'); ok(chdir("$git_dir/objects"), 'chdir GIT_DIR/objects works'); is(resolve_repo_dir(), $git_dir, 'resolve_repo_dir works in GIT_DIR'); $res = resolve_repo_dir(undef, \$v); is($v, 1, 'version 1 detected'); is($res, $git_dir, 'detects directory along with version'); # $tmpdir could be inside a git working, directory, so we test '/' SKIP: { my $no_vcs_dir = '/'; # do people version-control "/"? skip "$no_vcs_dir is version controlled by git", 4 if -d '/.git'; open my $null, '>', '/dev/null' or die "open /dev/null: $!"; open my $olderr, '>&', \*STDERR or die "dup stderr: $!"; ok(chdir($no_vcs_dir), 'chdir to a non-inbox'); open STDERR, '>&', $null or die "redirect stderr to /dev/null: $!"; $res = eval { resolve_repo_dir() }; open STDERR, '>&', $olderr or die "restore stderr: $!"; is($res, undef, 'fails inside non-version-controlled dir'); ok(chdir($tmpdir), 'back to test-specific $tmpdir'); open STDERR, '>&', $null or die "redirect stderr to /dev/null: $!"; $res = eval { resolve_repo_dir($no_vcs_dir) }; $err = $@; open STDERR, '>&', $olderr or die "restore stderr: $!"; is($res, undef, 'fails on non-version-controlled dir'); ok($err, '$@ set on failure'); } # v2 SKIP: { for my $m (qw(DBD::SQLite)) { skip "$m missing", 5 unless eval "require $m"; } use_ok 'PublicInbox::V2Writable'; use_ok 'PublicInbox::Inbox'; my $ibx = PublicInbox::Inbox->new({ inboxdir => $v2_dir, name => 'test-v2writable', version => 2, -primary_address => 'test@example.com', indexlevel => 'basic', }); PublicInbox::V2Writable->new($ibx, 1)->idx_init; ok(-e "$v2_dir/inbox.lock", 'exists'); is(resolve_repo_dir($v2_dir), $v2_dir, 'resolve_repo_dir works on v2_dir'); ok(chdir($v2_dir), 'chdir v2_dir OK'); is(resolve_repo_dir(), $v2_dir, 'resolve_repo_dir works inside v2_dir'); $res = resolve_repo_dir(undef, \$v); is($v, 2, 'version 2 detected'); is($res, $v2_dir, 'detects directory along with version'); # TODO: should work from inside Xapian dirs, and git dirs, here... } chdir '/'; my @pairs = ( '1g' => 1024 ** 3, 666 => 666, '1500K' => 1500 * 1024, '15m' => 15 * (1024 ** 2), ); while (@pairs) { my ($in, $out) = splice(@pairs, 0, 2); my $orig = $in; ok(PublicInbox::Admin::parse_unsigned(\$in), "parse_unsigned $orig"); is($in, $out, "got $orig => ($in == $out)"); } for my $v ('', 'bogus', '1p', '1gig') { ok(!PublicInbox::Admin::parse_unsigned(\$v), "parse_unsigned rejects $v"); } done_testing(); public-inbox-1.6.1/t/altid.t000066400000000000000000000033711377346120300156510ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Eml; require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::Msgmap'; use_ok 'PublicInbox::SearchIdx'; use_ok 'PublicInbox::Import'; use_ok 'PublicInbox::Inbox'; my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/a.git"; my $alt_file = "$tmpdir/another-nntp.sqlite3"; my $altid = [ "serial:gmane:file=$alt_file" ]; my $ibx; { my $mm = PublicInbox::Msgmap->new_file($alt_file, 1); is($mm->mid_set(1234, 'a@example.com'), 1, 'mid_set once OK'); ok(0 == $mm->mid_set(1234, 'a@example.com'), 'mid_set not idempotent'); ok(0 == $mm->mid_set(1, 'a@example.com'), 'mid_set fails with dup MID'); } { my $git = PublicInbox::Git->new($git_dir); my $im = PublicInbox::Import->new($git, 'testbox', 'test@example'); $im->init_bare; $im->add(PublicInbox::Eml->new(<<'EOF')); From: a@example.com To: b@example.com Subject: boo! Message-ID: hello world gmane:666 EOF $im->done; } { $ibx = PublicInbox::Inbox->new({inboxdir => $git_dir}); $ibx->{altid} = $altid; my $rw = PublicInbox::SearchIdx->new($ibx, 1); $rw->index_sync; } { my $mset = $ibx->search->mset("gmane:1234"); my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); $msgs = [ map { $_->{mid} } @$msgs ]; is_deeply($msgs, ['a@example.com'], 'got one match'); $mset = $ibx->search->mset('gmane:666'); is($mset->size, 0, 'body did NOT match'); }; { my $mm = PublicInbox::Msgmap->new_file($alt_file, 1); my ($min, $max) = $mm->minmax; my $num = $mm->mid_insert('b@example.com'); ok($num > $max, 'auto-increment goes beyond mid_set'); } done_testing(); 1; public-inbox-1.6.1/t/altid_v2.t000066400000000000000000000030041377346120300162510ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::TestCommon; require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::V2Writable'; use_ok 'PublicInbox::Inbox'; my ($tmpdir, $for_destroy) = tmpdir(); my $inboxdir = "$tmpdir/inbox"; my $full = "$tmpdir/inbox/another-nntp.sqlite3"; my $altid = [ 'serial:gmane:file=another-nntp.sqlite3' ]; { ok(mkdir($inboxdir), 'created repo for msgmap'); my $mm = PublicInbox::Msgmap->new_file($full, 1); is($mm->mid_set(1234, 'a@example.com'), 1, 'mid_set once OK'); ok(0 == $mm->mid_set(1234, 'a@example.com'), 'mid_set not idempotent'); ok(0 == $mm->mid_set(1, 'a@example.com'), 'mid_set fails with dup MID'); } my $ibx = { inboxdir => $inboxdir, name => 'test-v2writable', version => 2, -primary_address => 'test@example.com', altid => $altid, }; $ibx = PublicInbox::Inbox->new($ibx); my $v2w = PublicInbox::V2Writable->new($ibx, 1); $v2w->add(PublicInbox::Eml->new(<<'EOF')); From: a@example.com To: b@example.com Subject: boo! Message-ID: hello world gmane:666 EOF $v2w->done; my $mset = $ibx->search->reopen->mset('gmane:1234'); my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); $msgs = [ map { $_->{mid} } @$msgs ]; is_deeply($msgs, ['a@example.com'], 'got one match'); $mset = $ibx->search->mset('gmane:666'); is($mset->size, 0, 'body did NOT match'); done_testing(); 1; public-inbox-1.6.1/t/cgi.t000066400000000000000000000111041377346120300153070ustar00rootroot00000000000000# Copyright (C) 2014-2020 all contributors # License: AGPL-3.0+ # FIXME: this test is too slow and most non-CGI-requirements # should be moved over to things which use test_psgi use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::TestCommon; use PublicInbox::Import; require_mods(qw(Plack::Handler::CGI Plack::Util)); my ($tmpdir, $for_destroy) = tmpdir(); my $home = "$tmpdir/pi-home"; my $pi_home = "$home/.public-inbox"; my $pi_config = "$pi_home/config"; my $maindir = "$tmpdir/main.git"; my $addr = 'test-public@example.com'; { is(1, mkdir($home, 0755), "setup ~/ for testing"); is(1, mkdir($pi_home, 0755), "setup ~/.public-inbox"); PublicInbox::Import::init_bare($maindir); open my $fh, '>', "$maindir/description" or die "open: $!\n"; print $fh "test for public-inbox\n"; close $fh or die "close: $!\n"; open $fh, '>>', $pi_config or die; print $fh <new($pi_config); my $ibx = $cfg->lookup_name('test'); my $im = PublicInbox::InboxWritable->new($ibx)->importer(0); { local $ENV{HOME} = $home; # inject some messages: my $mime = PublicInbox::Eml->new(< To: You Cc: $addr Message-Id: Subject: hihi Date: Thu, 01 Jan 1970 00:00:00 +0000 zzzzzz EOF ok($im->add($mime), 'added initial message'); $mime->header_set('Message-ID', ''); $mime->body_str_set("z\n" x 1024); ok($im->add($mime), 'added big message'); # deliver a reply, too $mime = PublicInbox::Eml->new(< To: Me Cc: $addr In-Reply-To: Message-Id: Subject: Re: hihi Date: Thu, 01 Jan 1970 00:00:01 +0000 Me wrote: > zzzzzz what? EOF ok($im->add($mime), 'added reply'); my $slashy_mid = 'slashy/asdf@example.com'; my $slashy = PublicInbox::Eml->new(< To: Me Cc: $addr Message-Id: <$slashy_mid> Subject: Re: hihi Date: Thu, 01 Jan 1970 00:00:01 +0000 slashy EOF ok($im->add($slashy), 'added slash'); $im->done; my $res = cgi_run("/test/slashy/asdf\@example.com/raw"); like($res->{body}, qr/Message-Id: <\Q$slashy_mid\E>/, "slashy mid raw hit"); } # retrieve thread as an mbox { local $ENV{HOME} = $home; my $path = "/test/blahblah\@example.com/t.mbox.gz"; my $res = cgi_run($path); like($res->{head}, qr/^Status: 501 /, "search not-yet-enabled"); my $cmd = ['-index', $ibx->{inboxdir}, '--max-size=2k']; my $opt = { 2 => \(my $err) }; my $indexed = run_script($cmd, undef, $opt); if ($indexed) { $res = cgi_run($path); like($res->{head}, qr/^Status: 200 /, "search returned mbox"); eval { require IO::Uncompress::Gunzip; my $in = $res->{body}; my $out; IO::Uncompress::Gunzip::gunzip(\$in => \$out); like($out, qr/^From /m, "From lines in mbox"); }; $res = cgi_run('/test/toobig@example.com/'); like($res->{head}, qr/^Status: 300 /, 'did not index or return >max-size message'); like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping large OID'); } else { like($res->{head}, qr/^Status: 501 /, "search not available"); SKIP: { skip 'DBD::SQLite not available', 4 }; } my $have_xml_treepp = eval { require XML::TreePP; 1 } if $indexed; if ($have_xml_treepp) { $path = "/test/blahblah\@example.com/t.atom"; $res = cgi_run($path); like($res->{head}, qr/^Status: 200 /, "atom returned 200"); like($res->{head}, qr!^Content-Type: application/atom\+xml!m, "search returned atom"); my $t = XML::TreePP->new->parse($res->{body}); is(scalar @{$t->{feed}->{entry}}, 3, "parsed three entries"); like($t->{feed}->{-xmlns}, qr/\bAtom\b/, 'looks like an an Atom feed'); } else { SKIP: { skip 'DBD::SQLite or XML::TreePP missing', 2 }; } } done_testing(); sub cgi_run { my %env = ( PATH_INFO => $_[0], QUERY_STRING => $_[1] || "", SCRIPT_NAME => '', REQUEST_URI => $_[0] . ($_[1] ? "?$_[1]" : ''), REQUEST_METHOD => $_[2] || "GET", GATEWAY_INTERFACE => 'CGI/1.1', HTTP_ACCEPT => '*/*', HTTP_HOST => 'test.example.com', ); my ($in, $out, $err) = ("", "", ""); my $rdr = { 0 => \$in, 1 => \$out, 2 => \$err }; run_script(['.cgi'], \%env, $rdr); die "unexpected error: \$?=$? ($err)" if $?; my ($head, $body) = split(/\r\n\r\n/, $out, 2); { head => $head, body => $body, err => $err } } public-inbox-1.6.1/t/check-www-inbox.perl000066400000000000000000000121661377346120300202710ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ # Parallel WWW checker my $usage = "$0 [-j JOBS] [-s SLOW_THRESHOLD] URL_OF_INBOX\n"; use strict; use warnings; use File::Temp qw(tempfile); use GDBM_File; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use IO::Socket; use LWP::ConnCache; use POSIX qw(:sys_wait_h); use Time::HiRes qw(gettimeofday tv_interval); use WWW::Mechanize; use Data::Dumper; # we want to use vfork+exec with spawn, WWW::Mechanize can use too much # memory and fork(2) fails use PublicInbox::Spawn qw(spawn which); $ENV{PERL_INLINE_DIRECTORY} or warn "PERL_INLINE_DIRECTORY unset, may OOM\n"; our $tmp_owner = $$; my $nproc = 4; my $slow = 0.5; my %opts = ( '-j|jobs=i' => \$nproc, '-s|slow-threshold=f' => \$slow, ); GetOptions(%opts) or die "bad command-line args\n$usage"; my $root_url = shift or die $usage; chomp(my $xmlstarlet = which('xmlstarlet')); my $atom_check = eval { my $cmd = [ qw(xmlstarlet val -e -) ]; sub { my ($in, $out, $err) = @_; use autodie; open my $in_fh, '+>', undef; open my $out_fh, '+>', undef; open my $err_fh, '+>', undef; print $in_fh $$in; $in_fh->flush; sysseek($in_fh, 0, 0); my $rdr = { 0 => fileno($in_fh), 1 => fileno($out_fh), 2 => fileno($err_fh), }; my $pid = spawn($cmd, undef, $rdr); while (waitpid($pid, 0) != $pid) { next if $!{EINTR}; warn "waitpid(xmlstarlet, $pid) $!"; return $!; } sysseek($out_fh, 0, 0); sysread($out_fh, $$out, -s $out_fh); sysseek($err_fh, 0, 0); sysread($err_fh, $$err, -s $err_fh); $? } } if $xmlstarlet; my %workers; $SIG{INT} = sub { exit 130 }; $SIG{TERM} = sub { exit 0 }; $SIG{CHLD} = sub { while (1) { my $pid = waitpid(-1, WNOHANG); return if !defined $pid || $pid <= 0; my $p = delete $workers{$pid} || '(unknown)'; warn("$pid [$p] exited with $?\n") if $?; } }; my @todo = IO::Socket->socketpair(AF_UNIX, SOCK_SEQPACKET, 0); die "socketpair failed: $!" unless $todo[1]; my @done = IO::Socket->socketpair(AF_UNIX, SOCK_SEQPACKET, 0); die "socketpair failed: $!" unless $done[1]; $| = 1; foreach my $p (1..$nproc) { my $pid = fork; die "fork failed: $!\n" unless defined $pid; if ($pid) { $workers{$pid} = $p; } else { $todo[1]->close; $done[0]->close; worker_loop($todo[0], $done[1]); } } my ($fh, $tmp) = tempfile('www-check-XXXXXXXX', SUFFIX => '.gdbm', UNLINK => 1, TMPDIR => 1); my $gdbm = tie my %seen, 'GDBM_File', $tmp, &GDBM_WRCREAT, 0600; defined $gdbm or die "gdbm open failed: $!\n"; $todo[0]->close; $done[1]->close; my ($rvec, $wvec); $todo[1]->blocking(0); $done[0]->blocking(0); $seen{$root_url} = 1; my $ndone = 0; my $nsent = 1; my @queue = ($root_url); my $timeout = $slow * 4; while (keys %workers) { # reacts to SIGCHLD $wvec = $rvec = ''; my $u; vec($rvec, fileno($done[0]), 1) = 1; if (@queue) { vec($wvec, fileno($todo[1]), 1) = 1; } elsif ($ndone == $nsent) { kill 'TERM', keys %workers; exit; } if (!select($rvec, $wvec, undef, $timeout)) { while (my ($k, $v) = each %seen) { next if $v == 2; print "WAIT ($ndone/$nsent) <$k>\n"; } } while ($u = shift @queue) { my $s = $todo[1]->send($u, MSG_EOR); if ($!{EAGAIN}) { unshift @queue, $u; last; } } my $r; do { $r = $done[0]->recv($u, 65535, 0); } while (!defined $r && $!{EINTR}); next unless $u; if ($u =~ s/\ADONE\t//) { $ndone++; $seen{$u} = 2; } else { next if $seen{$u}; $seen{$u} = 1; $nsent++; push @queue, $u; } } sub worker_loop { my ($todo_rd, $done_wr) = @_; $SIG{CHLD} = 'DEFAULT'; my $m = WWW::Mechanize->new(autocheck => 0); my $cc = LWP::ConnCache->new; $m->stack_depth(0); # no history $m->conn_cache($cc); while (1) { $todo_rd->recv(my $u, 65535, 0); next unless $u; my $t = [ gettimeofday ]; my $r = $m->get($u); $t = tv_interval($t); printf "SLOW %0.06f % 5d %s\n", $t, $$, $u if $t > $slow; my @links; if ($r->is_success) { my %links = map { (split('#', $_->URI->abs->as_string))[0] => 1; } grep { $_->tag && $_->url !~ /:/ } $m->links; @links = keys %links; } elsif ($r->code != 300) { warn "W: ".$r->code . " $u\n" } my $s; # blocking foreach my $l (@links, "DONE\t$u") { next if $l eq '' || $l =~ /\.mbox(?:\.gz)\z/; do { $s = $done_wr->send($l, MSG_EOR); } while (!defined $s && $!{EINTR}); die "$$ send $!\n" unless defined $s; my $n = length($l); die "$$ send truncated $s < $n\n" if $s != $n; } # make sure the HTML source doesn't screw up terminals # when people curl the source (not remotely an expert # on languages or encodings, here). my $ct = $r->header('Content-Type') || ''; warn "no Content-Type: $u\n" if $ct eq ''; if ($atom_check && $ct =~ m!\bapplication/atom\+xml\b!) { my $raw = $r->decoded_content; my ($out, $err) = ('', ''); my $fail = $atom_check->(\$raw, \$out, \$err); warn "Atom ($fail) - $u - <1:$out> <2:$err>\n" if $fail; } next if $ct !~ m!\btext/html\b!; my $dc = $r->decoded_content; if ($dc =~ /([\x00-\x08\x0d-\x1f\x7f-\x{99999999}]+)/s) { my $o = $1; my $c = Dumper($o); warn "bad: $u $c\n"; } } } public-inbox-1.6.1/t/config.t000066400000000000000000000166471377346120300160330ustar00rootroot00000000000000# Copyright (C) 2014-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Config; use PublicInbox::TestCommon; use PublicInbox::Import; my ($tmpdir, $for_destroy) = tmpdir(); { PublicInbox::Import::init_bare($tmpdir); my $inboxdir = "$tmpdir/new\nline"; my @cmd = ('git', "--git-dir=$tmpdir", qw(config publicinbox.foo.inboxdir), $inboxdir); is(xsys(@cmd), 0, "set config"); my $tmp = PublicInbox::Config->new("$tmpdir/config"); is($tmp->{'publicinbox.foo.inboxdir'}, $inboxdir, 'config read correctly'); is($tmp->{'core.bare'}, 'true', 'init used --bare repo'); my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; $tmp = PublicInbox::Config->new("$tmpdir/config"); is($tmp->lookup_name('foo'), undef, 'reject invalid inboxdir'); like("@warn", qr/^E:.*must not contain `\\n'/sm, 'warned about newline'); } { my $f = "examples/public-inbox-config"; ok(-r $f, "$f is readable"); my $cfg = PublicInbox::Config->new($f); is_deeply($cfg->lookup('meta@public-inbox.org'), { 'inboxdir' => '/home/pi/meta-main.git', 'address' => [ 'meta@public-inbox.org' ], 'domain' => 'public-inbox.org', 'url' => [ 'http://example.com/meta' ], -primary_address => 'meta@public-inbox.org', 'name' => 'meta', feedmax => 25, -httpbackend_limiter => undef, nntpserver => undef, }, "lookup matches expected output"); is($cfg->lookup('blah@example.com'), undef, "non-existent lookup returns undef"); my $test = $cfg->lookup('test@public-inbox.org'); is_deeply($test, { 'address' => ['try@public-inbox.org', 'sandbox@public-inbox.org', 'test@public-inbox.org'], -primary_address => 'try@public-inbox.org', 'inboxdir' => '/home/pi/test-main.git', 'domain' => 'public-inbox.org', 'name' => 'test', feedmax => 25, 'url' => [ 'http://example.com/test' ], -httpbackend_limiter => undef, nntpserver => undef, }, "lookup matches expected output for test"); } { my $cfgpfx = "publicinbox.test"; my @altid = qw(serial:gmane:file=a serial:enamg:file=b); my $config = PublicInbox::Config->new(\<lookup_name('test'); is_deeply($ibx->{altid}, [ @altid ]); $config = PublicInbox::Config->new(\<lookup_name('test'); is($ibx->{inboxdir}, '/path/to/non/existent', 'mainrepo still works'); $config = PublicInbox::Config->new(\<lookup_name('test'); is($ibx->{inboxdir}, '/path/to/non/existent', 'inboxdir takes precedence'); } { my $pfx = "publicinbox.test"; my $str = <new(\$str); my $ibx = $cfg->lookup_name('test'); is($ibx->{nntpserver}, 'news.example.com', 'global NNTP server'); $str = <new(\$str); $ibx = $cfg->lookup_name('test'); is($ibx->{nntpserver}, 'news.alt.example.com','per-inbox NNTP server'); } # no obfuscate domains { my $pfx = "publicinbox.test"; my $pfx2 = "publicinbox.foo"; my $str = <new(\$str); my $ibx = $cfg->lookup_name('test'); my $re = $ibx->{-no_obfuscate_re}; like('meta@public-inbox.org', $re, 'public-inbox.org address not to be obfuscated'); like('t@example.com', $re, 'example.com address not to be obfuscated'); unlike('t@example.comM', $re, 'example.comM address does not match'); is_deeply($ibx->{-no_obfuscate}, { 'test@example.com' => 1, 'foo@example.com' => 1, 'z@example.com' => 1, }, 'known addresses populated'); } my @invalid = ( # git rejects this because it locks refnames, but we don't have # this problem with inbox names: # 'inbox.lock', # git rejects these: '', '..', '.', 'stash@{9}', 'inbox.', '^caret', '~tilde', '*asterisk', 's p a c e s', ' leading-space', 'trailing-space ', 'question?', 'colon:', '[square-brace]', "\fformfeed", "\0zero", "\bbackspace", ); my %X = ("\0" => '\\0', "\b" => '\\b', "\f" => '\\f', "'" => "\\'"); my $xre = join('|', keys %X); for my $s (@invalid) { my $d = $s; $d =~ s/($xre)/$X{$1}/g; ok(!PublicInbox::Config::valid_inbox_name($s), "`$d' name rejected"); } # obviously-valid examples my @valid = qw(a a@example a@example.com); # Rejecting more was considered, but then it dawned on me that # people may intentionally use inbox names which are not URL-friendly # to prevent the PSGI interface from displaying them... # URL-unfriendly # '<', '>', '%', '#', '?', '&', '(', ')', # maybe these aren't so bad, they're common in Message-IDs, even: # '!', '$', '=', '+' push @valid, qw[bang! ca$h less< more> 1% (parens) &more eql= +plus], '#hash'; for my $s (@valid) { ok(PublicInbox::Config::valid_inbox_name($s), "`$s' name accepted"); } { my $f = "$tmpdir/ordered"; open my $fh, '>', $f or die "open: $!"; my @expect; foreach my $i (0..3) { push @expect, "$i"; print $fh <<"" or die "print: $!"; [publicinbox "$i"] inboxdir = /path/to/$i.git address = $i\@example.com } close $fh or die "close: $!"; my $cfg = PublicInbox::Config->new($f); my @result; $cfg->each_inbox(sub { push @result, $_[0]->{name} }); is_deeply(\@result, \@expect); } { my $pfx1 = "publicinbox.test1"; my $pfx2 = "publicinbox.test2"; my $str = <new(\$str); my $t1 = $cfg->lookup_name('test1'); my $t2 = $cfg->lookup_name('test2'); is($t1->{-repo_objs}->[0], $t2->{-repo_objs}->[0], 'inboxes share ::Git object'); } { for my $t (qw(TRUE true yes on 1 +1 -1 13 0x1 0x12 0X5)) { is(PublicInbox::Config::git_bool($t), 1, "$t is true"); is(xqx([qw(git -c), "test.val=$t", qw(config --bool test.val)]), "true\n", "$t matches git-config behavior"); } for my $f (qw(FALSE false no off 0 +0 +000 00 0x00 0X0)) { is(PublicInbox::Config::git_bool($f), 0, "$f is false"); is(xqx([qw(git -c), "test.val=$f", qw(config --bool test.val)]), "false\n", "$f matches git-config behavior"); } is(PublicInbox::Config::git_bool('bogus'), undef, 'bogus is undef'); } SKIP: { # XXX wildcard match requires git 2.26+ require_git('1.8.5', 2) or skip 'git 1.8.5+ required for --url-match', 2; my $f = "$tmpdir/urlmatch"; open my $fh, '>', $f or BAIL_OUT $!; print $fh <new; my $url = 'imap://mail.example.com/INBOX'; is($cfg->urlmatch('imap.pollInterval', $url), 9, 'urlmatch hit'); is($cfg->urlmatch('imap.idleInterval', $url), undef, 'urlmatch miss'); }; done_testing(); public-inbox-1.6.1/t/config_limiter.t000066400000000000000000000026051377346120300175450ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Config; my $cfgpfx = "publicinbox.test"; { my $config = PublicInbox::Config->new(\<lookup_name('test'); my $git = $ibx->git; my $old = "$git"; my $lim = $git->{-httpbackend_limiter}; ok($lim, 'Limiter exists'); is($lim->{max}, 12, 'limiter has expected slots'); $ibx->{git} = undef; $git = $ibx->git; isnt($old, "$git", 'got new Git object'); is("$git->{-httpbackend_limiter}", "$lim", 'same limiter'); } { my $config = PublicInbox::Config->new(\<lookup_name('test'); my $git = $ibx->git; ok($git, 'got git object'); my $old = "$git"; # stringify object ref "Git(0xDEADBEEF)" my $lim = $git->{-httpbackend_limiter}; ok($lim, 'Limiter exists'); is($lim->{max}, 3, 'limiter has expected slots'); $ibx->{git} = undef; my $new = $ibx->git; isnt($old, "$new", 'got new Git object'); is("$new->{-httpbackend_limiter}", "$lim", 'same limiter'); is($lim->{max}, 3, 'limiter has expected slots'); } done_testing; public-inbox-1.6.1/t/content_hash.t000066400000000000000000000016701377346120300172310ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::ContentHash qw(content_hash); use PublicInbox::Eml; my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: b@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF my $orig = content_hash($mime); my $reload = content_hash(PublicInbox::Eml->new($mime->as_string)); is($orig, $reload, 'content_hash matches after serialization'); foreach my $h (qw(From To Cc)) { my $n = q("Quoted N'Ame" ); $mime->header_set($h, "$n"); my $q = content_hash($mime); is($mime->header($h), $n, "content_hash does not mutate $h:"); $mime->header_set($h, 'Quoted N\'Ame '); my $nq = content_hash($mime); is($nq, $q, "quotes ignored in $h:"); } done_testing(); public-inbox-1.6.1/t/convert-compact.t000066400000000000000000000100251377346120300176520ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::TestCommon; require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); have_xapian_compact or plan skip_all => 'xapian-compact missing for '.__FILE__; use_ok 'PublicInbox::V2Writable'; use PublicInbox::Import; my ($tmpdir, $for_destroy) = tmpdir(); my $ibx = { inboxdir => "$tmpdir/v1", name => 'test-v1', -primary_address => 'test@example.com', }; PublicInbox::Import::init_bare($ibx->{inboxdir}); ok(umask(077), 'set restrictive umask'); ok(PublicInbox::Import::run_die([qw(git) , "--git-dir=$ibx->{inboxdir}", qw(config core.sharedRepository 0644)]), 'set sharedRepository'); $ibx = PublicInbox::Inbox->new($ibx); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: b@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF ok($im->add($mime), 'added one message'); ok($im->remove($mime), 'remove message'); ok($im->add($mime), 'added message again'); $im->done; for (1..2) { eval { PublicInbox::SearchIdx->new($ibx, 1)->index_sync; }; is($@, '', 'no errors syncing'); } is(((stat("$ibx->{inboxdir}/public-inbox"))[2]) & 07777, 0755, 'sharedRepository respected for v1'); is(((stat("$ibx->{inboxdir}/public-inbox/msgmap.sqlite3"))[2]) & 07777, 0644, 'sharedRepository respected for v1 msgmap'); my @xdir = glob("$ibx->{inboxdir}/public-inbox/xap*/*"); foreach (@xdir) { my @st = stat($_); is($st[2] & 07777, -f _ ? 0644 : 0755, 'sharedRepository respected on file after convert'); } local $ENV{PI_CONFIG} = '/dev/null'; my ($out, $err) = ('', ''); my $rdr = { 1 => \$out, 2 => \$err }; my $cmd = [ '-compact', $ibx->{inboxdir} ]; ok(run_script($cmd, undef, $rdr), 'v1 compact works'); @xdir = glob("$ibx->{inboxdir}/public-inbox/xap*"); is(scalar(@xdir), 1, 'got one xapian directory after compact'); is(((stat($xdir[0]))[2]) & 07777, 0755, 'sharedRepository respected on v1 compact'); my $hwm = do { my $mm = $ibx->mm; $ibx->cleanup; $mm->num_highwater; }; ok(defined($hwm) && $hwm > 0, "highwater mark set #$hwm"); $cmd = [ '-convert', '--no-index', $ibx->{inboxdir}, "$tmpdir/no-index" ]; ok(run_script($cmd, undef, $rdr), 'convert --no-index works'); $cmd = [ '-convert', $ibx->{inboxdir}, "$tmpdir/x/v2" ]; ok(run_script($cmd, undef, $rdr), 'convert works'); @xdir = glob("$tmpdir/x/v2/xap*/*"); foreach (@xdir) { my @st = stat($_); is($st[2] & 07777, -f _ ? 0644 : 0755, 'sharedRepository respected after convert'); } $cmd = [ '-compact', "$tmpdir/x/v2" ]; my $env = { NPROC => 2 }; ok(run_script($cmd, $env, $rdr), 'v2 compact works'); $ibx->{inboxdir} = "$tmpdir/x/v2"; $ibx->{version} = 2; is($ibx->mm->num_highwater, $hwm, 'highwater mark unchanged in v2 inbox'); @xdir = glob("$tmpdir/x/v2/xap*/*"); foreach (@xdir) { my @st = stat($_); is($st[2] & 07777, -f _ ? 0644 : 0755, 'sharedRepository respected after v2 compact'); } is(((stat("$tmpdir/x/v2/msgmap.sqlite3"))[2]) & 07777, 0644, 'sharedRepository respected for v2 msgmap'); @xdir = (glob("$tmpdir/x/v2/git/*.git/objects/*/*"), glob("$tmpdir/x/v2/git/*.git/objects/pack/*")); foreach (@xdir) { my @st = stat($_); is($st[2] & 07777, -f _ ? 0444 : 0755, 'sharedRepository respected after v2 compact'); } my $msgs = $ibx->recent({limit => 1000}); is($msgs->[0]->{mid}, 'a-mid@b', 'message exists in history'); is(scalar @$msgs, 1, 'only one message in history'); $ibx = undef; $err = ''; $cmd = [ qw(-index -j0 --reindex -c), "$tmpdir/x/v2" ]; ok(run_script($cmd, undef, $rdr), '--reindex -c'); like($err, qr/xapian-compact/, 'xapian-compact ran (-c)'); $rdr->{2} = \(my $err2 = ''); $cmd = [ qw(-index -j0 --reindex -cc), "$tmpdir/x/v2" ]; ok(run_script($cmd, undef, $rdr), '--reindex -c -c'); like($err2, qr/xapian-compact/, 'xapian-compact ran (-c -c)'); ok(($err2 =~ tr/\n/\n/) > ($err =~ tr/\n/\n/), '-compacted twice'); done_testing(); public-inbox-1.6.1/t/data/000077500000000000000000000000001377346120300152745ustar00rootroot00000000000000public-inbox-1.6.1/t/data/0001.patch000066400000000000000000000024601377346120300166770ustar00rootroot00000000000000From: Eric Wong Date: Fri, 20 Jul 2018 07:21:41 +0000 To: test@example.com Subject: [PATCH] search: use boolean prefix for filenames in diffs, too Message-ID: <20180720072141.GA15957@example> Filenames within a project tend to be reasonably stable within a project and I plan on having automated searches hit these. Also, using no term prefix at all (the default for searching) still allows probabilistic searches on everything that's in a "git diff", including the blob names which were just made boolean. Note, attachment filenames ("n:" prefix) will still use probabilistic search, as they're hardly standardized. --- lib/PublicInbox/Search.pm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 090d998b6c2c..6e006fd73b1d 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -53,6 +53,9 @@ my %bool_pfx_external = ( dfpre => 'XDFPRE', dfpost => 'XDFPOST', dfblob => 'XDFPRE XDFPOST', + dfn => 'XDFN', + dfa => 'XDFA', + dfb => 'XDFB', ); my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST'; @@ -72,9 +75,6 @@ my %prob_prefix = ( q => 'XQUOT', nq => $non_quoted_body, - dfn => 'XDFN', - dfa => 'XDFA', - dfb => 'XDFB', dfhh => 'XDFHH', dfctx => 'XDFCTX', -- ^_^ public-inbox-1.6.1/t/data/message_embed.eml000066400000000000000000000111171377346120300205540ustar00rootroot00000000000000Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 977481F45A; Sat, 18 Apr 2020 22:25:08 +0000 (UTC) Date: Sat, 18 Apr 2020 22:25:08 +0000 From: Eric Wong To: test@public-inbox.org Subject: Re: embedded message test Message-ID: <20200418222508.GA13918@dcvr> References: <20200418222020.GA2745@dcvr> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="TB36FDmn/VVEgNH/" Content-Disposition: inline In-Reply-To: <20200418222020.GA2745@dcvr> --TB36FDmn/VVEgNH/ Content-Type: text/plain; charset=utf-8 Content-Disposition: inline testing embedded message harder --TB36FDmn/VVEgNH/ Content-Type: message/rfc822 Content-Disposition: attachment; filename="embed2x.eml" Date: Sat, 18 Apr 2020 22:20:20 +0000 From: Eric Wong To: test@public-inbox.org Subject: embedded message test Message-ID: <20200418222020.GA2745@dcvr> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="/04w6evG8XlLl3ft" Content-Disposition: inline --/04w6evG8XlLl3ft Content-Type: text/plain; charset=utf-8 Content-Disposition: inline testing embedded message --/04w6evG8XlLl3ft Content-Type: message/rfc822 Content-Disposition: attachment; filename="test.eml" From: Eric Wong To: spew@80x24.org Subject: [PATCH] mail header experiments Date: Sat, 18 Apr 2020 21:41:14 +0000 Message-Id: <20200418214114.7575-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit --- lib/PublicInbox/MailHeader.pm | 55 +++++++++++++++++++++++++++++++++++ t/mail_header.t | 31 ++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 lib/PublicInbox/MailHeader.pm create mode 100644 t/mail_header.t diff --git a/lib/PublicInbox/MailHeader.pm b/lib/PublicInbox/MailHeader.pm new file mode 100644 index 00000000..166baf91 --- /dev/null +++ b/lib/PublicInbox/MailHeader.pm @@ -0,0 +1,55 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ +package PublicInbox::MailHeader; +use strict; +use HTTP::Parser::XS qw(parse_http_response HEADERS_AS_ARRAYREF); +use bytes (); #bytes::length +my %casemap; + +sub _headerx_to_list { + my (undef, $head, $crlf) = @_; + + # picohttpparser uses `int' as the return value, so the + # actual limit is 2GB on most platforms. However, headers + # exceeding (or even close to) 1MB seems unreasonable + die 'headers too big' if bytes::length($$head) > 0x100000; + my ($ret, undef, undef, undef, $headers) = + parse_http_response('HTTP/1.0 1 X'. $crlf . $$head, + HEADERS_AS_ARRAYREF); + die 'failed to parse headers' if $ret <= 0; + # %casemap = map {; lc($_) => $_ } ($$head =~ m/^([^:]+):/gsm); + # my $nr = @$headers; + for (my $i = 0; $i < @$headers; $i += 2) { + my $key = $headers->[$i]; # = $casemap{$headers->[$i]}; + my $val = $headers->[$i + 1]; + (my $trimmed = $val) =~ s/\r?\n\s+/ /; + $headers->[$i + 1] = [ + $trimmed, + "$key: $val" + ] + } + $headers; +} + +sub _header_to_list { + my (undef, $head, $crlf) = @_; + my @tmp = ($$head =~ m/^(([^ \t:][^:\n]*):[ \t]* + ([^\n]*\n(?:[ \t]+[^\n]*\n)*))/gsmx); + my @headers; + $#headers = scalar @tmp; + @headers = (); + while (@tmp) { + my ($orig, $key, $val) = splice(@tmp, 0, 3); + # my $v = $tmp[$i + 2]; + # $v =~ s/\r?\n[ \t]+/ /sg; + # $v =~ s/\r?\n\z//s; + $val =~ s/\n[ \t]+/ /sg; + chomp($val, $orig); + # $val =~ s/\r?\n\z//s; + # $orig =~ s/\r?\n\z//s; + push @headers, $key, [ $val, $orig ]; + } + \@headers; +} + +1; diff --git a/t/mail_header.t b/t/mail_header.t new file mode 100644 index 00000000..4dc62c50 --- /dev/null +++ b/t/mail_header.t @@ -0,0 +1,31 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ +use strict; +use Test::More; +use PublicInbox::TestCommon; +require_mods('PublicInbox::MailHeader'); + +my $head = <<'EOF'; +From d0147582e289fdd4cdd84e91d8b0f8ae9c230124 Mon Sep 17 00:00:00 2001 +From: Eric Wong +Date: Fri, 17 Apr 2020 09:28:49 +0000 +Subject: [PATCH] searchthread: reduce indirection by removing container + +EOF +my $orig = $head; +use Email::Simple; +my $xshdr = PublicInbox::MailHeader->_header_to_list(\$head, "\n"); +my $simpl = Email::Simple::Header->_header_to_list(\$head, "\n"); +is_deeply($xshdr, $simpl); +use Benchmark qw(:all); +my $res = timethese(100000, { + pmh => sub { + PublicInbox::MailHeader->_header_to_list(\$head, "\n"); + }, + esh => sub { + PublicInbox::MailHeader->_header_to_list(\$head, "\n"); + } +}); +is($head, $orig); +use Data::Dumper; diag Dumper($res); +done_testing; --/04w6evG8XlLl3ft-- --TB36FDmn/VVEgNH/-- public-inbox-1.6.1/t/dir_idle.t000066400000000000000000000003061377346120300163220ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ use Test::More; use_ok 'PublicInbox::DirIdle'; done_testing; public-inbox-1.6.1/t/ds-kqxs.t000066400000000000000000000027451377346120300161520ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # Licensed the same as Danga::Socket (and Perl5) # License: GPL-1.0+ or Artistic-1.0-Perl # # use strict; use Test::More; unless (eval { require IO::KQueue }) { my $m = $^O !~ /bsd/ ? 'DSKQXS is only for *BSD systems' : "no IO::KQueue, skipping $0: $@"; plan skip_all => $m; } if ('ensure nested kqueue works for signalfd emulation') { require POSIX; my $new = POSIX::SigSet->new(POSIX::SIGHUP()); my $old = POSIX::SigSet->new; my $hup = 0; local $SIG{HUP} = sub { $hup++ }; POSIX::sigprocmask(POSIX::SIG_SETMASK(), $new, $old) or die; my $kqs = IO::KQueue->new or die; $kqs->EV_SET(POSIX::SIGHUP(), IO::KQueue::EVFILT_SIGNAL(), IO::KQueue::EV_ADD()); kill('HUP', $$) or die; my @events = $kqs->kevent(3000); is(scalar(@events), 1, 'got one event'); is($events[0]->[0], POSIX::SIGHUP(), 'got SIGHUP'); my $parent = IO::KQueue->new or die; my $kqfd = $$kqs; $parent->EV_SET($kqfd, IO::KQueue::EVFILT_READ(), IO::KQueue::EV_ADD()); kill('HUP', $$) or die; @events = $parent->kevent(3000); is(scalar(@events), 1, 'got one event'); is($events[0]->[0], $kqfd, 'got kqfd'); is($hup, 0, '$SIG{HUP} did not fire'); POSIX::sigprocmask(POSIX::SIG_SETMASK(), $old) or die; defined(POSIX::close($kqfd)) or die; defined(POSIX::close($$parent)) or die; } local $ENV{TEST_IOPOLLER} = 'PublicInbox::DSKQXS'; require './t/ds-poll.t'; public-inbox-1.6.1/t/ds-leak.t000066400000000000000000000036171377346120300160770ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # Licensed the same as Danga::Socket (and Perl5) # License: GPL-1.0+ or Artistic-1.0-Perl # # use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use_ok 'PublicInbox::DS'; if ('close-on-exec for epoll and kqueue') { use PublicInbox::Spawn qw(spawn which); my $pid; my $evfd_re = qr/(?:kqueue|eventpoll)/i; PublicInbox::DS->SetLoopTimeout(0); PublicInbox::DS->SetPostLoopCallback(sub { 0 }); # make sure execve closes if we're using fork() my ($r, $w); pipe($r, $w) or die "pipe: $!"; PublicInbox::DS::add_timer(0, sub { $pid = spawn([qw(sleep 10)]) }); PublicInbox::DS->EventLoop; ok($pid, 'subprocess spawned'); # wait for execve, we need to ensure lsof sees sleep(1) # and not the fork of this process: close $w or die "close: $!"; my $l = <$r>; is($l, undef, 'cloexec works and sleep(1) is running'); SKIP: { my $lsof = which('lsof') or skip 'lsof missing', 1; my $rdr = { 2 => \(my $null) }; my @of = grep(/$evfd_re/, xqx([$lsof, '-p', $pid], {}, $rdr)); my $err = $?; skip "lsof broken ? (\$?=$err)", 1 if $err; is_deeply(\@of, [], 'no FDs leaked to subprocess'); }; if (defined $pid) { kill(9, $pid); waitpid($pid, 0); } PublicInbox::DS->Reset; } SKIP: { require_mods('BSD::Resource', 1); my $rlim = BSD::Resource::RLIMIT_NOFILE(); my ($n,undef) = BSD::Resource::getrlimit($rlim); # FreeBSD 11.2 with 2GB RAM gives RLIMIT_NOFILE=57987! if ($n > 1024 && !$ENV{TEST_EXPENSIVE}) { skip "RLIMIT_NOFILE=$n too big w/o TEST_EXPENSIVE for $0", 1; } my $cb = sub {}; for my $i (0..$n) { PublicInbox::DS->SetLoopTimeout(0); PublicInbox::DS->SetPostLoopCallback($cb); PublicInbox::DS->EventLoop; PublicInbox::DS->Reset; } ok(1, "Reset works and doesn't hit RLIMIT_NOFILE ($n)"); }; done_testing; public-inbox-1.6.1/t/ds-poll.t000066400000000000000000000033061377346120300161240ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # Licensed the same as Danga::Socket (and Perl5) # License: GPL-1.0+ or Artistic-1.0-Perl # # use strict; use warnings; use Test::More; use PublicInbox::Syscall qw(:epoll); my $cls = $ENV{TEST_IOPOLLER} // 'PublicInbox::DSPoll'; use_ok $cls; my $p = $cls->new; my ($r, $w, $x, $y); pipe($r, $w) or die; pipe($x, $y) or die; is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($r), EPOLLIN), 0, 'add EPOLLIN'); my $events = []; my $n = $p->epoll_wait(9, 0, $events); is_deeply($events, [], 'no events set'); is($n, 0, 'nothing ready, yet'); is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($w), EPOLLOUT|EPOLLONESHOT), 0, 'add EPOLLOUT|EPOLLONESHOT'); $n = $p->epoll_wait(9, -1, $events); is($n, 1, 'got POLLOUT event'); is($events->[0]->[0], fileno($w), '$w ready'); $n = $p->epoll_wait(9, 0, $events); is($n, 0, 'nothing ready after oneshot'); is_deeply($events, [], 'no events set after oneshot'); syswrite($w, '1') == 1 or die; for my $t (0..1) { $n = $p->epoll_wait(9, $t, $events); is($events->[0]->[0], fileno($r), "level-trigger POLLIN ready #$t"); is($n, 1, "only event ready #$t"); } syswrite($y, '1') == 1 or die; is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($x), EPOLLIN|EPOLLONESHOT), 0, 'EPOLLIN|EPOLLONESHOT add'); is($p->epoll_wait(9, -1, $events), 2, 'epoll_wait has 2 ready'); my @fds = sort(map { $_->[0] } @$events); my @exp = sort((fileno($r), fileno($x))); is_deeply(\@fds, \@exp, 'got both ready FDs'); is($p->epoll_ctl(EPOLL_CTL_DEL, fileno($r), 0), 0, 'EPOLL_CTL_DEL OK'); $n = $p->epoll_wait(9, 0, $events); is($n, 0, 'nothing ready after EPOLL_CTL_DEL'); done_testing; public-inbox-1.6.1/t/edit.t000066400000000000000000000154631377346120300155060ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ # edit frontend behavior test (t/replace.t for backend) use strict; use warnings; use Test::More; use PublicInbox::TestCommon; require_git(2.6); require PublicInbox::Inbox; require PublicInbox::InboxWritable; require PublicInbox::Config; use PublicInbox::MID qw(mid_clean); require_mods('DBD::SQLite'); my ($tmpdir, $for_destroy) = tmpdir(); my $inboxdir = "$tmpdir/v2"; my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'test-v2edit', version => 2, -primary_address => 'test@example.com', indexlevel => 'basic', }); $ibx = PublicInbox::InboxWritable->new($ibx, {nproc=>1}); my $cfgfile = "$tmpdir/config"; local $ENV{PI_CONFIG} = $cfgfile; my $im = $ibx->importer(0); my $file = 't/data/0001.patch'; my $mime = eml_load($file); my $mid = mid_clean($mime->header('Message-Id')); ok($im->add($mime), 'add message to be edited'); $im->done; my ($in, $out, $err, $cmd, $cur, $t); my $git = PublicInbox::Git->new("$ibx->{inboxdir}/git/0.git"); my $opt = { 0 => \$in, 1 => \$out, 2 => \$err }; $t = '-F FILE'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/boolean prefix/bool pfx/'"; $cmd = [ '-edit', "-F$file", $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t edit OK"); $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); like($cur->header('Subject'), qr/bool pfx/, "$t message edited"); like($out, qr/[a-f0-9]{40,}/, "$t shows commit on success"); } $t = '-m MESSAGE_ID'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/bool pfx/boolean prefix/'"; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t edit OK"); $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); like($cur->header('Subject'), qr/boolean prefix/, "$t message edited"); like($out, qr/[a-f0-9]{40,}/, "$t shows commit on success"); } $t = 'no-op -m MESSAGE_ID'; { $in = $out = $err = ''; my $before = $git->qx(qw(rev-parse HEAD)); local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/bool pfx/boolean prefix/'"; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t succeeds"); my $prev = $cur; $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); is_deeply($cur, $prev, "$t makes no change"); like($cur->header('Subject'), qr/boolean prefix/, "$t does not change message"); like($out, qr/NONE/, 'noop shows NONE'); my $after = $git->qx(qw(rev-parse HEAD)); is($after, $before, 'git head unchanged'); } $t = 'no-op -m MESSAGE_ID w/Status: header'; { # because mutt does it $in = $out = $err = ''; my $before = $git->qx(qw(rev-parse HEAD)); local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^Subject:.*/Status: RO\\n\$&/'"; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t succeeds"); my $prev = $cur; $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); is_deeply($cur, $prev, "$t makes no change"); like($cur->header('Subject'), qr/boolean prefix/, "$t does not change message"); is($cur->header('Status'), undef, 'Status header not added'); like($out, qr/NONE/, 'noop shows NONE'); my $after = $git->qx(qw(rev-parse HEAD)); is($after, $before, 'git head unchanged'); } $t = '-m MESSAGE_ID can change Received: headers'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^Subject:.*/Received: x\\n\$&/'"; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t succeeds"); $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); like($cur->header('Subject'), qr/boolean prefix/, "$t does not change Subject"); is($cur->header('Received'), 'x', 'added Received header'); } $t = '-m miss'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/boolean/FAIL/'"; $cmd = [ '-edit', "-m$mid-miss", $inboxdir ]; ok(!run_script($cmd, undef, $opt), "$t fails on invalid MID"); like($err, qr/No message found/, "$t shows error"); } $t = 'non-interactive editor failure'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 'END { exit 1 }'"; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(!run_script($cmd, undef, $opt), "$t detected"); like($err, qr/END \{ exit 1 \}' failed:/, "$t shows error"); } $t = 'mailEditor set in config'; { $in = $out = $err = ''; my $rc = xsys(qw(git config), "--file=$cfgfile", 'publicinbox.maileditor', "$^X -i -p -e 's/boolean prefix/bool pfx/'"); is($rc, 0, 'set publicinbox.mailEditor'); local $ENV{MAIL_EDITOR}; delete $ENV{MAIL_EDITOR}; local $ENV{GIT_EDITOR} = 'echo should not run'; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t edited message"); $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); like($cur->header('Subject'), qr/bool pfx/, "$t message edited"); unlike($out, qr/should not run/, 'did not run GIT_EDITOR'); } $t = '--raw and mbox escaping'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^\$/\\nFrom not mbox\\n/'"; $cmd = [ '-edit', "-m$mid", '--raw', $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t succeeds"); $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); like($cur->body, qr/^From not mbox/sm, 'put "From " line into body'); local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^>From not/\$& an/'"; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t succeeds with mbox escaping"); $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); like($cur->body, qr/^From not an mbox/sm, 'changed "From " line unescaped'); local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^From not an mbox\\n//s'"; $cmd = [ '-edit', "-m$mid", '--raw', $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t succeeds again"); $cur = PublicInbox::Eml->new($ibx->msg_by_mid($mid)); unlike($cur->body, qr/^From not an mbox/sm, "$t restored body"); } $t = 'reuse Message-ID'; { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; ok($im->add($mime), "$t and re-add"); $im->done; like($warn[0], qr/reused for mismatched content/, "$t got warning"); } $t = 'edit ambiguous Message-ID with -m'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/bool pfx/boolean prefix/'"; $cmd = [ '-edit', "-m$mid", $inboxdir ]; ok(!run_script($cmd, undef, $opt), "$t fails w/o --force"); like($err, qr/Multiple messages with different content found matching/, "$t shows matches"); like($err, qr/GIT_DIR=.*git show/is, "$t shows git commands"); } $t .= ' and --force'; { $in = $out = $err = ''; local $ENV{MAIL_EDITOR} = "$^X -i -p -e 's/^Subject:.*/Subject:x/i'"; $cmd = [ '-edit', "-m$mid", '--force', $inboxdir ]; ok(run_script($cmd, undef, $opt), "$t succeeds"); like($err, qr/Will edit all of them/, "$t notes all will be edited"); my @dump = $git->qx(qw(cat-file --batch --batch-all-objects)); chomp @dump; is_deeply([grep(/^Subject:/i, @dump)], [qw(Subject:x Subject:x)], "$t edited both messages"); } done_testing(); public-inbox-1.6.1/t/emergency.t000066400000000000000000000033511377346120300165300ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; my ($tmpdir, $for_destroy) = tmpdir(); use_ok 'PublicInbox::Emergency'; { my $md = "$tmpdir/a"; my $em = PublicInbox::Emergency->new($md); ok(-d $md, 'Maildir a auto-created'); my @tmp = <$md/tmp/*>; is(scalar @tmp, 0, 'no temporary files exist, yet'); $em->prepare(\"BLAH"); @tmp = <$md/tmp/*>; is(scalar @tmp, 1, 'globbed one temporary file'); open my $fh, '<', $tmp[0] or die "failed to open: $!"; is("BLAH", <$fh>, 'wrote contents to temporary location'); my @new = <$md/new/*>; is(scalar @new, 0, 'no new files exist, yet'); $em = undef; @tmp = <$md/tmp/*>; is(scalar @tmp, 0, 'temporary file no longer exists'); @new = <$md/new/*>; is(scalar @new, 1, 'globbed one new file'); open $fh, '<', $new[0] or die "failed to open: $!"; is("BLAH", <$fh>, 'wrote contents to new location'); } { my $md = "$tmpdir/b"; my $em = PublicInbox::Emergency->new($md); ok(-d $md, 'Maildir b auto-created'); my @tmp = <$md/tmp/*>; is(scalar @tmp, 0, 'no temporary files exist, yet'); $em->prepare(\"BLAH"); @tmp = <$md/tmp/*>; is(scalar @tmp, 1, 'globbed one temporary file'); open my $fh, '<', $tmp[0] or die "failed to open: $!"; is("BLAH", <$fh>, 'wrote contents to temporary location'); my @new = <$md/new/*>; is(scalar @new, 0, 'no new files exist, yet'); is(sysread($em->fh, my $buf, 9), 4, 'read file handle exposed'); is($buf, 'BLAH', 'got expected data'); $em->abort; @tmp = <$md/tmp/*>; is(scalar @tmp, 0, 'temporary file no longer exists'); @new = <$md/new/*>; is(scalar @new , 0, 'new file no longer exists'); } done_testing(); public-inbox-1.6.1/t/eml.t000066400000000000000000000322741377346120300153350ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::TestCommon; use PublicInbox::MsgIter qw(msg_part_text); my @classes = qw(PublicInbox::Eml); SKIP: { require_mods('Email::MIME', 1); # TODO: Email::MIME behavior is not consistent in newer versions # we need to evaluate and possibly adjust our behavior to decide # between DWIM-ness with historical mail... push @classes, 'PublicInbox::MIME'; }; use_ok $_ for @classes; sub mime_load ($) { my ($path) = @_; open(my $fh, '<', $path) or die "open $path: $!"; PublicInbox::MIME->new(\(do { local $/; <$fh> })); } { my $eml = PublicInbox::Eml->new(\(my $str = "a: b\n\nhi\n")); is($str, "hi\n", '->new modified body like Email::Simple'); is($eml->body, "hi\n", '->body works'); is($eml->as_string, "a: b\n\nhi\n", '->as_string'); } for my $cls (@classes) { my $mime = $cls->new(my $orig = "From: x\n\nb"); is($mime->as_string, $orig, '->as_string works'); is($mime->header_obj->as_string, "From: x\n", 'header ->as_string works'); # headers is($mime->header_raw('From'), 'x', 'header_raw scalar context'); $mime = $cls->new("R:\n\tx\nR:\n 1\n"); is_deeply([$mime->header_raw('r')], [ 'x', '1' ], 'multi-value'); $mime = $cls->new("R:x\nR: 1\n"); is_deeply([$mime->header_raw('r')], [ 'x', '1' ], 'multi-value header'); $mime = $cls->new("R:x\n R: 1\nR:\n f\n"); is_deeply([$mime->header_raw('r')], [ 'x R: 1', 'f' ], 'multi-line, multi-value header'); $mime->header_set('r'); is_deeply([$mime->header_raw('r')], [], 'header_set clears'); $mime->header_set('r'); is_deeply([$mime->header_raw('r')], [], 'header_set clears idempotent'); $mime->header_set('r', 'h'); is_deeply([$mime->header_raw('r')], ['h'], 'header_set'); $mime->header_set('r', 'h', 'i'); is_deeply([$mime->header_raw('r')], ['h', 'i'], 'header_set ary'); $mime->header_set('rr', 'b'); is_deeply([$mime->header_raw('r')], ['h', 'i'], "header_set `rr' did not clobber `r'"); is($mime->header_raw('rr'), 'b', 'got set scalar'); $mime->header_set('rr', 'b'x100); is($mime->header_raw('rr'), 'b'x100, 'got long set scalar'); if ($cls eq 'PublicInbox::Eml') { like($mime->as_string, qr/^rr: b{100}\n(?:\n|\z)/sm, 'single token not wrapped'); } $mime->header_set('rr', ('b'x100) . ' wrap me'); if ($cls eq 'PublicInbox::Eml') { like($mime->as_string, qr/^rr: b{100}\n\twrap me\n/sm, 'wrapped after long token'); } my $exp = "pre\tformatted\n with\n breaks"; $mime->header_set('r', $exp); like($mime->as_string, qr/^r: \Q$exp\E/sm, 'preformatted preserved'); } # for @classes for my $cls (@classes) { # make sure we don't add quotes if not needed my $eml = $cls->new("From: John Smith \n\n"); is($eml->header('From'), 'John Smith ', "name not unnecessarily quoted $cls"); } for my $cls (@classes) { my $eml = $cls->new("Subject: foo\n\n"); $eml->header_str_set('Subject', "\x{100}"); like($eml->header_raw('Subject'), qr/utf-8\?B\?/i, 'MIME-B encoded UTF-8 Subject'); is_deeply([$eml->header('Subject')], [ "\x{100}" ], 'got wide character back'); } # linux-mips apparently got some messages injected w/o Message-ID # and long Subject: lines w/o leading whitespace. # What appears in the blobs was generated by V2Writable. for my $cls (@classes) { my $eml = $cls->new(<<'EOF'); Message-ID: <20101130193431@z> Subject: something really long and really wrong From: linux-mips archive injection Object-Id: 8c56b7abdd551b1264e6522ededbbed9890cccd0 EOF is_deeply([ $eml->header('Subject') ], [ 'something really long and really wrong' ], 'continued long line w/o leading spaces '.$cls); is_deeply([ $eml->header('From') ], [ 'linux-mips archive injection' ], 'subsequent line not corrupted'); is_deeply([ $eml->header('Message-ID') ], ['<20101130193431@z>'], 'preceding line readable'); } # for @classes { my $eml = eml_load 't/msg_iter-order.eml'; my @parts; my $orig = $eml->as_string; $eml->each_part(sub { my ($part, $level, @ex) = @{$_[0]}; my $s = $part->body_str; $s =~ s/\s+//sg; push @parts, [ $s, $level, @ex ]; }); is_deeply(\@parts, [ [ qw(a 1 1) ], [ qw(b 1 2) ] ], 'order is fine'); is($eml->as_string, $orig, 'unchanged by ->each_part'); $eml->each_part(sub {}, undef, 1); is(defined($eml) ? $eml->body_raw : '', # old msg_iter clobbers $eml '', 'each_part can clobber body'); } if ('descend into message/rfc822') { my $eml = eml_load 't/data/message_embed.eml'; my @parts; $eml->each_part(sub { my ($part, $level, @ex) = @{$_[0]}; push @parts, [ $part, $level, @ex ]; }); is(scalar(@parts), 6, 'got all parts'); like($parts[0]->[0]->body, qr/^testing embedded message harder\n/sm, 'first part found'); is_deeply([ @{$parts[0]}[1..2] ], [ 1, '1' ], 'got expected depth and level for part #0'); is($parts[1]->[0]->filename, 'embed2x.eml', 'attachment filename found'); is_deeply([ @{$parts[1]}[1..2] ], [ 1, '2' ], 'got expected depth and level for part #1'); is_deeply([ @{$parts[2]}[1..2] ], [ 2, '2.1' ], 'got expected depth and level for part #2'); is_deeply([ @{$parts[3]}[1..2] ], [ 3, '2.1.1' ], 'got expected depth and level for part #3'); is_deeply([ @{$parts[4]}[1..2] ], [ 3, '2.1.2' ], 'got expected depth and level for part #4'); is($parts[4]->[0]->filename, 'test.eml', 'another attachment filename found'); is_deeply([ @{$parts[5]}[1..2] ], [ 4, '2.1.2.1' ], 'got expected depth and level for part #5'); } # body-less, boundary-less for my $cls (@classes) { my $call = 0; $cls->new(<<'EOF')->each_part(sub { $call++ }, 0, 1); Content-Type: multipart/mixed; boundary="body-less" EOF is($call, 1, 'called on bodyless multipart'); my @tmp; $cls->new(<<'EOF')->each_part(sub { push @tmp, \@_; }, 0, 1); Content-Type: multipart/mixed; boundary="boundary-less" hello world EOF is(scalar(@tmp), 1, 'got one part even w/o boundary'); is($tmp[0]->[0]->[0]->body, "hello world\n", 'body preserved'); is($tmp[0]->[0]->[1], 0, '$depth is zero'); is($tmp[0]->[0]->[2], 1, '@idx is one'); } # I guess the following only worked in PI::M because of a happy accident # involving inheritance: for my $cls (@classes) { my @tmp; my $header_less = <<'EOF'; Archived-At: <85k5su9k59.fsf_-_@lola.goethe.zz> Content-Type: multipart/mixed; boundary="header-less" --header-less this is the body --header-less i-haz: header something else --header-less-- EOF my $expect = "this is the body\n"; $cls->new($header_less)->each_part(sub { push @tmp, \@_ }, 0, 1); my $body = $tmp[0]->[0]->[0]->body; if ($cls eq 'PublicInbox::Eml') { is($body, $expect, 'body-only subpart in '.$cls); } elsif ($body ne $expect) { diag "W: $cls `$body' != `$expect'"; } is($tmp[1]->[0]->[0]->body, "something else\n"); is(scalar(@tmp), 2, 'two parts'); } if ('one newline before headers') { my $eml = PublicInbox::Eml->new("\nNewline: no Header \n"); my @v = $eml->header_raw('Newline'); is_deeply(\@v, ['no Header'], 'no header'); is($eml->crlf, "\n", 'got CRLF as "\n"'); is($eml->body, ""); } for my $cls (@classes) { # XXX: matching E::M, but not sure about this my $s = <new(\$s); my $nr = 0; my @v; $eml->each_part(sub { @v = $_[0]->[0]->header_raw('Header'); $nr++; }); is($nr, 1, 'only one part'); is_deeply(\@v, [], "nothing w/o body $cls"); } for my $cls (@classes) { my $s = <new(\$s); my $nr = 0; $eml->each_part(sub { my $part = $_[0]->[0]; is_deeply([$part->header_raw('should')], ['appear'], 'only got one header'); is($part->body, "yes\n", 'got expected body'); $nr++; }); is($nr, 1, 'only one part'); } for my $cls (@classes) { SKIP: { skip 'newer Email::MIME behavior inconsistent', 1 if $cls eq 'PublicInbox::MIME'; my $s = <new(\$s); my ($str, $err) = msg_part_text($eml, $eml->content_type); is($str, "\x{100}\n", "got wide character by assuming utf-8 ($cls)"); } # SKIP } if ('we differ from Email::MIME with final "\n" on missing epilogue') { my $s = <new(\$s); is(($eml->subparts)[-1]->body, "no epilogue\n", 'final "\n" preserved on missing epilogue'); } if ('header_size_limit stolen from postfix') { local $PublicInbox::Eml::header_size_limit = 4; my @w; local $SIG{__WARN__} = sub { push @w, @_ }; my $eml = PublicInbox::Eml->new("a:b\na:d\n\nzz"); is_deeply([$eml->header('a')], ['b'], 'no overrun header'); is($eml->body_raw, 'zz', 'body not damaged'); is($eml->header_obj->as_string, "a:b\n", 'header truncated'); is(grep(/truncated/, @w), 1, 'truncation warned'); $eml = PublicInbox::Eml->new("a:b\na:d\n"); is_deeply([$eml->header('a')], ['b'], 'no overrun header w/o body'); local $PublicInbox::Eml::header_size_limit = 5; $eml = PublicInbox::Eml->new("a:b\r\na:d\r\n\nzz"); is_deeply([$eml->header('a')], ['b'], 'no overrun header on CRLF'); is($eml->body_raw, 'zz', 'body not damaged'); @w = (); $eml = PublicInbox::Eml->new("too:long\n"); $eml = PublicInbox::Eml->new("too:long\n\n"); $eml = PublicInbox::Eml->new("too:long\r\n\r\n"); is(grep(/ignored/, @w), 3, 'ignored header warned'); } if ('maxparts is a feature unique to us') { my $eml = eml_load 't/psgi_attach.eml'; my @orig; $eml->each_part(sub { push @orig, $_[0]->[0] }); local $PublicInbox::Eml::mime_parts_limit = scalar(@orig); my $i = 0; $eml->each_part(sub { my $cur = $_[0]->[0]; my $prv = $orig[$i++]; is($cur->body_raw, $prv->body_raw, "part #$i matches"); }); is($i, scalar(@orig), 'maxparts honored'); $PublicInbox::Eml::mime_parts_limit--; my @ltd; $eml->each_part(sub { push @ltd, $_[0]->[0] }); for ($i = 0; $i <= $#ltd; $i++) { is($ltd[$i]->body_raw, $orig[$i]->body_raw, "part[$i] matches"); } is(scalar(@ltd), scalar(@orig) - 1, 'maxparts honored'); } SKIP: { require_mods('PublicInbox::MIME', 1); my $eml = eml_load 't/utf8.eml'; my $mime = mime_load 't/utf8.eml'; for my $h (qw(Subject From To)) { my $v = $eml->header($h); my $m = $mime->header($h); is($v, $m, "decoded -8 $h matches Email::MIME"); ok(utf8::is_utf8($v), "$h is UTF-8"); ok(utf8::valid($v), "UTF-8 valid $h"); } my $s = $eml->body_str; ok(utf8::is_utf8($s), 'body_str is UTF-8'); ok(utf8::valid($s), 'UTF-8 valid body_str'); my $ref = \(my $x = 'ref'); for my $msg ($eml, $mime) { $msg->body_str_set($s .= "\nHI\n"); ok(!utf8::is_utf8($msg->body_raw), 'raw octets after body_str_set'); $s = $msg->body_str; ok(utf8::is_utf8($s), 'body_str is UTF-8 after set'); ok(utf8::valid($s), 'UTF-8 valid body_str after set'); $msg->body_set($ref); is($msg->body_raw, $$ref, 'body_set worked on scalar ref'); $msg->body_set($$ref); is($msg->body_raw, $$ref, 'body_set worked on scalar'); } $eml = eml_load 't/iso-2202-jp.eml'; $mime = mime_load 't/iso-2202-jp.eml'; $s = $eml->body_str; is($s, $mime->body_str, 'ISO-2202-JP body_str'); ok(utf8::is_utf8($s), 'ISO-2202-JP => UTF-8 body_str'); ok(utf8::valid($s), 'UTF-8 valid body_str'); $eml = eml_load 't/psgi_attach.eml'; $mime = mime_load 't/psgi_attach.eml'; is_deeply([ map { $_->body_raw } $eml->subparts ], [ map { $_->body_raw } $mime->subparts ], 'raw ->subparts match deeply'); is_deeply([ map { $_->body } $eml->subparts ], [ map { $_->body } $mime->subparts ], '->subparts match deeply'); for my $msg ($eml, $mime) { my @old = $msg->subparts; $msg->parts_set([]); is_deeply([$msg->subparts], [], 'parts_set can clear'); $msg->parts_set([$old[-1]]); is(scalar $msg->subparts, 1, 'only last remains'); } # some versions of Email::MIME or Email::MIME::* will drop # unnecessary ", while PublicInbox::Eml will preserve the original my $exp = $mime->as_string; $exp =~ s/; boundary=b\b/; boundary="b"/; is($eml->as_string, $exp, 'as_string matches after parts_set'); } for my $cls (@classes) { my $s = <<'EOF'; Content-Type: text/x-patch; name="=?utf-8?q?vtpm-fakefile.patch?=" Content-Disposition: attachment; filename="=?utf-8?q?vtpm-makefile.patch?=" EOF is($cls->new($s)->filename, 'vtpm-makefile.patch', 'filename decoded'); $s =~ s/^Content-Disposition:.*$//sm; is($cls->new($s)->filename, 'vtpm-fakefile.patch', "filename fallback ($cls)") if $cls ne 'PublicInbox::MIME'; is($cls->new($s)->content_type, 'text/x-patch; name="vtpm-fakefile.patch"', 'matches Email::MIME output, "correct" or not'); $s = <<'EOF'; Content-Type: multipart/foo; boundary=b --b Content-Disposition: attachment; filename="=?utf-8?q?vtpm-makefile.patch?=" a --b Content-Type: text/x-patch; name="=?utf-8?q?vtpm-fakefile.patch?=" b --b-- EOF SKIP: { skip 'newer Email::MIME is inconsistent here', 1 if $cls eq 'PublicInbox::MIME'; my @x; $cls->new($s)->each_part(sub { push @x, $_[0]->[0]->filename }); is_deeply(['vtpm-makefile.patch', 'vtpm-fakefile.patch'], \@x, "got filename for both attachments ($cls)"); } } done_testing; public-inbox-1.6.1/t/eml_content_disposition.t000066400000000000000000000055441377346120300215130ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # Copyright (C) 2004- Simon Cozens, Casey West, Ricardo SIGNES # This library is free software; you can redistribute it and/or modify # it under the same terms as Perl itself. # # License: GPL-1.0+ or Artistic-1.0-Perl # # use strict; use Test::More; use PublicInbox::EmlContentFoo qw(parse_content_disposition); my %cd_tests = ( '' => { type => 'attachment', attributes => {} }, 'inline' => { type => 'inline', attributes => {} }, 'attachment' => { type => 'attachment', attributes => {} }, 'attachment; filename=genome.jpeg;' . ' modification-date="Wed, 12 Feb 1997 16:29:51 -0500"' => { type => 'attachment', attributes => { filename => 'genome.jpeg', 'modification-date' => 'Wed, 12 Feb 1997 16:29:51 -0500' } }, q(attachment; filename*=UTF-8''genome.jpeg;) . q( modification-date="Wed, 12 Feb 1997 16:29:51 -0500") => { type => 'attachment', attributes => { filename => 'genome.jpeg', 'modification-date' => 'Wed, 12 Feb 1997 16:29:51 -0500' } }, q(attachment; filename*0*=us-ascii'en'This%20is%20even%20more%20;) . q( filename*1*=%2A%2A%2Afun%2A%2A%2A%20; filename*2="isn't it!") => { type => 'attachment', attributes => { filename => "This is even more ***fun*** isn't it!" } }, q(attachment; filename*0*='en'This%20is%20even%20more%20;) . q( filename*1*=%2A%2A%2Afun%2A%2A%2A%20; filename*2="isn't it!") => { type => 'attachment', attributes => { filename => "This is even more ***fun*** isn't it!" } }, q(attachment; filename*0*=''This%20is%20even%20more%20;) . q( filename*1*=%2A%2A%2Afun%2A%2A%2A%20; filename*2="isn't it!") => { type => 'attachment', attributes => { filename => "This is even more ***fun*** isn't it!" } }, q(attachment; filename*0*=us-ascii''This%20is%20even%20more%20;). q( filename*1*=%2A%2A%2Afun%2A%2A%2A%20; filename*2="isn't it!") => { type => 'attachment', attributes => { filename => "This is even more ***fun*** isn't it!" } }, ); my %non_strict_cd_tests = ( 'attachment; filename=genome.jpeg;' . ' modification-date="Wed, 12 Feb 1997 16:29:51 -0500";' => { type => 'attachment', attributes => { filename => 'genome.jpeg', 'modification-date' => 'Wed, 12 Feb 1997 16:29:51 -0500' } }, ); sub test { my ($string, $expect, $info) = @_; local $_; $info =~ s/\r/\\r/g; $info =~ s/\n/\\n/g; is_deeply(parse_content_disposition($string), $expect, $info); } for (sort keys %cd_tests) { test($_, $cd_tests{$_}, "Can parse C-D <$_>"); } local $PublicInbox::EmlContentFoo::STRICT_PARAMS = 0; for (sort keys %cd_tests) { test($_, $cd_tests{$_}, "Can parse non-strict C-D <$_>"); } for (sort keys %non_strict_cd_tests) { test($_, $non_strict_cd_tests{$_}, "Can parse non-strict C-D <$_>"); } done_testing; public-inbox-1.6.1/t/eml_content_type.t000066400000000000000000000166641377346120300201350ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # Copyright (C) 2004- Simon Cozens, Casey West, Ricardo SIGNES # This library is free software; you can redistribute it and/or modify # it under the same terms as Perl itself. # # License: GPL-1.0+ or Artistic-1.0-Perl # # use strict; use Test::More; use PublicInbox::EmlContentFoo qw(parse_content_type); my %ct_tests = ( '' => { type => "text", subtype => "plain", attributes => { charset => "us-ascii" } }, "text/plain" => { type => "text", subtype => "plain", attributes => {} }, 'text/plain; charset=us-ascii' => { type => "text", subtype => "plain", attributes => { charset => "us-ascii" } }, 'text/plain; charset="us-ascii"' => { type => "text", subtype => "plain", attributes => { charset => "us-ascii" } }, "text/plain; charset=us-ascii (Plain text)" => { type => "text", subtype => "plain", attributes => { charset => "us-ascii" } }, 'text/plain; charset=ISO-8859-1' => { type => "text", subtype => "plain", attributes => { charset => "ISO-8859-1" } }, 'text/plain; charset="ISO-8859-1"' => { type => "text", subtype => "plain", attributes => { charset => "ISO-8859-1" } }, 'text/plain; charset="ISO-8859-1" (comment)' => { type => "text", subtype => "plain", attributes => { charset => "ISO-8859-1" } }, '(c) text/plain (c); (c) charset=ISO-8859-1 (c)' => { type => "text", subtype => "plain", attributes => { charset => "ISO-8859-1" } }, '(c \( \\\\) (c) text/plain (c) (c) ; (c) (c) charset=utf-8 (c)' => { type => "text", subtype => "plain", attributes => { charset => "utf-8" } }, 'text/plain; (c (nested ()c)another c)() charset=ISO-8859-1' => { type => "text", subtype => "plain", attributes => { charset => "ISO-8859-1" } }, 'text/plain (c \(!nested ()c\)\)(nested\(c())); charset=utf-8' => { type => "text", subtype => "plain", attributes => { charset => "utf-8" } }, "application/foo" => { type => "application", subtype => "foo", attributes => {} }, "multipart/mixed; boundary=unique-boundary-1" => { type => "multipart", subtype => "mixed", attributes => { boundary => "unique-boundary-1" } }, 'message/external-body; access-type=local-file; name="/u/n/m.jpg"' => { type => "message", subtype => "external-body", attributes => { "access-type" => "local-file", "name" => "/u/n/m.jpg" } }, 'multipart/mixed; boundary="----------=_1026452699-10321-0" ' => { 'type' => 'multipart', 'subtype' => 'mixed', 'attributes' => { 'boundary' => '----------=_1026452699-10321-0' } }, 'multipart/report; boundary= "=_0=73e476c3-cd5a-5ba3-b910-2="' => { 'type' => 'multipart', 'subtype' => 'report', 'attributes' => { 'boundary' => '=_0=73e476c3-cd5a-5ba3-b910-2=' } }, 'multipart/report; boundary=' . " \t" . '"=_0=7-c-5-b-2="' => { 'type' => 'multipart', 'subtype' => 'report', 'attributes' => { 'boundary' => '=_0=7-c-5-b-2=' } }, 'message/external-body; access-type=URL;' . ' URL*0="ftp://";' . ' URL*1="example.com/"' => { 'type' => 'message', 'subtype' => 'external-body', 'attributes' => { 'access-type' => 'URL', 'url' => 'ftp://example.com/' } }, 'message/external-body; access-type=URL; URL="ftp://example.com/"' => { 'type' => 'message', 'subtype' => 'external-body', 'attributes' => { 'access-type' => 'URL', 'url' => 'ftp://example.com/', } }, "application/x-stuff; title*=us-ascii'en-us'This%20is%20f%2Ad" => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => 'This is f*d' } }, "application/x-stuff; title*=us-ascii''This%20is%20f%2Ad" => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => 'This is f*d' } }, "application/x-stuff; title*=''This%20is%20f%2Ad" => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => 'This is f*d' } }, "application/x-stuff; title*='en-us'This%20is%20f%2Ad" => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => 'This is f*d' } }, q(application/x-stuff;) . q( title*0*=us-ascii'en'This%20is%20even%20more%20;) . q(title*1*=%2A%2A%2Afun%2A%2A%2A%20; title*2="isn't it!") => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => "This is even more ***fun*** isn't it!" } }, q(application/x-stuff;) . q( title*0*='en'This%20is%20even%20more%20;) . q( title*1*=%2A%2A%2Afun%2A%2A%2A%20; title*2="isn't it!") => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => "This is even more ***fun*** isn't it!" } }, q(application/x-stuff;) . q( title*0*=''This%20is%20even%20more%20;) . q( title*1*=%2A%2A%2Afun%2A%2A%2A%20; title*2="isn't it!") => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => "This is even more ***fun*** isn't it!" } }, q(application/x-stuff;). q( title*0*=us-ascii''This%20is%20even%20more%20;). q( title*1*=%2A%2A%2Afun%2A%2A%2A%20; title*2="isn't it!") => { 'type' => 'application', 'subtype' => 'x-stuff', 'attributes' => { 'title' => "This is even more ***fun*** isn't it!" } }, 'text/plain; attribute="v\"v\\\\v\(v\>\<\)\@\,\;\:\/\]\[\?\=v v";' . ' charset=us-ascii' => { 'type' => 'text', 'subtype' => 'plain', 'attributes' => { 'attribute' => 'v"v\\v(v><)@,;:/][?=v v', 'charset' => 'us-ascii', }, }, qq(text/plain;\r charset=us-ascii;\r attribute="\r value1 \r value2\r\n value3\r\n value4\r\n "\r\n ) => { 'type' => 'text', 'subtype' => 'plain', 'attributes' => { 'attribute' => ' value1 value2 value3 value4 ', 'charset' => 'us-ascii', }, }, ); my %non_strict_ct_tests = ( "text/plain;" => { type => "text", subtype => "plain", attributes => {} }, "text/plain; " => { type => "text", subtype => "plain", attributes => {} }, 'image/jpeg;' . ' x-mac-type="3F3F3F3F";'. ' x-mac-creator="3F3F3F3F" name="file name.jpg";' => { type => "image", subtype => "jpeg", attributes => { 'x-mac-type' => "3F3F3F3F", 'x-mac-creator' => "3F3F3F3F", 'name' => "file name.jpg" } }, "text/plain; key=very long value" => { type => "text", subtype => "plain", attributes => { key => "very long value" } }, "text/plain; key=very long value key2=value2" => { type => "text", subtype => "plain", attributes => { key => "very long value", key2 => "value2" } }, 'multipart/mixed; boundary = "--=_Next_Part_24_Nov_2016_08.09.21"' => { type => "multipart", subtype => "mixed", attributes => { boundary => "--=_Next_Part_24_Nov_2016_08.09.21" } }, ); sub test { my ($string, $expect, $info) = @_; local $_; $info =~ s/\r/\\r/g; $info =~ s/\n/\\n/g; is_deeply(parse_content_type($string), $expect, $info); } for (sort keys %ct_tests) { test($_, $ct_tests{$_}, "Can parse C-T <$_>"); } local $PublicInbox::EmlContentFoo::STRICT_PARAMS = 0; for (sort keys %ct_tests) { test($_, $ct_tests{$_}, "Can parse non-strict C-T <$_>"); } for (sort keys %non_strict_ct_tests) { test( $_, $non_strict_ct_tests{$_}, "Can parse non-strict C-T <$_>" ); } done_testing; public-inbox-1.6.1/t/epoll.t000066400000000000000000000013011377346120300156560ustar00rootroot00000000000000use strict; use Test::More; use IO::Handle; use PublicInbox::Syscall qw(:epoll); plan skip_all => 'not Linux' if $^O ne 'linux'; my $epfd = epoll_create(); ok($epfd >= 0, 'epoll_create'); my $hnd = IO::Handle->new_from_fd($epfd, 'r+'); # close on exit pipe(my ($r, $w)) or die "pipe: $!"; is(epoll_ctl($epfd, EPOLL_CTL_ADD, fileno($w), EPOLLOUT), 0, 'epoll_ctl socket EPOLLOUT'); my @events; is(epoll_wait($epfd, 100, 10000, \@events), 1, 'epoll_wait returns'); is(scalar(@events), 1, 'got one event'); is($events[0]->[0], fileno($w), 'got expected FD'); is($events[0]->[1], EPOLLOUT, 'got expected event'); close $w; is(epoll_wait($epfd, 100, 0, \@events), 0, 'epoll_wait timeout'); done_testing; public-inbox-1.6.1/t/fail-bin/000077500000000000000000000000001377346120300160445ustar00rootroot00000000000000public-inbox-1.6.1/t/fail-bin/spamc000077500000000000000000000000251377346120300170720ustar00rootroot00000000000000#!/bin/sh cat exit 1 public-inbox-1.6.1/t/fake_inotify.t000066400000000000000000000027261377346120300172260ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ # # Ensure FakeInotify can pick up rename(2) and link(2) operations # used by Maildir writing tools use strict; use Test::More; use PublicInbox::TestCommon; use_ok 'PublicInbox::FakeInotify'; my $MIN_FS_TICK = 0.011; # for low-res CONFIG_HZ=100 systems my ($tmpdir, $for_destroy) = tmpdir(); mkdir "$tmpdir/new" or BAIL_OUT "mkdir: $!"; open my $fh, '>', "$tmpdir/tst" or BAIL_OUT "open: $!"; close $fh or BAIL_OUT "close: $!"; my $fi = PublicInbox::FakeInotify->new; my $mask = PublicInbox::FakeInotify::MOVED_TO_OR_CREATE(); my $w = $fi->watch("$tmpdir/new", $mask); select undef, undef, undef, $MIN_FS_TICK; rename("$tmpdir/tst", "$tmpdir/new/tst") or BAIL_OUT "rename: $!"; my @events = map { $_->fullname } $fi->read; is_deeply(\@events, ["$tmpdir/new/tst"], 'rename(2) detected'); select undef, undef, undef, $MIN_FS_TICK; open $fh, '>', "$tmpdir/tst" or BAIL_OUT "open: $!"; close $fh or BAIL_OUT "close: $!"; link("$tmpdir/tst", "$tmpdir/new/link") or BAIL_OUT "link: $!"; @events = map { $_->fullname } $fi->read; is_deeply(\@events, ["$tmpdir/new/link"], 'link(2) detected'); $w->cancel; select undef, undef, undef, $MIN_FS_TICK; link("$tmpdir/new/tst", "$tmpdir/new/link2") or BAIL_OUT "link: $!"; @events = map { $_->fullname } $fi->read; is_deeply(\@events, [], 'link(2) not detected after cancel'); PublicInbox::DS->Reset; done_testing; public-inbox-1.6.1/t/feed.t000066400000000000000000000063611377346120300154610ustar00rootroot00000000000000# Copyright (C) 2014-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::Feed; use PublicInbox::Import; use PublicInbox::Inbox; my $have_xml_treepp = eval { require XML::TreePP; 1 }; use PublicInbox::TestCommon; sub string_feed { my $res = PublicInbox::Feed::generate($_[0]); my $body = $res->[2]; my $str = ''; while (defined(my $chunk = $body->getline)) { $str .= $chunk; } $body->close; $str; } my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/gittest"; my $ibx = PublicInbox::Inbox->new({ address => 'test@example', name => 'testbox', inboxdir => $git_dir, url => [ 'http://example.com/test' ], feedmax => 3, }); my $git = $ibx->git; my $im = PublicInbox::Import->new($git, $ibx->{name}, 'test@example'); { $im->init_bare; foreach my $i (1..6) { my $mime = PublicInbox::Eml->new(< To: U Message-Id: <$i\@example.com> Subject: zzz #$i Date: Thu, 01 Jan 1970 00:00:00 +0000 > This is a long multi line quote so it should not be allowed to > show up in its entirty in the Atom feed. drop me > I quote to much > I quote to much > I quote to much > I quote to much > I quote to much > I quote to much > I quote to much > I quote to much > I quote to much > I quote to much > I quote to much > I quote to much > I quote to much msg $i > inline me here > this is a short quote keep me EOF like($im->add($mime), qr/\A:\d+/, 'added'); } $im->done; } # spam check { # check initial feed { my $feed = string_feed({ -inbox => $ibx }); SKIP: { skip 'XML::TreePP missing', 3 unless $have_xml_treepp; my $t = XML::TreePP->new->parse($feed); like($t->{feed}->{-xmlns}, qr/\bAtom\b/, 'looks like an an Atom feed'); is(scalar @{$t->{feed}->{entry}}, 3, 'parsed three entries'); is($t->{feed}->{id}, 'mailto:test@example', 'id is set to default'); } like($feed, qr/drop me/, "long quoted text kept"); like($feed, qr/inline me here/, "short quoted text kept"); like($feed, qr/keep me/, "unquoted text saved"); } # add a new spam message my $spam; { $spam = PublicInbox::Eml->new(< To: U Message-Id: Subject: SPAM!!!!!!!! Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF $im->add($spam); $im->done; } # check spam shows up { my $spammy_feed = string_feed({ -inbox => $ibx }); SKIP: { skip 'XML::TreePP missing', 2 unless $have_xml_treepp; my $t = XML::TreePP->new->parse($spammy_feed); like($t->{feed}->{-xmlns}, qr/\bAtom\b/, 'looks like an an Atom feed'); is(scalar @{$t->{feed}->{entry}}, 3, 'parsed three entries'); } like($spammy_feed, qr/SPAM/s, "spam showed up :<"); } # nuke spam $im->remove($spam); $im->done; # spam no longer shows up { my $feed = string_feed({ -inbox => $ibx }); SKIP: { skip 'XML::TreePP missing', 2 unless $have_xml_treepp; my $t = XML::TreePP->new->parse($feed); like($t->{feed}->{-xmlns}, qr/\bAtom\b/, 'looks like an an Atom feed'); is(scalar @{$t->{feed}->{entry}}, 3, 'parsed three entries'); } unlike($feed, qr/SPAM/, "spam gone :>"); } } done_testing(); public-inbox-1.6.1/t/filter_base-junk.eml000066400000000000000000000004601377346120300203060ustar00rootroot00000000000000From: a@example.com Subject: blah Content-Type: multipart/mixed; boundary="b" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit --b Content-Type: application/vnd.ms-excel Content-Transfer-Encoding: base64 anVuaw== --b Content-Type: text/plain Content-Transfer-Encoding: quoted-printable junk= --b-- public-inbox-1.6.1/t/filter_base-xhtml.eml000066400000000000000000000005371377346120300205000ustar00rootroot00000000000000From: a@example.com Subject: blah Content-Type: multipart/alternative; boundary="b" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit --b Content-Type: text/xhtml; charset=UTF-8 Content-Transfer-Encoding: base64 PGh0bWw+PGJvZHk+aGk8L2JvZHk+PC9odG1sPg== --b Content-Type: text/plain Content-Transfer-Encoding: quoted-printable hi =3D "bye"= --b-- public-inbox-1.6.1/t/filter_base.t000066400000000000000000000017051377346120300170320ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use_ok 'PublicInbox::Filter::Base'; { my $f = PublicInbox::Filter::Base->new; ok($f, 'created stock object'); ok(defined $f->{reject_suffix}, 'rejected suffix redefined'); is(ref($f->{reject_suffix}), 'Regexp', 'reject_suffix should be a RE'); } { my $f = PublicInbox::Filter::Base->new(reject_suffix => undef); ok($f, 'created base object q/o reject_suffix'); ok(!defined $f->{reject_suffix}, 'reject_suffix not defined'); } { my $f = PublicInbox::Filter::Base->new; my $email = eml_load 't/filter_base-xhtml.eml'; is($f->delivery($email), 100, "xhtml rejected"); } { my $f = PublicInbox::Filter::Base->new; my $email = eml_load 't/filter_base-junk.eml'; is($f->delivery($email), 100, 'proprietary format rejected on glob'); } done_testing(); public-inbox-1.6.1/t/filter_mirror.t000066400000000000000000000007261377346120300174340ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use_ok 'PublicInbox::Filter::Mirror'; my $f = PublicInbox::Filter::Mirror->new; ok($f, 'created PublicInbox::Filter::Mirror object'); { my $email = eml_load 't/mda-mime.eml'; is($f->ACCEPT, $f->delivery($email), 'accept any trash that comes'); } done_testing(); public-inbox-1.6.1/t/filter_rubylang.t000066400000000000000000000033151377346120300177420ustar00rootroot00000000000000# Copyright (C) 2017-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::TestCommon; use_ok 'PublicInbox::Filter::RubyLang'; my $f = PublicInbox::Filter::RubyLang->new; ok($f, 'created PublicInbox::Filter::RubyLang object'); my $msg = <<'EOF'; Subject: test keep this Unsubscribe: EOF my $mime = PublicInbox::Eml->new($msg); my $ret = $f->delivery($mime); is($ret, $mime, "delivery successful"); is($mime->body, "keep this\n", 'normal message filtered OK'); SKIP: { require_mods('DBD::SQLite', 4); use_ok 'PublicInbox::Inbox'; my ($git_dir, $for_destroy) = tmpdir(); is(mkdir("$git_dir/public-inbox"), 1, "created public-inbox dir"); my $altid = [ # 'serial:ruby-core:file=msgmap.sqlite3' can be used here # for documentation purposes, but Xapian ignores everything # up to and including the '-' 'serial:core:file=msgmap.sqlite3' ]; my $ibx = PublicInbox::Inbox->new({ inboxdir => $git_dir, altid => $altid }); $f = PublicInbox::Filter::RubyLang->new(-inbox => $ibx); $msg = <<'EOF'; X-Mail-Count: 12 Message-ID: EOF $mime = PublicInbox::Eml->new($msg); $ret = $f->delivery($mime); is($ret, $mime, "delivery successful"); my $mm = PublicInbox::Msgmap->new($git_dir); is($mm->num_for('a@b'), 12, 'MM entry created based on X-ML-Count'); $msg = <<'EOF'; X-Mail-Cout: 12 Message-ID: EOF $mime = PublicInbox::Eml->new($msg); $ret = $f->delivery($mime); is($ret, 100, "delivery rejected without X-Mail-Count"); } done_testing(); public-inbox-1.6.1/t/filter_subjecttag.t000066400000000000000000000022011377346120300202430ustar00rootroot00000000000000# Copyright (C) 2017-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use_ok 'PublicInbox::Filter::SubjectTag'; my $f = eval { PublicInbox::Filter::SubjectTag->new }; like($@, qr/tag not defined/, 'error without args'); $f = PublicInbox::Filter::SubjectTag->new('-tag', '[foo]'); is(ref $f, 'PublicInbox::Filter::SubjectTag', 'new object created'); my $mime = PublicInbox::Eml->new(< Subject: =?UTF-8?B?UmU6IFtmb29dIEVsw4PCqWFub3I=?= EOF $mime = $f->delivery($mime); is($mime->header('Subject'), "Re: El\xc3\xa9anor", 'filtered with Re:'); $mime->header_str_set('Subject', '[FOO] bar'); $mime = $f->delivery($mime); is($mime->header('Subject'), 'bar', 'filtered non-reply'); $f = PublicInbox::Filter::SubjectTag->new(-tag => '[sox-devel]'); my $eml = PublicInbox::Eml->new(<delivery($eml); my $s = $eml->header('Subject'); utf8::encode($s); # to octets is($s, "Re: \xc5\xa1", 'subject filtered correctly'); done_testing(); public-inbox-1.6.1/t/filter_vger.t000066400000000000000000000022751377346120300170660ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use_ok 'PublicInbox::Filter::Vger'; my $f = PublicInbox::Filter::Vger->new; ok($f, 'created PublicInbox::Filter::Vger object'); { my $lkml = <<'EOF'; From: foo@example.com Subject: test keep this -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ EOF my $mime = PublicInbox::Eml->new($lkml); $mime = $f->delivery($mime); is("keep this\n", $mime->body, 'normal message filtered OK'); } { my $no_nl = <<'EOF'; From: foo@example.com Subject: test OSX users :P-- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html EOF my $mime = PublicInbox::Eml->new($no_nl); $mime = $f->delivery($mime); is('OSX users :P', $mime->body, 'missing trailing LF in original OK'); } done_testing(); public-inbox-1.6.1/t/git-http-backend.psgi000066400000000000000000000013131377346120300203720ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use PublicInbox::GitHTTPBackend; use PublicInbox::Git; use Plack::Builder; use BSD::Resource qw(getrusage); my $git_dir = $ENV{GIANT_GIT_DIR} or die 'GIANT_GIT_DIR not defined in env'; my $git = PublicInbox::Git->new($git_dir); builder { enable 'Head'; sub { my ($env) = @_; if ($env->{PATH_INFO} =~ m!\A/(.+)\z!s) { PublicInbox::GitHTTPBackend::serve($env, $git, $1); } else { my $ru = getrusage(); my $b = $ru->maxrss . "\n"; [ 200, [ qw(Content-Type text/plain Content-Length), length($b) ], [ $b ] ] } } } public-inbox-1.6.1/t/git.fast-import-data000066400000000000000000000026771377346120300202600ustar00rootroot00000000000000blob mark :1 data 6 hello reset refs/heads/header commit refs/heads/header mark :2 author AU Thor 0 +0000 committer AU Thor 0 +0000 data 8 initial M 100644 :1 foo.txt blob mark :3 data 12 hello world commit refs/heads/master mark :4 author AU Thor 0 +0000 committer AU Thor 0 +0000 data 7 second from :2 M 100644 :3 foo.txt blob mark :5 data 12 ----- hello commit refs/heads/header mark :6 author AU Thor 0 +0000 committer AU Thor 0 +0000 data 11 add header from :2 M 100644 :5 foo.txt blob mark :7 data 18 ----- hello world commit refs/heads/master mark :8 author AU Thor 0 +0000 committer AU Thor 0 +0000 data 46 Merge branch 'header' * header: add header from :4 merge :6 M 100644 :7 foo.txt blob mark :9 data 0 blob mark :10 data 16 dir/dur/der/derp commit refs/heads/master mark :11 author AU Thor 0 +0000 committer AU Thor 0 +0000 data 26 add symlink and deep file from :8 M 100644 :9 dir/dur/der/derp M 120000 :10 link blob mark :12 data 78 [submodule "git"] path = git url = git://git.kernel.org/pub/scm/git/git.git commit refs/heads/master mark :13 author AU Thor 0 +0000 committer AU Thor 749520000 +0000 data 18 add git submodule from :11 M 100644 :12 .gitmodules M 160000 f3adf457e046f92f039353762a78dcb3afb2cb13 git reset refs/heads/master from :13 public-inbox-1.6.1/t/git.t000066400000000000000000000125251377346120300153400ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; my ($dir, $for_destroy) = tmpdir(); use PublicInbox::Spawn qw(popen_rd); use PublicInbox::Import; use_ok 'PublicInbox::Git'; { PublicInbox::Import::init_bare($dir); my $fi_data = './t/git.fast-import-data'; open my $fh, '<', $fi_data or die "fast-import data readable (or run test at top level: $!"; my $rdr = { 0 => $fh }; xsys([qw(git fast-import --quiet)], { GIT_DIR => $dir }, $rdr); is($?, 0, 'fast-import succeeded'); } { my $gcf = PublicInbox::Git->new($dir); is($gcf->modified, 749520000, 'modified time detected from commit'); my $f = 'HEAD:foo.txt'; my @x = $gcf->check($f); is(scalar @x, 3, 'returned 3 element array for existing file'); like($x[0], qr/\A[a-f0-9]{40}\z/, 'returns obj ID in 1st element'); is('blob', $x[1], 'returns obj type in 2nd element'); like($x[2], qr/\A\d+\z/, 'returns obj size in 3rd element'); my $raw = $gcf->cat_file($f); is($x[2], length($$raw), 'length matches'); is(${$gcf->cat_file($f)}, $$raw, 'not broken after failures'); is(${$gcf->cat_file($f)}, $$raw, 'not broken after partial read'); my $oid = $x[0]; my $arg = { 'foo' => 'bar' }; my $res = []; my $missing = []; $gcf->cat_async($oid, sub { my ($bref, $oid_hex, $type, $size, $arg) = @_; $res = [ @_ ]; }, $arg); $gcf->cat_async('non-existent', sub { my ($bref, $oid_hex, $type, $size, $arg) = @_; $missing = [ @_ ]; }, $arg); $gcf->cat_async_wait; my ($bref, $oid_hex, $type, $size, $arg_res) = @$res; is_deeply([$oid_hex, $type, $size], \@x, 'got expected header'); is($arg_res, $arg, 'arg passed to cat_async'); is_deeply($raw, $bref, 'blob result matches'); is_deeply($missing, [ undef, 'non-existent', 'missing', undef, $arg], 'non-existent blob gives expected result'); } if (1) { # need a big file, use the AGPL-3.0 :p my $big_data = './COPYING'; ok(-r $big_data, 'COPYING readable'); my $size = -s $big_data; ok($size > 8192, 'file is big enough'); open my $fh, '<', $big_data or die; my $cmd = [ 'git', "--git-dir=$dir", qw(hash-object -w --stdin) ]; my $buf = xqx($cmd, { GIT_DIR => $dir }, { 0 => $fh }); is(0, $?, 'hashed object successfully'); chomp $buf; my $gcf = PublicInbox::Git->new($dir); my $rsize; my $x = $gcf->cat_file($buf, \$rsize); is($rsize, $size, 'got correct size ref on big file'); is(length($$x), $size, 'read correct number of bytes'); my $ref = $gcf->qx(qw(cat-file blob), $buf); my @ref = $gcf->qx(qw(cat-file blob), $buf); my $nl = scalar @ref; ok($nl > 1, "qx returned array length of $nl"); $gcf->qx(qw(repack -adq)); ok($gcf->packed_bytes > 0, 'packed size is positive'); } SKIP: { require_git(2.6, 7) or skip('need git 2.6+ for --batch-all-objects', 7); my ($alt, $alt_obj) = tmpdir(); my $hash_obj = [ 'git', "--git-dir=$alt", qw(hash-object -w --stdin) ]; PublicInbox::Import::init_bare($alt); open my $fh, '<', "$alt/config" or die "open failed: $!\n"; chomp(my $remote = xqx($hash_obj, undef, { 0 => $fh })); my $gcf = PublicInbox::Git->new($dir); is($gcf->cat_file($remote), undef, "remote file not found"); open $fh, '>>', "$dir/objects/info/alternates" or die "open failed: $!\n"; print $fh "$alt/objects\n" or die "print failed: $!\n"; close $fh or die "close failed: $!"; my $found = $gcf->cat_file($remote); open $fh, '<', "$alt/config" or die "open failed: $!\n"; my $config = eval { local $/; <$fh> }; is($$found, $config, 'alternates reloaded'); # with the async interface my ($async_alt, $async_dir_obj) = tmpdir(); PublicInbox::Import::init_bare($async_alt); my @exist = map { chomp; [ split / / ] } (xqx(['git', "--git-dir=$dir", qw(cat-file --batch-all-objects --batch-check)])); my $results = []; my $cb = sub { my ($bref, $oid, $type, $size) = @_; push @$results, [ $oid, $type, $size ]; }; for my $i (0..5) { $gcf->cat_async($exist[$i]->[0], $cb, $results); next if $i != 3; # stick a new alternate into a running async pipeline $hash_obj->[1] = "--git-dir=$async_alt"; $remote = xqx($hash_obj, undef, { 0 => \'async' }); chomp $remote; open $fh, '>>', "$dir/objects/info/alternates" or die "open failed: $!\n"; print $fh "$async_alt/objects\n" or die "print failed: $!\n"; close $fh or die "close failed: $!"; # trigger cat_async_retry: $gcf->cat_async($remote, $cb, $results); } $gcf->cat_async_wait; my $expect = [ @exist[0..3], [ $remote, 'blob', 5 ], @exist[4..5] ]; is_deeply($results, $expect, 'got expected results'); ok(!$gcf->cleanup, 'cleanup can expire'); ok(!$gcf->cleanup, 'cleanup idempotent'); my $t = $gcf->modified; ok($t <= time, 'repo not modified in the future'); isnt($t, 0, 'repo not modified in 1970') } use_ok 'PublicInbox::Git', qw(git_unquote git_quote); my $s; is("foo\nbar", git_unquote($s = '"foo\\nbar"'), 'unquoted newline'); is("Eléanor", git_unquote($s = '"El\\303\\251anor"'), 'unquoted octal'); is(git_unquote($s = '"I\"m"'), 'I"m', 'unquoted dq'); is(git_unquote($s = '"I\\m"'), 'I\\m', 'unquoted backslash'); is(git_quote($s = "Eléanor"), '"El\\303\\251anor"', 'quoted octal'); is(git_quote($s = "hello\"world"), '"hello\"world"', 'quoted dq'); is(git_quote($s = "hello\\world"), '"hello\\\\world"', 'quoted backslash'); is(git_quote($s = "hello\nworld"), '"hello\\nworld"', 'quoted LF'); done_testing(); public-inbox-1.6.1/t/gzip_filter.t000066400000000000000000000021421377346120300170650ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use IO::Handle (); # autoflush use Fcntl qw(SEEK_SET); use PublicInbox::TestCommon; require_mods(qw(Compress::Zlib IO::Uncompress::Gunzip)); require_ok 'PublicInbox::GzipFilter'; { open my $fh, '+>', undef or die "open: $!"; open my $dup, '>&', $fh or die "dup $!"; $dup->autoflush(1); my $filter = PublicInbox::GzipFilter->new->attach($dup); ok($filter->write("hello"), 'wrote something'); ok($filter->write("world"), 'wrote more'); $filter->close; seek($fh, 0, SEEK_SET) or die; IO::Uncompress::Gunzip::gunzip($fh => \(my $buf)); is($buf, 'helloworld', 'buffer matches'); } { pipe(my ($r, $w)) or die "pipe: $!"; $w->autoflush(1); close $r or die; my $filter = PublicInbox::GzipFilter->new->attach($w); my $sigpipe; local $SIG{PIPE} = sub { $sigpipe = 1 }; open my $fh, '<', 'COPYING' or die "open(COPYING): $!"; my $buf = do { local $/; <$fh> }; while ($filter->write($buf .= rand)) {} ok($sigpipe, 'got SIGPIPE'); close $w; } done_testing; public-inbox-1.6.1/t/hl_mod.t000066400000000000000000000040521377346120300160130ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Spawn qw(which); use PublicInbox::TestCommon; use IO::Handle; # ->autoflush use Fcntl qw(:seek); eval { require highlight } or plan skip_all => "failed to load highlight.pm for $0"; use_ok 'PublicInbox::HlMod'; my $hls = PublicInbox::HlMod->new; ok($hls, 'initialized OK'); is($hls->_shebang2lang(\"#!/usr/bin/perl -w\n"), 'perl', 'perl shebang OK'); is($hls->{-ext2lang}->{'pm'}, 'perl', '.pm suffix OK'); is($hls->{-ext2lang}->{'pl'}, 'perl', '.pl suffix OK'); is($hls->_path2lang('Makefile'), 'make', 'Makefile OK'); my $str = do { local $/; open(my $fh, __FILE__); <$fh> }; my $orig = $str; { my $ref = $hls->do_hl(\$str, 'foo.perl'); is(ref($ref), 'SCALAR', 'got a scalar reference back'); ok(utf8::valid($$ref), 'resulting string is utf8::valid'); like($$ref, qr/I can see you!/, 'we can see ourselves in output'); like($$ref, qr/&&/, 'escaped &&'); my $lref = $hls->do_hl_lang(\$str, 'perl'); is($$ref, $$lref, 'do_hl_lang matches do_hl'); SKIP: { my $w3m = which('w3m') or skip('w3m(1) missing to check output', 1); my $cmd = [ $w3m, qw(-T text/html -dump -config /dev/null) ]; my $in = '
    ' . $$ref . '
    '; my $out = xqx($cmd, undef, { 0 => \$in }); # expand tabs and normalize whitespace, # w3m doesn't preserve tabs $orig =~ s/\t/ /gs; $out =~ s/\s*\z//sg; $orig =~ s/\s*\z//sg; is($out, $orig, 'w3m output matches'); } } if ('experimental, only for help text') { my $tmp = <<'EOF'; :> ```perl my $foo = 1 & 2; ``` :< EOF $hls->do_hl_text(\$tmp); my @hl = split(/^/m, $tmp); is($hl[0], ":>\n", 'first line escaped'); is($hl[1], "```perl\n", '2nd line preserved'); like($hl[2], qr/ # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::Feed; use PublicInbox::Git; use PublicInbox::Import; use PublicInbox::Inbox; use PublicInbox::TestCommon; my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/gittest"; my $ibx = PublicInbox::Inbox->new({ address => 'test@example', name => 'tester', inboxdir => $git_dir, url => 'http://example.com/test', }); my $git = $ibx->git; my $im = PublicInbox::Import->new($git, 'tester', 'test@example'); # setup { $im->init_bare; my $prev = ""; foreach my $i (1..6) { my $mid = "<$i\@example.com>"; my $mid_line = "Message-ID: $mid"; if ($prev) { $mid_line .= "In-Reply-To: $prev"; } $prev = $mid; my $mime = PublicInbox::Eml->new(< To: U $mid_line Subject: zzz #$i Date: Thu, 01 Jan 1970 00:00:00 +0000 > This is a long multi line quote so it should not be allowed to > show up in its entirty in the Atom feed. drop me msg $i > inline me here, short quote keep me EOF like($im->add($mime), qr/\A:\d+\z/, 'inserted message'); } $im->done; } done_testing(); public-inbox-1.6.1/t/httpd-corner.psgi000066400000000000000000000066351377346120300176720ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ # corner case tests for the generic PSGI server # Usage: plackup [OPTIONS] /path/to/this/file use strict; use warnings; use Plack::Builder; require Digest::SHA; my $app = sub { my ($env) = @_; my $path = $env->{PATH_INFO}; my $in = $env->{'psgi.input'}; my $actual = -s $in; my $code = 500; my $h = [ 'Content-Type' => 'text/plain' ]; my $body = []; if ($path eq '/sha1') { my $sha1 = Digest::SHA->new('SHA-1'); my $buf; while (1) { my $r = $in->read($buf, 4096); die "read err: $!" unless defined $r; last if $r == 0; $sha1->add($buf); } $code = 200; push @$body, $sha1->hexdigest; } elsif (my $fifo = $env->{HTTP_X_CHECK_FIFO}) { if ($path eq '/slow-header') { return sub { open my $f, '<', $fifo or die "open $fifo: $!\n"; local $/ = "\n"; my @r = <$f>; $_[0]->([200, $h, \@r ]); }; } elsif ($path eq '/slow-body') { return sub { my $fh = $_[0]->([200, $h]); open my $f, '<', $fifo or die "open $fifo: $!\n"; local $/ = "\n"; while (defined(my $l = <$f>)) { $fh->write($l); } $fh->close; }; } } elsif ($path eq '/host-port') { $code = 200; push @$body, "$env->{REMOTE_ADDR}:$env->{REMOTE_PORT}"; } elsif ($path eq '/callback') { return sub { my ($res) = @_; my $buf = "hello world\n"; push @$h, 'Content-Length', length($buf); my $fh = $res->([200, $h]); $fh->write($buf); $fh->close; } } elsif ($path eq '/empty') { $code = 200; } elsif ($path eq '/getline-die') { $code = 200; $body = Plack::Util::inline_object( getline => sub { die 'GETLINE FAIL' }, close => sub { die 'CLOSE FAIL' }, ); } elsif ($path eq '/close-die') { $code = 200; $body = Plack::Util::inline_object( getline => sub { undef }, close => sub { die 'CLOSE FAIL' }, ); } elsif ($path eq '/async-big') { require PublicInbox::Qspawn; open my $null, '>', '/dev/null' or die; my $rdr = { 2 => fileno($null) }; my $cmd = [qw(dd if=/dev/zero count=30 bs=1024k)]; my $qsp = PublicInbox::Qspawn->new($cmd, undef, $rdr); return $qsp->psgi_return($env, undef, sub { my ($r, $bref) = @_; # make $rd_hdr retry sysread + $parse_hdr in Qspawn: return until length($$bref) > 8000; close $null; [ 200, [ qw(Content-Type application/octet-stream) ]]; }); } elsif ($path eq '/psgi-return-gzip') { require PublicInbox::Qspawn; require PublicInbox::GzipFilter; my $cmd = [qw(echo hello world)]; my $qsp = PublicInbox::Qspawn->new($cmd); $env->{'qspawn.filter'} = PublicInbox::GzipFilter->new; return $qsp->psgi_return($env, undef, sub { [ 200, [ qw(Content-Type application/octet-stream)]] }); } elsif ($path eq '/psgi-return-compressible') { require PublicInbox::Qspawn; my $cmd = [qw(echo goodbye world)]; my $qsp = PublicInbox::Qspawn->new($cmd); return $qsp->psgi_return($env, undef, sub { [200, [qw(Content-Type text/plain)]] }); } elsif ($path eq '/psgi-return-enoent') { require PublicInbox::Qspawn; my $cmd = [ 'this-better-not-exist-in-PATH'.rand ]; my $qsp = PublicInbox::Qspawn->new($cmd); return $qsp->psgi_return($env, undef, sub { [ 200, [ qw(Content-Type application/octet-stream)]] }); } elsif ($path eq '/pid') { $code = 200; push @$body, "$$\n"; } [ $code, $h, $body ] }; builder { enable 'ContentLength'; enable 'Head'; $app; } public-inbox-1.6.1/t/httpd-corner.t000066400000000000000000000504251377346120300171670ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ # note: our HTTP server should be standalone and capable of running # generic PSGI/Plack apps. use strict; use warnings; use Test::More; use Time::HiRes qw(gettimeofday tv_interval); use PublicInbox::Spawn qw(which spawn popen_rd); use PublicInbox::TestCommon; require_mods(qw(Plack::Util Plack::Builder HTTP::Date HTTP::Status)); use Digest::SHA qw(sha1_hex); use IO::Handle (); use IO::Socket; use IO::Socket::UNIX; use Fcntl qw(:seek); use Socket qw(IPPROTO_TCP TCP_NODELAY SOL_SOCKET); use POSIX qw(mkfifo); use Carp (); my ($tmpdir, $for_destroy) = tmpdir(); my $fifo = "$tmpdir/fifo"; ok(defined mkfifo($fifo, 0777), 'created FIFO'); my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $psgi = "./t/httpd-corner.psgi"; my $sock = tcp_server() or die; my @zmods = qw(PublicInbox::GzipFilter IO::Uncompress::Gunzip); # Make sure we don't clobber socket options set by systemd or similar # using socket activation: my ($defer_accept_val, $accf_arg, $TCP_DEFER_ACCEPT); if ($^O eq 'linux') { $TCP_DEFER_ACCEPT = eval { Socket::TCP_DEFER_ACCEPT() } // 9; setsockopt($sock, IPPROTO_TCP, $TCP_DEFER_ACCEPT, 5) or die; my $x = getsockopt($sock, IPPROTO_TCP, $TCP_DEFER_ACCEPT); defined $x or die "getsockopt: $!"; $defer_accept_val = unpack('i', $x); if ($defer_accept_val <= 0) { die "unexpected TCP_DEFER_ACCEPT value: $defer_accept_val"; } } elsif ($^O eq 'freebsd' && system('kldstat -m accf_data >/dev/null') == 0) { require PublicInbox::Daemon; my $var = PublicInbox::Daemon::SO_ACCEPTFILTER(); $accf_arg = pack('a16a240', 'dataready', ''); setsockopt($sock, SOL_SOCKET, $var, $accf_arg) or die "setsockopt: $!"; } sub unix_server ($) { my $s = IO::Socket::UNIX->new( Listen => 1024, Type => Socket::SOCK_STREAM(), Local => $_[0], ); $s->blocking(0); $s; } my $upath = "$tmpdir/s"; my $unix = unix_server($upath); ok($unix, 'UNIX socket created'); my $td; my $spawn_httpd = sub { my (@args) = @_; my $cmd = [ '-httpd', @args, "--stdout=$out", "--stderr=$err", $psgi ]; $td = start_script($cmd, undef, { 3 => $sock, 4 => $unix }); }; $spawn_httpd->(); if ('test worker death') { my $conn = conn_for($sock, 'killed worker'); $conn->write("GET /pid HTTP/1.1\r\nHost:example.com\r\n\r\n"); my $pid; while (defined(my $line = $conn->getline)) { next unless $line eq "\r\n"; chomp($pid = $conn->getline); last; } like($pid, qr/\A[0-9]+\z/, '/pid response'); is(kill('KILL', $pid), 1, 'killed worker'); is($conn->getline, undef, 'worker died and EOF-ed client'); $conn = conn_for($sock, 'respawned worker'); $conn->write("GET /pid HTTP/1.0\r\n\r\n"); ok($conn->read(my $buf, 8192), 'read response'); my ($head, $body) = split(/\r\n\r\n/, $buf); chomp($body); like($body, qr/\A[0-9]+\z/, '/pid response'); isnt($body, $pid, 'respawned worker'); } { my $conn = conn_for($sock, 'streaming callback'); $conn->write("GET /callback HTTP/1.0\r\n\r\n"); ok($conn->read(my $buf, 8192), 'read response'); my ($head, $body) = split(/\r\n\r\n/, $buf); is($body, "hello world\n", 'callback body matches expected'); } { my $conn = conn_for($sock, 'getline-die'); $conn->write("GET /getline-die HTTP/1.1\r\nHost: example.com\r\n\r\n"); ok($conn->read(my $buf, 8192), 'read some response'); like($buf, qr!HTTP/1\.1 200\b[^\r]*\r\n!, 'got some sort of header'); is($conn->read(my $nil, 8192), 0, 'read EOF'); $conn = undef; my $after = capture($err); is(scalar(grep(/GETLINE FAIL/, @$after)), 1, 'failure logged'); is(scalar(grep(/CLOSE FAIL/, @$after)), 1, 'body->close not called'); } { my $conn = conn_for($sock, 'close-die'); $conn->write("GET /close-die HTTP/1.1\r\nHost: example.com\r\n\r\n"); ok($conn->read(my $buf, 8192), 'read some response'); like($buf, qr!HTTP/1\.1 200\b[^\r]*\r\n!, 'got some sort of header'); is($conn->read(my $nil, 8192), 0, 'read EOF'); $conn = undef; my $after = capture($err); is(scalar(grep(/GETLINE FAIL/, @$after)), 0, 'getline not failed'); is(scalar(grep(/CLOSE FAIL/, @$after)), 1, 'body->close not called'); } sub check_400 { my ($conn) = @_; my $r = $conn->read(my $buf, 8192); # ECONNRESET and $r==0 are both observed on FreeBSD 11.2 if (!defined($r)) { ok($!{ECONNRESET}, 'ECONNRESET on read (BSD sometimes)'); } elsif ($r > 0) { like($buf, qr!\AHTTP/1\.\d 400 !, 'got 400 response'); } else { is($r, 0, 'got EOF (BSD sometimes)'); } close($conn); # ensure we don't get SIGPIPE later } { local $SIG{PIPE} = 'IGNORE'; my $conn = conn_for($sock, 'excessive header'); $conn->write("GET /callback HTTP/1.0\r\n"); foreach my $i (1..500000) { last unless $conn->write("X-xxxxxJunk-$i: omg\r\n"); } ok(!$conn->write("\r\n"), 'broken request'); check_400($conn); } { my $conn = conn_for($sock, 'excessive body Content-Length'); my $n = (10 * 1024 * 1024) + 1; $conn->write("PUT /sha1 HTTP/1.0\r\nContent-Length: $n\r\n\r\n"); my $r = $conn->read(my $buf, 8192); ok($r > 0, 'read response'); my ($head, $body) = split(/\r\n\r\n/, $buf); like($head, qr/\b413\b/, 'got 413 response'); } { my $conn = conn_for($sock, 'excessive body chunked'); my $n = (10 * 1024 * 1024) + 1; $conn->write("PUT /sha1 HTTP/1.1\r\nTransfer-Encoding: chunked\r\n"); $conn->write("\r\n".sprintf("%x\r\n", $n)); my $r = $conn->read(my $buf, 8192); ok($r > 0, 'read response'); my ($head, $body) = split(/\r\n\r\n/, $buf); like($head, qr/\b413\b/, 'got 413 response'); } { my $conn = conn_for($sock, '1.1 Transfer-Encoding bogus'); $conn->write("PUT /sha1 HTTP/1.1\r\nTransfer-Encoding: bogus\r\n\r\n"); $conn->read(my $buf, 4096); like($buf, qr!\AHTTP/1\.[0-9] 400 !, 'got 400 response on bogus TE'); } { my $conn = conn_for($sock, '1.1 Content-Length bogus'); $conn->write("PUT /sha1 HTTP/1.1\r\nContent-Length: 3.3\r\n\r\n"); $conn->read(my $buf, 4096); like($buf, qr!\AHTTP/1\.[0-9] 400 !, 'got 400 response on bad length'); } { my $req = "PUT /sha1 HTTP/1.1\r\nContent-Length: 3\r\n" . "Content-Length: 3\r\n\r\n"; # this is stricter than it needs to be. Due to the way # Plack::HTTPParser, PSGI specs, and how hash tables work in common # languages; it's not possible to tell the difference between folded # and intentionally bad commas (e.g. "Content-Length: 3, 3") if (0) { require Plack::HTTPParser; # XS or pure Perl require Data::Dumper; Plack::HTTPParser::parse_http_request($req, my $env = {}); diag Data::Dumper::Dumper($env); # "Content-Length: 3, 3" } my $conn = conn_for($sock, '1.1 Content-Length dupe'); $conn->write($req); $conn->read(my $buf, 4096); like($buf, qr!\AHTTP/1\.[0-9] 400 !, 'got 400 response on dupe length'); } { my $conn = conn_for($sock, 'chunk with pipeline'); my $n = 10; my $payload = 'b'x$n; $conn->write("PUT /sha1 HTTP/1.1\r\nTransfer-Encoding: chunked\r\n"); $conn->write("\r\n".sprintf("%x\r\n", $n)); $conn->write($payload . "\r\n0\r\n\r\nGET /empty HTTP/1.0\r\n\r\n"); $conn->read(my $buf, 4096); my $lim = 0; $lim++ while ($conn->read($buf, 4096, length($buf)) && $lim < 9); my $exp = sha1_hex($payload); like($buf, qr!\r\n\r\n${exp}HTTP/1\.0 200 OK\r\n!s, 'chunk parser can handled pipelined requests'); } # Unix domain sockets { my $u = IO::Socket::UNIX->new(Type => SOCK_STREAM, Peer => $upath); ok($u, 'unix socket connected'); $u->write("GET /host-port HTTP/1.0\r\n\r\n"); $u->read(my $buf, 4096); like($buf, qr!\r\n\r\n127\.0\.0\.1:0\z!, 'set REMOTE_ADDR and REMOTE_PORT for Unix socket'); } sub conn_for { my ($dest, $msg) = @_; my $conn = tcp_connect($dest); ok($conn, "connected for $msg"); setsockopt($conn, IPPROTO_TCP, TCP_NODELAY, 1); return $conn; } { my $conn = conn_for($sock, 'host-port'); $conn->write("GET /host-port HTTP/1.0\r\n\r\n"); $conn->read(my $buf, 4096); my ($head, $body) = split(/\r\n\r\n/, $buf); my ($addr, $port) = split(/:/, $body); is($addr, $conn->sockhost, 'host matches addr'); is($port, $conn->sockport, 'port matches'); } # graceful termination { my $conn = conn_for($sock, 'graceful termination via slow header'); $conn->write("GET /slow-header HTTP/1.0\r\n" . "X-Check-Fifo: $fifo\r\n\r\n"); open my $f, '>', $fifo or die "open $fifo: $!\n"; $f->autoflush(1); ok(print($f "hello\n"), 'wrote something to fifo'); is($td->kill, 1, 'started graceful shutdown'); ok(print($f "world\n"), 'wrote else to fifo'); close $f or die "close fifo: $!\n"; $conn->read(my $buf, 8192); my ($head, $body) = split(/\r\n\r\n/, $buf, 2); like($head, qr!\AHTTP/1\.[01] 200 OK!, 'got 200 for slow-header'); is($body, "hello\nworld\n", 'read expected body'); $td->join; is($?, 0, 'no error'); $spawn_httpd->('-W0'); } { my $conn = conn_for($sock, 'graceful termination via slow-body'); $conn->write("GET /slow-body HTTP/1.0\r\n" . "X-Check-Fifo: $fifo\r\n\r\n"); open my $f, '>', $fifo or die "open $fifo: $!\n"; $f->autoflush(1); my $buf; $conn->sysread($buf, 8192); like($buf, qr!\AHTTP/1\.[01] 200 OK!, 'got 200 for slow-body'); like($buf, qr!\r\n\r\n!, 'finished HTTP response header'); foreach my $c ('a'..'c') { $c .= "\n"; ok(print($f $c), 'wrote line to fifo'); $conn->sysread($buf, 8192); is($buf, $c, 'got trickle for reading'); } is($td->kill, 1, 'started graceful shutdown'); ok(print($f "world\n"), 'wrote else to fifo'); close $f or die "close fifo: $!\n"; $conn->sysread($buf, 8192); is($buf, "world\n", 'read expected body'); is($conn->sysread($buf, 8192), 0, 'got EOF from server'); $td->join; is($?, 0, 'no error'); $spawn_httpd->('-W0'); } sub delay { select(undef, undef, undef, shift || rand(0.02)) } my $str = 'abcdefghijklmnopqrstuvwxyz'; my $len = length $str; is($len, 26, 'got the alphabet'); my $check_self = sub { my ($conn) = @_; vec(my $rbits = '', fileno($conn), 1) = 1; select($rbits, undef, undef, 30) or Carp::confess('timed out'); $conn->read(my $buf, 4096); my ($head, $body) = split(/\r\n\r\n/, $buf, 2); like($head, qr/\r\nContent-Length: 40\r\n/s, 'got expected length'); is($body, sha1_hex($str), 'read expected body'); }; SKIP: { my $curl = which('curl') or skip('curl(1) missing', 4); my $base = 'http://' . $sock->sockhost . ':' . $sock->sockport; my $url = "$base/sha1"; my ($r, $w); pipe($r, $w) or die "pipe: $!"; my $cmd = [$curl, qw(--tcp-nodelay -T- -HExpect: -sSN), $url]; open my $cout, '+>', undef or die; open my $cerr, '>', undef or die; my $rdr = { 0 => $r, 1 => $cout, 2 => $cerr }; my $pid = spawn($cmd, undef, $rdr); close $r or die "close read pipe: $!"; foreach my $c ('a'..'z') { print $w $c or die "failed to write to curl: $!"; delay(); } close $w or die "close write pipe: $!"; waitpid($pid, 0); is($?, 0, 'curl exited successfully'); is(-s $cerr, 0, 'no errors from curl'); seek($cout, 0, SEEK_SET); is(<$cout>, sha1_hex($str), 'read expected body'); my $fh = popen_rd([$curl, '-sS', "$base/async-big"]); my $n = 0; my $non_zero = 0; while (1) { my $r = sysread($fh, my $buf, 4096) or last; $n += $r; $buf =~ /\A\0+\z/ or $non_zero++; } close $fh or die "close curl pipe: $!"; is($?, 0, 'curl succesful'); is($n, 30 * 1024 * 1024, 'got expected output from curl'); is($non_zero, 0, 'read all zeros'); require_mods(@zmods, 4); my $buf = xqx([$curl, '-sS', "$base/psgi-return-gzip"]); is($?, 0, 'curl succesful'); IO::Uncompress::Gunzip::gunzip(\$buf => \(my $out)); is($out, "hello world\n"); my $curl_rdr = { 2 => \(my $curl_err = '') }; $buf = xqx([$curl, qw(-sSv --compressed), "$base/psgi-return-compressible"], undef, $curl_rdr); is($?, 0, 'curl --compressed successful'); is($buf, "goodbye world\n", 'gzipped response as expected'); like($curl_err, qr/\bContent-Encoding: gzip\b/, 'curl got gzipped response'); } { my $conn = conn_for($sock, 'psgi_return ENOENT'); print $conn "GET /psgi-return-enoent HTTP/1.1\r\n\r\n" or die; my $buf = ''; sysread($conn, $buf, 16384, length($buf)) until $buf =~ /\r\n\r\n/; like($buf, qr!HTTP/1\.[01] 500\b!, 'got 500 error on ENOENT'); } { my $conn = conn_for($sock, '1.1 pipeline together'); $conn->write("PUT /sha1 HTTP/1.1\r\nUser-agent: hello\r\n\r\n" . "PUT /sha1 HTTP/1.1\r\n\r\n"); my $buf = ''; my @r; until (scalar(@r) >= 2) { my $r = $conn->sysread(my $tmp, 4096); die $! unless defined $r; die "EOF <$buf>" unless $r; $buf .= $tmp; @r = ($buf =~ /\r\n\r\n([a-f0-9]{40})/g); } is(2, scalar @r, 'got 2 responses'); my $i = 3; foreach my $hex (@r) { is($hex, sha1_hex(''), "read expected body $i"); $i++; } } { my $conn = conn_for($sock, 'no TCP_CORK on empty body'); $conn->write("GET /empty HTTP/1.1\r\nHost:example.com\r\n\r\n"); my $buf = ''; my $t0 = [ gettimeofday ]; until ($buf =~ /\r\n\r\n/s) { $conn->sysread($buf, 4096, length($buf)); } my $elapsed = tv_interval($t0, [ gettimeofday ]); ok($elapsed < 0.190, 'no 200ms TCP cork delay on empty body'); } { my $conn = conn_for($sock, 'graceful termination during slow request'); $conn->write("PUT /sha1 HTTP/1.0\r\nContent-Length: $len\r\n\r\n"); # XXX ugh, want a reliable and non-intrusive way to detect # that the server has started buffering our partial request so we # can reliably test graceful termination. Maybe making this and # similar tests dependent on Linux strace is a possibility? delay(0.1); is($td->kill, 1, 'start graceful shutdown'); my $n = 0; foreach my $c ('a'..'z') { $n += $conn->write($c); } ok(kill(0, $td->{pid}), 'graceful shutdown did not kill httpd'); is($n, $len, 'wrote alphabet'); $check_self->($conn); $td->join; is($?, 0, 'no error'); $spawn_httpd->('-W0'); } # various DoS attacks against the chunk parser: { local $SIG{PIPE} = 'IGNORE'; my $conn = conn_for($sock, '1.1 chunk header excessive'); $conn->write("PUT /sha1 HTTP/1.1\r\nTransfer-Encoding:chunked\r\n\r\n"); my $n = 0; my $w; while ($w = $conn->write('ffffffff')) { $n += $w; } ok($!, 'got error set in $!'); is($w, undef, 'write error happened'); ok($n > 0, 'was able to write'); check_400($conn); $conn = conn_for($sock, '1.1 chunk trailer excessive'); $conn->write("PUT /sha1 HTTP/1.1\r\nTransfer-Encoding:chunked\r\n\r\n"); is($conn->syswrite("1\r\na"), 4, 'wrote first header + chunk'); delay(); $n = 0; while ($w = $conn->write("\r")) { $n += $w; } ok($!, 'got error set in $!'); ok($n > 0, 'wrote part of chunk end (\r)'); check_400($conn); } { my $conn = conn_for($sock, '1.1 chunked close trickle'); $conn->write("PUT /sha1 HTTP/1.1\r\nConnection:close\r\n"); $conn->write("Transfer-encoding: chunked\r\n\r\n"); foreach my $x ('a'..'z') { delay(); $conn->write('1'); delay(); $conn->write("\r"); delay(); $conn->write("\n"); delay(); $conn->write($x); delay(); $conn->write("\r"); delay(); $conn->write("\n"); } $conn->write('0'); delay(); $conn->write("\r"); delay(); $conn->write("\n"); delay(); $conn->write("\r"); delay(); $conn->write("\n"); delay(); $check_self->($conn); } { my $conn = conn_for($sock, '1.1 chunked close'); $conn->write("PUT /sha1 HTTP/1.1\r\nConnection:close\r\n"); my $xlen = sprintf('%x', $len); $conn->write("Transfer-Encoding: chunked\r\n\r\n$xlen\r\n" . "$str\r\n0\r\n\r\n"); $check_self->($conn); } { my $conn = conn_for($sock, 'chunked body + pipeline'); $conn->write("PUT /sha1 HTTP/1.1\r\n" . "Transfer-Encoding: chunked\r\n"); delay(); $conn->write("\r\n1\r\n"); delay(); $conn->write('a'); delay(); $conn->write("\r\n0\r\n\r\nPUT /sha1 HTTP/1.1\r\n"); delay(); my $buf = ''; until ($buf =~ /\r\n\r\n[a-f0-9]{40}\z/) { $conn->sysread(my $tmp, 4096); $buf .= $tmp; } my ($head, $body) = split(/\r\n\r\n/, $buf, 2); like($head, qr/\r\nContent-Length: 40\r\n/s, 'got expected length'); is($body, sha1_hex('a'), 'read expected body'); $conn->write("Connection: close\r\n"); $conn->write("Content-Length: $len\r\n\r\n$str"); $check_self->($conn); } { my $conn = conn_for($sock, 'trickle header, one-shot body + pipeline'); $conn->write("PUT /sha1 HTTP/1.0\r\n" . "Connection: keep-alive\r\n"); delay(); $conn->write("Content-Length: $len\r\n\r\n${str}PUT"); my $buf = ''; until ($buf =~ /\r\n\r\n[a-f0-9]{40}\z/) { $conn->sysread(my $tmp, 4096); $buf .= $tmp; } my ($head, $body) = split(/\r\n\r\n/, $buf, 2); like($head, qr/\r\nContent-Length: 40\r\n/s, 'got expected length'); is($body, sha1_hex($str), 'read expected body'); $conn->write(" /sha1 HTTP/1.0\r\nContent-Length: $len\r\n\r\n$str"); $check_self->($conn); } { my $conn = conn_for($sock, 'trickle body'); $conn->write("PUT /sha1 HTTP/1.0\r\n"); $conn->write("Content-Length: $len\r\n\r\n"); my $beg = substr($str, 0, 10); my $end = substr($str, 10); is($beg . $end, $str, 'substr setup correct'); delay(); $conn->write($beg); delay(); $conn->write($end); $check_self->($conn); } { my $conn = conn_for($sock, 'one-shot write'); $conn->write("PUT /sha1 HTTP/1.0\r\n" . "Content-Length: $len\r\n\r\n$str"); $check_self->($conn); } { my $conn = conn_for($sock, 'trickle header, one-shot body'); $conn->write("PUT /sha1 HTTP/1.0\r\n"); delay(); $conn->write("Content-Length: $len\r\n\r\n$str"); $check_self->($conn); } { my $conn = conn_for($sock, '1.1 Connection: close'); $conn->write("PUT /sha1 HTTP/1.1\r\nConnection:close\r\n"); delay(); $conn->write("Content-Length: $len\r\n\r\n$str"); $check_self->($conn); } { my $conn = conn_for($sock, '1.1 pipeline start'); $conn->write("PUT /sha1 HTTP/1.1\r\n\r\nPUT"); my $buf = ''; until ($buf =~ /\r\n\r\n[a-f0-9]{40}\z/) { $conn->sysread(my $tmp, 4096); $buf .= $tmp; } my ($head, $body) = split(/\r\n\r\n/, $buf, 2); like($head, qr/\r\nContent-Length: 40\r\n/s, 'got expected length'); is($body, sha1_hex(''), 'read expected body'); # 2nd request $conn->write(" /sha1 HTTP/1.1\r\n\r\n"); $buf = ''; until ($buf =~ /\r\n\r\n[a-f0-9]{40}\z/) { $conn->sysread(my $tmp, 4096); $buf .= $tmp; } ($head, $body) = split(/\r\n\r\n/, $buf, 2); like($head, qr/\r\nContent-Length: 40\r\n/s, 'got expected length'); is($body, sha1_hex(''), 'read expected body #2'); } SKIP: { skip 'TCP_DEFER_ACCEPT is Linux-only', 1 if $^O ne 'linux'; my $var = $TCP_DEFER_ACCEPT; defined(my $x = getsockopt($sock, IPPROTO_TCP, $var)) or die; is(unpack('i', $x), $defer_accept_val, 'TCP_DEFER_ACCEPT unchanged if previously set'); }; SKIP: { skip 'SO_ACCEPTFILTER is FreeBSD-only', 1 if $^O ne 'freebsd'; skip 'accf_data not loaded: kldload accf_data' if !defined $accf_arg; my $var = PublicInbox::Daemon::SO_ACCEPTFILTER(); defined(my $x = getsockopt($sock, SOL_SOCKET, $var)) or die; is($x, $accf_arg, 'SO_ACCEPTFILTER unchanged if previously set'); }; SKIP: { skip 'only testing lsof(8) output on Linux', 1 if $^O ne 'linux'; my $lsof = which('lsof') or skip 'no lsof in PATH', 1; my $null_in = ''; my $rdr = { 2 => \(my $null_err), 0 => \$null_in }; my @lsof = xqx([$lsof, '-p', $td->{pid}], undef, $rdr); is_deeply([grep(/\bdeleted\b/, @lsof)], [], 'no lingering deleted inputs'); # filter out pipes inherited from the parent my @this = xqx([$lsof, '-p', $$], undef, $rdr); my $bad; my $extract_inodes = sub { map {; my @f = split(' ', $_); my $inode = $f[-2]; $bad = $_ if $inode !~ /\A[0-9]+\z/; $inode => 1; } grep (/\bpipe\b/, @_); }; my %child = $extract_inodes->(@lsof); my %parent = $extract_inodes->(@this); skip("inode not in expected format: $bad", 1) if defined($bad); delete @child{(keys %parent)}; is_deeply([], [keys %child], 'no extra pipes with -W0'); }; # ensure compatibility with other PSGI servers SKIP: { require_mods(@zmods, qw(Plack::Test HTTP::Request::Common), 3); use_ok 'HTTP::Request::Common'; use_ok 'Plack::Test'; STDERR->flush; open my $olderr, '>&', \*STDERR or die "dup stderr: $!"; open my $tmperr, '+>', undef or die; open STDERR, '>&', $tmperr or die; STDERR->autoflush(1); my $app = require $psgi; test_psgi($app, sub { my ($cb) = @_; my $req = GET('http://example.com/psgi-return-gzip'); my $res = $cb->($req); my $buf = $res->content; IO::Uncompress::Gunzip::gunzip(\$buf => \(my $out)); is($out, "hello world\n", 'got expected output'); $req = GET('http://example.com/psgi-return-enoent'); $res = $cb->($req); is($res->code, 500, 'got error on ENOENT'); seek($tmperr, 0, SEEK_SET) or die; my $errbuf = do { local $/; <$tmperr> }; like($errbuf, qr/this-better-not-exist/, 'error logged about missing command'); }); open STDERR, '>&', $olderr or die "restore stderr: $!"; } done_testing(); sub capture { my ($f) = @_; open my $fh, '+<', $f or die "failed to open $f: $!\n"; local $/ = "\n"; my @r = <$fh>; truncate($fh, 0) or die "truncate failed on $f: $!\n"; \@r } 1; public-inbox-1.6.1/t/httpd-https.t000066400000000000000000000071641377346120300170430ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use Socket qw(SOCK_STREAM IPPROTO_TCP SOL_SOCKET); use PublicInbox::TestCommon; # IO::Poll is part of the standard library, but distros may split them off... require_mods(qw(IO::Socket::SSL IO::Poll Plack::Util)); my $cert = 'certs/server-cert.pem'; my $key = 'certs/server-key.pem'; unless (-r $key && -r $cert) { plan skip_all => "certs/ missing for $0, run $^X ./create-certs.perl in certs/"; } use_ok 'PublicInbox::TLS'; use_ok 'IO::Socket::SSL'; my $psgi = "./t/httpd-corner.psgi"; my ($tmpdir, $for_destroy) = tmpdir(); my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $https = tcp_server(); my $td; my $https_addr = $https->sockhost . ':' . $https->sockport; for my $args ( [ "-lhttps://$https_addr/?key=$key,cert=$cert" ], ) { for ($out, $err) { open my $fh, '>', $_ or die "truncate: $!"; } my $cmd = [ '-httpd', '-W0', @$args, "--stdout=$out", "--stderr=$err", $psgi ]; $td = start_script($cmd, undef, { 3 => $https }); my %o = ( SSL_hostname => 'server.local', SSL_verifycn_name => 'server.local', SSL_verify_mode => SSL_VERIFY_PEER(), SSL_ca_file => 'certs/test-ca.pem', ); # start negotiating a slow TLS connection my $slow = tcp_connect($https, Blocking => 0); $slow = IO::Socket::SSL->start_SSL($slow, SSL_startHandshake => 0, %o); my @poll = (fileno($slow)); my $slow_done = $slow->connect_SSL; if ($slow_done) { diag('W: connect_SSL early OK, slow client test invalid'); push @poll, PublicInbox::Syscall::EPOLLOUT(); } else { push @poll, PublicInbox::TLS::epollbit(); } # normal HTTPS my $c = tcp_connect($https); IO::Socket::SSL->start_SSL($c, %o); ok($c->print("GET /empty HTTP/1.1\r\n\r\nHost: example.com\r\n\r\n"), 'wrote HTTP request'); my $buf = ''; sysread($c, $buf, 2007, length($buf)) until $buf =~ /\r\n\r\n/; like($buf, qr!\AHTTP/1\.1 200!, 'read HTTP response'); # HTTPS with bad hostname $c = tcp_connect($https); $o{SSL_hostname} = $o{SSL_verifycn_name} = 'server.fail'; $c = IO::Socket::SSL->start_SSL($c, %o); is($c, undef, 'HTTPS fails with bad hostname'); $o{SSL_hostname} = $o{SSL_verifycn_name} = 'server.local'; $c = tcp_connect($https); IO::Socket::SSL->start_SSL($c, %o); ok($c, 'HTTPS succeeds again with valid hostname'); # slow TLS connection did not block the other fast clients while # connecting, finish it off: until ($slow_done) { IO::Poll::_poll(-1, @poll); $slow_done = $slow->connect_SSL and last; @poll = (fileno($slow), PublicInbox::TLS::epollbit()); } $slow->blocking(1); ok($slow->print("GET /empty HTTP/1.1\r\n\r\nHost: example.com\r\n\r\n"), 'wrote HTTP request from slow'); $buf = ''; sysread($slow, $buf, 666, length($buf)) until $buf =~ /\r\n\r\n/; like($buf, qr!\AHTTP/1\.1 200!, 'read HTTP response from slow'); $slow = undef; SKIP: { skip 'TCP_DEFER_ACCEPT is Linux-only', 2 if $^O ne 'linux'; my $var = eval { Socket::TCP_DEFER_ACCEPT() } // 9; defined(my $x = getsockopt($https, IPPROTO_TCP, $var)) or die; ok(unpack('i', $x) > 0, 'TCP_DEFER_ACCEPT set on https'); }; SKIP: { skip 'SO_ACCEPTFILTER is FreeBSD-only', 2 if $^O ne 'freebsd'; if (system('kldstat -m accf_data >/dev/null')) { skip 'accf_data not loaded? kldload accf_data', 2; } require PublicInbox::Daemon; my $var = PublicInbox::Daemon::SO_ACCEPTFILTER(); my $x = getsockopt($https, SOL_SOCKET, $var); like($x, qr/\Adataready\0+\z/, 'got dataready accf for https'); }; $c = undef; $td->kill; $td->join; is($?, 0, 'no error in exited process'); } done_testing(); 1; public-inbox-1.6.1/t/httpd-unix.t000066400000000000000000000141671377346120300166650ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ # Tests for binding Unix domain sockets use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use Errno qw(EADDRINUSE); use Cwd qw(abs_path); use Carp qw(croak); require_mods(qw(Plack::Util Plack::Builder HTTP::Date HTTP::Status)); use IO::Socket::UNIX; my ($tmpdir, $for_destroy) = tmpdir(); my $unix = "$tmpdir/unix.sock"; my $psgi = './t/httpd-corner.psgi'; my $out = "$tmpdir/out.log"; my $err = "$tmpdir/err.log"; my $td; my $spawn_httpd = sub { my (@args) = @_; my $cmd = [ '-httpd', @args, "--stdout=$out", "--stderr=$err", $psgi ]; $td = start_script($cmd); }; { require PublicInbox::Daemon; my $l = "$tmpdir/named.sock"; my $s = IO::Socket::UNIX->new(Listen => 5, Local => $l, Type => SOCK_STREAM); is(PublicInbox::Daemon::sockname($s), $l, 'sockname works for UNIX'); } ok(!-S $unix, 'UNIX socket does not exist, yet'); $spawn_httpd->("-l$unix", '-W0'); my %o = (Peer => $unix, Type => SOCK_STREAM); for (1..1000) { last if -S $unix && IO::Socket::UNIX->new(%o); select undef, undef, undef, 0.02 } ok(-S $unix, 'UNIX socket was bound by -httpd'); sub check_sock ($) { my ($unix) = @_; my $sock = IO::Socket::UNIX->new(Peer => $unix, Type => SOCK_STREAM); warn "E: $! connecting to $unix\n" unless defined $sock; ok($sock, 'client UNIX socket connected'); ok($sock->write("GET /host-port HTTP/1.0\r\n\r\n"), 'wrote req to server'); ok($sock->read(my $buf, 4096), 'read response'); like($buf, qr!\r\n\r\n127\.0\.0\.1:0\z!, 'set REMOTE_ADDR and REMOTE_PORT for Unix socket'); } check_sock($unix); { # do not clobber existing socket my %err = ( 'linux' => EADDRINUSE, 'freebsd' => EADDRINUSE ); open my $out, '>>', "$tmpdir/1" or die "redirect failed: $!"; open my $err, '>>', "$tmpdir/2" or die "redirect failed: $!"; my $cmd = ['-httpd', '-l', $unix, '-W0', $psgi]; my $ftd = start_script($cmd, undef, { 1 => $out, 2 => $err }); $ftd->join; isnt($?, 0, 'httpd failure set $?'); SKIP: { my $ec = $err{$^O} or skip("not sure if $^O fails with EADDRINUSE", 1); is($? >> 8, $ec, 'httpd failed with EADDRINUSE'); }; open my $fh, "$tmpdir/2" or die "failed to open $tmpdir/2: $!"; local $/; my $e = <$fh>; like($e, qr/no listeners bound/i, 'got error message'); is(-s "$tmpdir/1", 0, 'stdout was empty'); } { is($td->kill, 1, 'terminate existing process'); $td->join; is($?, 0, 'existing httpd exited successfully'); ok(-S $unix, 'unix socket still exists'); } # portable Perl can delay or miss signal dispatches due to races, # so disable some tests on systems lacking signalfd(2) or EVFILT_SIGNAL my $has_sigfd = PublicInbox::Sigfd->new({}, 0) ? 1 : $ENV{TEST_UNRELIABLE}; sub delay_until { my $cond = shift; my $end = time + 30; do { return if $cond->(); select undef, undef, undef, 0.012; } until (time > $end); Carp::confess('condition failed'); } SKIP: { require_mods('Net::Server::Daemonize', 52); $has_sigfd or skip('signalfd / EVFILT_SIGNAL not available', 52); my $pid_file = "$tmpdir/pid"; my $read_pid = sub { my $f = shift; open my $fh, '<', $f or die "open $f failed: $!"; my $pid = do { local $/; <$fh> }; chomp($pid) or die("pid file not ready $!"); $pid; }; for my $w (qw(-W0 -W1)) { # wait for daemonization $spawn_httpd->("-l$unix", '-D', '-P', $pid_file, $w); $td->join; is($?, 0, "daemonized $w process"); check_sock($unix); ok(-s $pid_file, "$w pid file written"); my $pid = $read_pid->($pid_file); is(kill('TERM', $pid), 1, "signaled daemonized $w process"); delay_until(sub { !kill(0, $pid) }); is(kill(0, $pid), 0, "daemonized $w process exited"); ok(!-e $pid_file, "$w pid file unlinked at exit"); } # try a USR2 upgrade with workers: my $httpd = abs_path('blib/script/public-inbox-httpd'); $psgi = abs_path($psgi); my $opt = { run_mode => 0 }; my @args = ("-l$unix", '-D', '-P', $pid_file, -1, $out, -2, $err); $td = start_script([$httpd, @args, $psgi], undef, $opt); $td->join; is($?, 0, "daemonized process again"); check_sock($unix); ok(-s $pid_file, 'pid file written'); my $pid = $read_pid->($pid_file); # stop worker to ensure check_sock below hits $new_pid kill('TTOU', $pid) or die "TTOU failed: $!"; kill('USR2', $pid) or die "USR2 failed: $!"; delay_until(sub { $pid != (eval { $read_pid->($pid_file) } // $pid) }); my $new_pid = $read_pid->($pid_file); isnt($new_pid, $pid, 'new child started'); ok($new_pid > 0, '$new_pid valid'); delay_until(sub { -s "$pid_file.oldbin" }); my $old_pid = $read_pid->("$pid_file.oldbin"); is($old_pid, $pid, '.oldbin pid file written'); ok($old_pid > 0, '$old_pid valid'); check_sock($unix); # ensures $new_pid is ready to receive signals # first, back out of the upgrade kill('QUIT', $new_pid) or die "kill new PID failed: $!"; delay_until(sub { $pid == (eval { $read_pid->($pid_file) } // 0) }); is($read_pid->($pid_file), $pid, 'old PID file restored'); ok(!-f "$pid_file.oldbin", '.oldbin PID file gone'); # retry USR2 upgrade kill('USR2', $pid) or die "USR2 failed: $!"; delay_until(sub { $pid != (eval { $read_pid->($pid_file) } // $pid) }); $new_pid = $read_pid->($pid_file); isnt($new_pid, $pid, 'new child started again'); $old_pid = $read_pid->("$pid_file.oldbin"); is($old_pid, $pid, '.oldbin pid file written'); # drop the old parent kill('QUIT', $old_pid) or die "QUIT failed: $!"; delay_until(sub { !kill(0, $old_pid) }); ok(!-f "$pid_file.oldbin", '.oldbin PID file gone'); # drop the new child check_sock($unix); kill('QUIT', $new_pid) or die "QUIT failed: $!"; delay_until(sub { !kill(0, $new_pid) }); ok(!-f $pid_file, 'PID file is gone'); # try USR2 without workers (-W0) $td = start_script([$httpd, @args, '-W0', $psgi], undef, $opt); $td->join; is($?, 0, 'daemonized w/o workers'); check_sock($unix); $pid = $read_pid->($pid_file); # replace running process kill('USR2', $pid) or die "USR2 failed: $!"; delay_until(sub { !kill(0, $pid) }); check_sock($unix); $pid = $read_pid->($pid_file); kill('QUIT', $pid) or die "USR2 failed: $!"; delay_until(sub { !kill(0, $pid) }); ok(!-f $pid_file, 'PID file is gone'); } done_testing(); public-inbox-1.6.1/t/httpd.t000066400000000000000000000060011377346120300156700ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Eml; use Socket qw(IPPROTO_TCP SOL_SOCKET); require_mods(qw(Plack::Util Plack::Builder HTTP::Date HTTP::Status)); # FIXME: too much setup my ($tmpdir, $for_destroy) = tmpdir(); my $home = "$tmpdir/pi-home"; my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $maindir = "$tmpdir/main.git"; my $group = 'test-httpd'; my $addr = $group . '@example.com'; my $cfgpfx = "publicinbox.$group"; my $sock = tcp_server(); my $td; use_ok 'PublicInbox::Git'; use_ok 'PublicInbox::Import'; { local $ENV{HOME} = $home; my $cmd = [ '-init', $group, $maindir, 'http://example.com/', $addr ]; ok(run_script($cmd), 'init ran properly'); # ensure successful message delivery { my $mime = PublicInbox::Eml->new(< To: You Cc: $addr Message-Id: Subject: hihi Date: Thu, 01 Jan 1970 06:06:06 +0000 nntp EOF my $git = PublicInbox::Git->new($maindir); my $im = PublicInbox::Import->new($git, 'test', $addr); $im->add($mime); $im->done($mime); } ok($sock, 'sock created'); $cmd = [ '-httpd', '-W0', "--stdout=$out", "--stderr=$err" ]; $td = start_script($cmd, undef, { 3 => $sock }); my $host = $sock->sockhost; my $port = $sock->sockport; { my $bad = tcp_connect($sock); print $bad "GETT / HTTP/1.0\r\n\r\n" or die; like(<$bad>, qr!\AHTTP/1\.[01] 405\b!, 'got 405 on bad req'); } my $conn = tcp_connect($sock); ok($conn, 'connected'); ok($conn->write("GET / HTTP/1.0\r\n\r\n"), 'wrote data to socket'); { my $buf; ok($conn->read($buf, 4096), 'read some bytes'); like($buf, qr!\AHTTP/1\.[01] 404\b!, 'got 404 response'); is($conn->read($buf, 1), 0, "EOF"); } is(xsys(qw(git clone -q --mirror), "http://$host:$port/$group", "$tmpdir/clone.git"), 0, 'smart clone successful'); # ensure dumb cloning works, too: is(xsys('git', "--git-dir=$maindir", qw(config http.uploadpack false)), 0, 'disable http.uploadpack'); is(xsys(qw(git clone -q --mirror), "http://$host:$port/$group", "$tmpdir/dumb.git"), 0, 'clone successful'); ok($td->kill, 'killed httpd'); $td->join; is(xsys('git', "--git-dir=$tmpdir/clone.git", qw(fsck --no-verbose)), 0, 'fsck on cloned directory successful'); } SKIP: { skip 'TCP_DEFER_ACCEPT is Linux-only', 1 if $^O ne 'linux'; my $var = eval { Socket::TCP_DEFER_ACCEPT() } // 9; defined(my $x = getsockopt($sock, IPPROTO_TCP, $var)) or die; ok(unpack('i', $x) > 0, 'TCP_DEFER_ACCEPT set'); }; SKIP: { skip 'SO_ACCEPTFILTER is FreeBSD-only', 1 if $^O ne 'freebsd'; if (system('kldstat -m accf_http >/dev/null') != 0) { skip 'accf_http not loaded: kldload accf_http', 1; } require PublicInbox::Daemon; my $var = PublicInbox::Daemon::SO_ACCEPTFILTER(); my $x = getsockopt($sock, SOL_SOCKET, $var); like($x, qr/\Ahttpready\0+\z/, 'got httpready accf for HTTP'); }; done_testing(); 1; public-inbox-1.6.1/t/hval.t000066400000000000000000000033341377346120300155050ustar00rootroot00000000000000# Copyright (C) 2017-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use_ok 'PublicInbox::Hval', qw(to_attr); # reverse the result of to_attr sub from_attr ($) { my ($str) = @_; my $first = ''; if ($str =~ s/\AZ([a-f0-9]{2})//ms) { $first = chr(hex($1)); } $str =~ s!::([a-f0-9]{2})!chr(hex($1))!egms; $str =~ tr!:!/!; utf8::decode($str); $first . $str; } my $ibx = { -no_obfuscate_re => qr/(?:example\.com)\z/i, -no_obfuscate => { 'meta@public-inbox.org' => 1, } }; my $html = <<'EOF'; hello@example.comm hello@example.com meta@public-inbox.org test@public-inbox.org test@a.b.c.org te.st@example.org EOF PublicInbox::Hval::obfuscate_addrs($ibx, $html); my $exp = <<'EOF'; hello@example•comm hello@example.com meta@public-inbox.org test@public-inbox•org test@a•b.c.org te.st@example•org EOF is($html, $exp, 'only obfuscated relevant addresses'); is(PublicInbox::Hval::to_filename('foo bar '), 'foo-bar', 'to_filename has no trailing -'); is(PublicInbox::Hval::to_filename("foo bar\nanother line\n"), 'foo-bar', 'to_filename has no repeated -, and nothing past LF'); is(PublicInbox::Hval::to_filename("foo....bar"), 'foo.bar', 'to_filename squeezes -'); is(PublicInbox::Hval::to_filename(''), undef, 'empty string returns undef'); my $s = "\0\x07\n"; PublicInbox::Hval::src_escape($s); is($s, "\\0\\a\n", 'src_escape works as intended'); foreach my $s ('Hello/World.pm', 'Zcat', 'hello world.c', 'Eléanor', '$at') { my $attr = to_attr($s); is(from_attr($attr), $s, "$s => $attr => $s round trips"); } { my $bad = to_attr('foo//bar'); ok(!$bad, 'double-slash rejected'); } done_testing(); public-inbox-1.6.1/t/idx_stack.t000066400000000000000000000036111377346120300165220ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use_ok 'PublicInbox::IdxStack'; my $oid_a = '03c21563cf15c241687966b5b2a3f37cdc193316'; my $oid_b = '963caad026055ab9bcbe3ee9550247f9d8840feb'; my $stk = PublicInbox::IdxStack->new; is($stk->read_prepare, $stk, 'nothing'); is($stk->num_records, 0, 'no records'); is($stk->pop_rec, undef, 'undef on empty'); $stk = PublicInbox::IdxStack->new; $stk->push_rec('m', 1234, 5678, $oid_a); is($stk->read_prepare, $stk, 'read_prepare'); is($stk->num_records, 1, 'num_records'); is_deeply([$stk->pop_rec], ['m', 1234, 5678, $oid_a], 'pop once'); is($stk->pop_rec, undef, 'undef on empty'); $stk = PublicInbox::IdxStack->new; $stk->push_rec('m', 1234, 5678, $oid_a); $stk->push_rec('d', 1234, 5678, $oid_b); is($stk->read_prepare, $stk, 'read_prepare'); is($stk->num_records, 2, 'num_records'); is_deeply([$stk->pop_rec], ['d', 1234, 5678, $oid_b], 'pop'); is_deeply([$stk->pop_rec], ['m', 1234, 5678, $oid_a], 'pop-pop'); is($stk->pop_rec, undef, 'empty'); SKIP: { $stk = undef; my $nr = $ENV{TEST_GIT_LOG} or skip 'TEST_GIT_LOG unset', 3; open my $fh, '-|', qw(git log --pretty=tformat:%at.%ct.%H), "-$nr" or die "git log: $!"; my @expect; while (<$fh>) { chomp; my ($at, $ct, $H) = split(/\./); $stk //= PublicInbox::IdxStack->new($H); # not bothering to parse blobs here, just using commit OID # as a blob OID since they're the same size + format $stk->push_rec('m', $at + 0, $ct + 0, $H); push(@expect, [ 'm', $at, $ct, $H ]); } $stk or skip('nothing from git log', 3); is($stk->read_prepare, $stk, 'read_prepare'); is($stk->num_records, scalar(@expect), 'num_records matches expected'); my @result; while (my @tmp = $stk->pop_rec) { unshift @result, \@tmp; } is_deeply(\@result, \@expect, 'results match expected'); } done_testing; public-inbox-1.6.1/t/imap.t000066400000000000000000000125051377346120300155010ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ # unit tests (no network) for IMAP, see t/imapd.t for end-to-end tests use strict; use Test::More; use PublicInbox::TestCommon; require_git 2.6; require_mods(qw(DBD::SQLite Email::Address::XS||Mail::Address Parse::RecDescent)); require_ok 'PublicInbox::IMAP'; require_ok 'PublicInbox::IMAPD'; my ($tmpdir, $for_destroy) = tmpdir(); my $cfgfile = "$tmpdir/config"; { open my $fh, '>', $cfgfile or BAIL_OUT $!; print $fh <new; my @w; local $SIG{__WARN__} = sub { push @w, @_ }; $imapd->refresh_groups; my $self = { imapd => $imapd }; is(scalar(@w), 1, 'got a warning for upper-case'); like($w[0], qr/IGNORE\.THIS/, 'warned about upper-case'); my $res = PublicInbox::IMAP::cmd_list($self, 'tag', 'x', '%'); is(scalar($$res =~ tr/\n/\n/), 2, 'only one result'); like($$res, qr/ x\r\ntag OK/, 'saw expected'); $res = PublicInbox::IMAP::cmd_list($self, 'tag', 'x.', '%'); is(scalar($$res =~ tr/\n/\n/), 3, 'only one result'); is(scalar(my @x = ($$res =~ m/ x\.[zy]\r\n/g)), 2, 'match expected'); $res = PublicInbox::IMAP::cmd_list($self, 't', 'x.(?{die "RCE"})', '%'); like($$res, qr/\At OK /, 'refname does not match attempted RCE'); $res = PublicInbox::IMAP::cmd_list($self, 't', '', '(?{die "RCE"})%'); like($$res, qr/\At OK /, 'wildcard does not match attempted RCE'); } { my $partial_prepare = \&PublicInbox::IMAP::partial_prepare; my $x = {}; my $n = 0; my $r = $partial_prepare->(\$n, $x, [], my $p = 'BODY[9]'); ok($r, $p); $r = $partial_prepare->(\$n, $x, [], $p = 'BODY[9]<5>'); ok($r, $p); $r = $partial_prepare->(\$n, $x, [], $p = 'BODY[9]<5.1>'); ok($r, $p); $r = $partial_prepare->(\$n, $x, [], $p = 'BODY[1.1]'); ok($r, $p); $r = $partial_prepare->(\$n, $x, [], $p = 'BODY[HEADER.FIELDS (DATE FROM)]'); ok($r, $p); $r = $partial_prepare->(\$n, $x, [], $p = 'BODY[HEADER.FIELDS.NOT (TO)]'); ok($r, $p); $r = $partial_prepare->(\$n, $x, [], $p = 'BODY[HEDDER.FIELDS.NOT (TO)]'); ok(!$r, "rejected misspelling $p"); $r = $partial_prepare->(\$n, $x, [], $p = 'BODY[1.1.HEADER.FIELDS (TO)]'); ok($r, $p); my $partial_body = \&PublicInbox::IMAP::partial_body; my $partial_hdr_get = \&PublicInbox::IMAP::partial_hdr_get; my $partial_hdr_not = \&PublicInbox::IMAP::partial_hdr_not; my $hdrs_regexp = \&PublicInbox::IMAP::hdrs_regexp; is_deeply($x, { 'BODY[9]' => [ $partial_body, 9, undef, undef, undef ], 'BODY[9]<5>' => [ $partial_body, 9, undef, 5, undef ], 'BODY[9]<5.1>' => [ $partial_body, 9, undef, 5, 1 ], 'BODY[1.1]' => [ $partial_body, '1.1', undef, undef, undef ], 'BODY[HEADER.FIELDS (DATE FROM)]' => [ $partial_hdr_get, undef, $hdrs_regexp->('DATE FROM'), undef, undef ], 'BODY[HEADER.FIELDS.NOT (TO)]' => [ $partial_hdr_not, undef, $hdrs_regexp->('TO'), undef, undef ], 'BODY[1.1.HEADER.FIELDS (TO)]' => [ $partial_hdr_get, '1.1', $hdrs_regexp->('TO'), undef, undef ], }, 'structure matches expected'); } { my $fetch_compile = \&PublicInbox::IMAP::fetch_compile; my ($cb, $ops, $partial) = $fetch_compile->(['BODY[]']); is($partial, undef, 'no partial fetch data'); is_deeply($ops, [ undef, \&PublicInbox::IMAP::op_crlf_bref, 'BODY[]', \&PublicInbox::IMAP::emit_rfc822 ], 'proper key and op compiled for BODY[]'); ($cb, $ops, $partial) = $fetch_compile->(['BODY', 'BODY[]']); is_deeply($ops, [ undef, \&PublicInbox::IMAP::op_crlf_bref, 'BODY[]', \&PublicInbox::IMAP::emit_rfc822, undef, \&PublicInbox::IMAP::op_eml_new, 'BODY', \&PublicInbox::IMAP::emit_body, ], 'placed op_eml_new before emit_body'); } # UID <=> MSN mapping sub uo2m_str_new ($) { no warnings 'uninitialized'; # uom2m_ary_new may have may have undef pack('S*', @{$_[0]->uo2m_ary_new}); # 2 bytes per-MSN } { my $ibx = bless { uid_range => [ 1, 2, 4 ] }, 'Uo2mTestInbox'; my $imap = bless { uid_base => 0, ibx => $ibx }, 'PublicInbox::IMAP'; my $uo2m = $imap->uo2m_ary_new; is_deeply($uo2m, [ 1, 2, undef, 3 ], 'uo2m ary'); $uo2m = uo2m_str_new($imap); is_deeply([ unpack('S*', $uo2m) ], [ 1, 2, 0, 3 ], 'uo2m str'); $ibx->{uid_range} = [ 1, 2, 4, 5, 6 ]; for ([ 1, 2, undef, 3 ], $uo2m) { $imap->{uo2m} = $_; is($imap->uid2msn(1), 1, 'uid2msn'); is($imap->uid2msn(4), 3, 'uid2msn'); is($imap->uo2m_last_uid, 4, 'uo2m_last_uid'); $imap->uo2m_extend(6); is($imap->uid2msn(5), 4, 'uid2msn 5 => 4'); is($imap->uid2msn(6), 5, 'uid2msn 6 => 5'); is($imap->uo2m_last_uid, 6, 'uo2m_last_uid'); my $msn2uid = $imap->msn2uid; my $range = '1,4:5'; $imap->can('msn_to_uid_range')->($msn2uid, $range); is($range, '1,5:6', 'range converted'); } } done_testing; package Uo2mTestInbox; use strict; require PublicInbox::DummyInbox; our @ISA = qw(PublicInbox::DummyInbox); sub over { shift } sub uid_range { my ($self, $beg, $end, undef) = @_; [ grep { $_ >= $beg && $_ <= $end } @{$self->{uid_range}} ]; } public-inbox-1.6.1/t/imap_searchqp.t000066400000000000000000000075541377346120300173770ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use Time::Local qw(timegm); use PublicInbox::TestCommon; require_mods(qw(DBD::SQLite Email::Address::XS||Mail::Address Parse::RecDescent)); use_ok 'PublicInbox::IMAPsearchqp'; use_ok 'PublicInbox::IMAP'; my $imap = bless {}, 'PublicInbox::IMAP'; my $q; my $parse = sub { PublicInbox::IMAPsearchqp::parse($imap, $_[0]) }; $q = $parse->(qq{BODY oops}); is($q->{xap}, 'b:"oops"', 'BODY key supported'); $q = $parse->(qq{OR HEADER TO Brian (OR FROM Ryan (OR TO Joe CC Scott))}); is($q->{sql}, undef, 'not using SQLite for complex query'); is($q->{xap}, '(t:"brian" OR (f:"ryan" OR (t:"joe" OR c:"scott")))', 'complex query matches Xapian query string'); $q = $parse->(qq{HEADER CC b SENTSINCE 2-Oct-1993}); is($q->{xap}, 'c:"b" d:19931002..', 'compound query'); $q = $parse->(qq{CHARSET UTF-8 From b}); is($q->{xap}, 'f:"b"', 'charset handled'); $q = $parse->(qq{CHARSET WTF-8 From b}); like($q, qr/\ANO \[/, 'bad charset rejected'); { # TODO: squelch errors by default? clients could flood logs open my $fh, '>:scalar', \(my $buf) or die; local *STDERR = $fh; $q = $parse->(qq{CHARSET}); } like($q, qr/\ABAD /, 'bad charset rejected'); $q = $parse->(qq{HEADER CC B (SENTBEFORE 2-Oct-1993)}); is($q->{xap}, 'c:"b" d:..19931002', 'compound query w/ parens'); { # limit recursion, stack and CPU cycles ain't free my $n = 10; my $s = ('('x$n ). 'To a' . ( ')'x$n ); $q = $parse->($s); is($q->{xap}, 't:"a"', 'nesting works'); ++$n; $s = ('('x$n ). 'To a' . ( ')'x$n ); my $err = $parse->($s); like($err, qr/\ABAD /, 'reject deep nesting'); } # IMAP has at least 6 ways of interpreting a date { my $t0 = timegm(0, 0, 0, 2, 10 - 1, 1993); my $t1 = $t0 + 86399; # no leap (day|second) support my $s; $q = $parse->($s = qq{SENTBEFORE 2-Oct-1993}); is_deeply($q->{sql}, \" AND ds <= $t0", 'SENTBEFORE SQL'); $q = $parse->("FROM z $s"); is($q->{xap}, 'f:"z" d:..19931002', 'SENTBEFORE Xapian'); $q = $parse->($s = qq{SENTSINCE 2-Oct-1993}); is_deeply($q->{sql}, \" AND ds >= $t0", 'SENTSINCE SQL'); $q = $parse->("FROM z $s"); is($q->{xap}, 'f:"z" d:19931002..', 'SENTSINCE Xapian'); $q = $parse->($s = qq{SENTON 2-Oct-1993}); is_deeply($q->{sql}, \" AND ds >= $t0 AND ds <= $t1", 'SENTON SQL'); $q = $parse->("FROM z $s"); is($q->{xap}, 'f:"z" dt:19931002000000..19931002235959', 'SENTON Xapian'); $q = $parse->($s = qq{BEFORE 2-Oct-1993}); is_deeply($q->{sql}, \" AND ts <= $t0", 'BEFORE SQL'); $q = $parse->("FROM z $s"); is($q->{xap}, qq{f:"z" ts:..$t0}, 'BEFORE Xapian'); $q = $parse->($s = qq{SINCE 2-Oct-1993}); is_deeply($q->{sql}, \" AND ts >= $t0", 'SINCE SQL'); $q = $parse->("FROM z $s"); is($q->{xap}, qq{f:"z" ts:$t0..}, 'SINCE Xapian'); $q = $parse->($s = qq{ON 2-Oct-1993}); is_deeply($q->{sql}, \" AND ts >= $t0 AND ts <= $t1", 'ON SQL'); $q = $parse->("FROM z $s"); is($q->{xap}, qq{f:"z" ts:$t0..$t1}, 'ON Xapian'); } { $imap->{uo2m} = pack('S*', (1..50000)); $imap->{uid_base} = 50000; my $err = $parse->(qq{9:}); my $s; like($err, qr/\ABAD /, 'bad MSN range'); $err = $parse->(qq{UID 9:}); like($err, qr/\ABAD /, 'bad UID range'); $err = $parse->(qq{FROM x UID 9:}); like($err, qr/\ABAD /, 'bad UID range with Xapian'); $err = $parse->(qq{FROM x 9:}); like($err, qr/\ABAD /, 'bad UID range with Xapian'); $q = $parse->($s = qq{UID 50009:50099}); is_deeply($q->{sql}, \' AND (num >= 50009 AND num <= 50099)', 'SQL generated for UID range'); $q = $parse->("CC x $s"); is($q->{xap}, qq{c:"x" uid:50009..50099}, 'Xapian generated for UID range'); $q = $parse->($s = qq{9:99}); is_deeply($q->{sql}, \' AND (num >= 50009 AND num <= 50099)', 'SQL generated for MSN range'); $q = $parse->("CC x $s"); is($q->{xap}, qq{c:"x" uid:50009..50099}, 'Xapian generated for MSN range'); } done_testing; public-inbox-1.6.1/t/imap_tracker.t000066400000000000000000000030571377346120300172160ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # License: AGPL-3.0+ use Test::More; use strict; use PublicInbox::TestCommon; require_mods 'DBD::SQLite'; use_ok 'PublicInbox::IMAPTracker'; my ($tmpdir, $for_destroy) = tmpdir(); mkdir "$tmpdir/old" or die "mkdir $tmpdir/old: $!"; my $old = "$tmpdir/old/imap.sqlite3"; my $cur = "$tmpdir/data/public-inbox/imap.sqlite3"; local $ENV{XDG_DATA_HOME} = "$tmpdir/data"; { local $ENV{PI_DIR} = "$tmpdir/old"; my $tracker = PublicInbox::IMAPTracker->new; ok(-f $cur, '->new creates file'); $tracker = undef; ok(-f $cur, 'file persists after DESTROY'); link $cur, $old or die "link $cur => $old: $!"; unlink $cur or die "unlink $cur: $!"; $tracker = PublicInbox::IMAPTracker->new; ok(!-f $cur, '->new does not create new file if old is present'); } SKIP: { my $nproc = $ENV{TEST_STRESS_NPROC}; skip 'TEST_STRESS_NPROC= not set', 1 unless $nproc; my $nr = $ENV{TEST_STRESS_NR} // 10000; diag "TEST_STRESS_NPROC=$nproc TEST_STRESS_NR=$nr"; require POSIX; for my $n (1..$nproc) { defined(my $pid = fork) or BAIL_OUT "fork: $!"; if ($pid == 0) { my $url = "imap://example.com/INBOX.$$"; my $uidval = time; eval { my $itrk = PublicInbox::IMAPTracker->new($url); for my $uid (1..$nr) { $itrk->update_last($uidval, $uid); my ($uv, $u) = $itrk->get_last; } }; warn "E: $n $$ - $@\n" if $@; POSIX::_exit($@ ? 1 : 0); } } while (1) { my $pid = waitpid(-1, 0); last if $pid < 0; is($?, 0, "$pid exited"); } } done_testing; public-inbox-1.6.1/t/imapd-tls.t000066400000000000000000000155761377346120300164600ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use Socket qw(IPPROTO_TCP SOL_SOCKET); use PublicInbox::TestCommon; # IO::Poll is part of the standard library, but distros may split it off... require_mods(qw(DBD::SQLite IO::Socket::SSL Mail::IMAPClient IO::Poll Email::Address::XS||Mail::Address Parse::RecDescent)); my $imap_client = 'Mail::IMAPClient'; $imap_client->can('starttls') or plan skip_all => 'Mail::IMAPClient does not support TLS'; my $can_compress = $imap_client->can('compress'); if ($can_compress) { # hope this gets fixed upstream, soon require PublicInbox::IMAPClient; $imap_client = 'PublicInbox::IMAPClient'; } my $cert = 'certs/server-cert.pem'; my $key = 'certs/server-key.pem'; unless (-r $key && -r $cert) { plan skip_all => "certs/ missing for $0, run $^X ./create-certs.perl in certs/"; } use_ok 'PublicInbox::TLS'; use_ok 'IO::Socket::SSL'; use PublicInbox::InboxWritable; require PublicInbox::SearchIdx; my $version = 1; # v2 needs newer git require_git('2.6') if $version >= 2; my ($tmpdir, $for_destroy) = tmpdir(); my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $inboxdir = "$tmpdir"; my $pi_config = "$tmpdir/pi_config"; my $group = 'test-imapd-tls'; my $addr = $group . '@example.com'; my $starttls = tcp_server(); my $imaps = tcp_server(); my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'imapd-tls', version => $version, -primary_address => $addr, indexlevel => 'basic', }); $ibx = PublicInbox::InboxWritable->new($ibx, {nproc=>1}); $ibx->init_inbox(0); { open my $fh, '>', $pi_config or BAIL_OUT "open: $!"; print $fh <importer(0); ok($im->add(eml_load('t/data/0001.patch')), 'message added'); $im->done; if ($version == 1) { my $s = PublicInbox::SearchIdx->new($ibx, 1); $s->index_sync; } } my $imaps_addr = $imaps->sockhost . ':' . $imaps->sockport; my $starttls_addr = $starttls->sockhost . ':' . $starttls->sockport; my $env = { PI_CONFIG => $pi_config }; my $td; for my $args ( [ "--cert=$cert", "--key=$key", "-limaps://$imaps_addr", "-limap://$starttls_addr" ], ) { for ($out, $err) { open my $fh, '>', $_ or BAIL_OUT "truncate: $!"; } my $cmd = [ '-imapd', '-W0', @$args, "--stdout=$out", "--stderr=$err" ]; $td = start_script($cmd, $env, { 3 => $starttls, 4 => $imaps }); my %o = ( SSL_hostname => 'server.local', SSL_verifycn_name => 'server.local', SSL_verify_mode => SSL_VERIFY_PEER(), SSL_ca_file => 'certs/test-ca.pem', ); # start negotiating a slow TLS connection my $slow = tcp_connect($imaps, Blocking => 0); $slow = IO::Socket::SSL->start_SSL($slow, SSL_startHandshake => 0, %o); my $slow_done = $slow->connect_SSL; my @poll; if ($slow_done) { diag('W: connect_SSL early OK, slow client test invalid'); use PublicInbox::Syscall qw(EPOLLIN EPOLLOUT); @poll = (fileno($slow), EPOLLIN | EPOLLOUT); } else { @poll = (fileno($slow), PublicInbox::TLS::epollbit()); } # we should call connect_SSL much later... my %imaps_opt = (User => 'a', Password => 'b', Server => $imaps->sockhost, Port => $imaps->sockport); # IMAPS my $c = $imap_client->new(%imaps_opt, Ssl => [ %o ]); ok($c && $c->IsAuthenticated, 'authenticated'); ok($c->select($group), 'SELECT works'); ok(!(scalar $c->has_capability('STARTTLS')), 'starttls not advertised with IMAPS'); ok(!$c->starttls, "starttls fails"); ok($c->has_capability('COMPRESS') || $c->has_capability('COMPRESS=DEFLATE'), 'compress advertised'); ok($c->compress, 'compression enabled with IMAPS'); ok(!$c->starttls, 'starttls still fails'); ok($c->noop, 'noop succeeds'); ok($c->logout, 'logout succeeds'); # STARTTLS my %imap_opt = (Server => $starttls->sockhost, Port => $starttls->sockport); $c = $imap_client->new(%imap_opt); ok(scalar $c->has_capability('STARTTLS'), 'starttls advertised'); ok($c->Starttls([ %o ]), 'set starttls options'); ok($c->starttls, '->starttls works'); ok(!(scalar($c->has_capability('STARTTLS'))), 'starttls not advertised'); ok(!$c->starttls, '->starttls again fails'); ok(!(scalar($c->has_capability('STARTTLS'))), 'starttls still not advertised'); ok($c->examine($group), 'EXAMINE works'); ok($c->noop, 'NOOP works'); ok($c->compress, 'compression enabled with IMAPS'); ok($c->noop, 'NOOP works after compress'); ok($c->logout, 'logout succeeds after compress'); # STARTTLS with bad hostname $o{SSL_hostname} = $o{SSL_verifycn_name} = 'server.invalid'; $c = $imap_client->new(%imap_opt); ok(scalar $c->has_capability('STARTTLS'), 'starttls advertised'); ok($c->Starttls([ %o ]), 'set starttls options'); ok(!$c->starttls, '->starttls fails with bad hostname'); $c = $imap_client->new(%imap_opt); ok($c->noop, 'NOOP still works from plain IMAP'); # IMAPS with bad hostname $c = $imap_client->new(%imaps_opt, Ssl => [ %o ]); is($c, undef, 'IMAPS fails with bad hostname'); # make hostname valid $o{SSL_hostname} = $o{SSL_verifycn_name} = 'server.local'; $c = $imap_client->new(%imaps_opt, Ssl => [ %o ]); ok($c, 'IMAPS succeeds again with valid hostname'); # slow TLS connection did not block the other fast clients while # connecting, finish it off: until ($slow_done) { IO::Poll::_poll(-1, @poll); $slow_done = $slow->connect_SSL and last; @poll = (fileno($slow), PublicInbox::TLS::epollbit()); } $slow->blocking(1); ok(sysread($slow, my $greet, 4096) > 0, 'slow got a greeting'); like($greet, qr/\A\* OK \[CAPABILITY IMAP4rev1 /, 'got greeting'); is(syswrite($slow, "1 LOGOUT\r\n"), 10, 'slow wrote LOGOUT'); ok(sysread($slow, my $end, 4096) > 0, 'got end'); is(sysread($slow, my $eof, 4096), 0, 'got EOF'); SKIP: { skip 'TCP_DEFER_ACCEPT is Linux-only', 2 if $^O ne 'linux'; my $var = eval { Socket::TCP_DEFER_ACCEPT() } // 9; defined(my $x = getsockopt($imaps, IPPROTO_TCP, $var)) or die; ok(unpack('i', $x) > 0, 'TCP_DEFER_ACCEPT set on IMAPS'); defined($x = getsockopt($starttls, IPPROTO_TCP, $var)) or die; is(unpack('i', $x), 0, 'TCP_DEFER_ACCEPT is 0 on plain IMAP'); }; SKIP: { skip 'SO_ACCEPTFILTER is FreeBSD-only', 2 if $^O ne 'freebsd'; if (system('kldstat -m accf_data >/dev/null')) { skip 'accf_data not loaded? kldload accf_data', 2; } require PublicInbox::Daemon; my $var = PublicInbox::Daemon::SO_ACCEPTFILTER(); my $x = getsockopt($imaps, SOL_SOCKET, $var); like($x, qr/\Adataready\0+\z/, 'got dataready accf for IMAPS'); $x = getsockopt($starttls, IPPROTO_TCP, $var); is($x, undef, 'no BSD accept filter for plain IMAP'); }; $c = undef; $td->kill; $td->join; is($?, 0, 'no error in exited process'); open my $fh, '<', $err or BAIL_OUT "open $err failed: $!"; my $eout = do { local $/; <$fh> }; unlike($eout, qr/wide/i, 'no Wide character warnings'); } done_testing; public-inbox-1.6.1/t/imapd.t000066400000000000000000000502101377346120300156400ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ # end-to-end IMAP tests, see unit tests in t/imap.t, too use strict; use Test::More; use Time::HiRes (); use PublicInbox::TestCommon; use PublicInbox::Config; use PublicInbox::Spawn qw(which); require_mods(qw(DBD::SQLite Mail::IMAPClient Mail::IMAPClient::BodyStructure Email::Address::XS||Mail::Address Parse::RecDescent)); my $imap_client = 'Mail::IMAPClient'; my $can_compress = $imap_client->can('compress'); if ($can_compress) { # hope this gets fixed upstream, soon require PublicInbox::IMAPClient; $imap_client = 'PublicInbox::IMAPClient'; } require_ok 'PublicInbox::IMAP'; my $first_range = '0'; my $level = 'basic'; SKIP: { require_mods('Search::Xapian', 1); $level = 'medium'; }; my @V = (1); push(@V, 2) if require_git('2.6', 1); my ($tmpdir, $for_destroy) = tmpdir(); my $home = "$tmpdir/home"; local $ENV{HOME} = $home; for my $V (@V) { my $addr = "i$V\@example.com"; my $name = "i$V"; my $url = "http://example.com/i$V"; my $inboxdir = "$tmpdir/$name"; my $folder = "inbox.i$V"; my $cmd = ['-init', "-V$V", "-L$level", "--ng=$folder", $name, $inboxdir, $url, $addr]; run_script($cmd) or BAIL_OUT("init $name"); if ($V == 1) { xsys(qw(git config), "--file=$ENV{HOME}/.public-inbox/config", 'publicinboxmda.spamcheck', 'none') == 0 or BAIL_OUT("config: $?"); } open(my $fh, '<', 't/utf8.eml') or BAIL_OUT("open t/utf8.eml: $!"); my $env = { ORIGINAL_RECIPIENT => $addr }; run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or BAIL_OUT('-mda delivery'); if ($V == 1) { run_script(['-index', $inboxdir]) or BAIL_OUT("index $?"); } } my $sock = tcp_server(); my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $cmd = [ '-imapd', '-W0', "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, undef, { 3 => $sock }) or BAIL_OUT("-imapd: $?"); my %mic_opt = ( Server => $sock->sockhost, Port => $sock->sockport, Uid => 1, ); my $mic = $imap_client->new(%mic_opt); my $pre_login_capa = $mic->capability; is(grep(/\AAUTH=ANONYMOUS\z/, @$pre_login_capa), 1, 'AUTH=ANONYMOUS advertised pre-login'); $mic->User('lorelei'); $mic->Password('Hunter2'); ok($mic->login && $mic->IsAuthenticated, 'LOGIN works'); my $post_login_capa = $mic->capability; ok(join("\n", @$pre_login_capa) ne join("\n", @$post_login_capa), 'got different capabilities post-login'); $mic_opt{Authmechanism} = 'ANONYMOUS'; $mic_opt{Authcallback} = sub { '' }; $mic = $imap_client->new(%mic_opt); ok($mic && $mic->login && $mic->IsAuthenticated, 'AUTHENTICATE ANONYMOUS'); my $post_auth_anon_capa = $mic->capability; is_deeply($post_auth_anon_capa, $post_login_capa, 'auth anon has same capabilities'); my $e; ok($mic->noop, 'NOOP'); ok($mic->noop, 'NOOP (again)'); # for warnings ok(!$mic->examine('foo') && ($e = $@), 'EXAMINE non-existent'); like($e, qr/\bNO\b/, 'got a NO on EXAMINE for non-existent'); ok(!$mic->select('foo') && ($e = $@), 'EXAMINE non-existent'); like($e, qr/\bNO\b/, 'got a NO on EXAMINE for non-existent'); my $mailbox1 = "inbox.i1.$first_range"; ok($mic->select('inbox.i1'), 'SELECT on parent succeeds'); ok($mic->noop, 'NOOP while selected'); ok($mic->noop, 'NOOP again while selected'); # check warnings later ok($mic->select($mailbox1), 'SELECT succeeds'); ok($mic->examine($mailbox1), 'EXAMINE succeeds'); my @raw = $mic->status($mailbox1, qw(Messages uidnext uidvalidity)); is(scalar(@raw), 2, 'got status response'); like($raw[0], qr/\A\*\x20STATUS\x20inbox\.i1\.$first_range\x20 \(MESSAGES\x20\d+\x20UIDNEXT\x20\d+\x20UIDVALIDITY\x20\d+\)\r\n/sx); like($raw[1], qr/\A\S+ OK /, 'finished status response'); my @orig_list = @raw = $mic->list; like($raw[0], qr/^\* LIST \(.*?\) "\." INBOX/, 'got an inbox'); like($raw[-1], qr/^\S+ OK /, 'response ended with OK'); is(scalar(@raw), scalar(@V) * 2 + 2, 'default LIST response'); @raw = $mic->list('', 'inbox.i1'); is(scalar(@raw), 2, 'limited LIST response'); like($raw[0], qr/^\* LIST \(.*?\) "\." INBOX/, 'got an inbox.i1'); like($raw[-1], qr/^\S+ OK /, 'response ended with OK'); my $ret = $mic->search('all') or BAIL_OUT "SEARCH FAIL $@"; is_deeply($ret, [ 1 ], 'search all works'); $ret = $mic->search('uid 1') or BAIL_OUT "SEARCH FAIL $@"; is_deeply($ret, [ 1 ], 'search UID 1 works'); $ret = $mic->search('uid 1:1') or BAIL_OUT "SEARCH FAIL $@"; is_deeply($ret, [ 1 ], 'search UID 1:1 works'); $ret = $mic->search('uid 1:*') or BAIL_OUT "SEARCH FAIL $@"; is_deeply($ret, [ 1 ], 'search UID 1:* works'); $ret = $mic->search('DELETED') or BAIL_OUT "SEARCH FAIL $@"; is_deeply($ret, [], 'searching for DELETED returns nothing'); SKIP: { skip 'Xapian missing', 8 if $level eq 'basic'; my $x = $mic->search(qw(smaller 99999)); is_deeply($x, [1], 'SMALLER works with Xapian (hit)'); $x = $mic->search(qw(smaller 9)); is_deeply($x, [], 'SMALLER works with Xapian (miss)'); $x = $mic->search(qw(larger 99999)); is_deeply($x, [], 'LARGER works with Xapian (miss)'); $x = $mic->search(qw(larger 9)); is_deeply($x, [1], 'LARGER works with Xapian (hit)'); $x = $mic->search(qw(HEADER Message-ID testmessage@example.com)); is_deeply($x, [1], 'HEADER Message-ID works'); $x = $mic->search(qw(DRAFT HEADER Message-ID testmessage@example.com)); is_deeply($x, [], 'impossible (DRAFT) key filters out match'); $x = $mic->search(qw(HEADER Message-ID miss)); is_deeply($x, [], 'HEADER Message-ID can miss'); my @q = qw[OR HEADER Message-ID testmessage@example.com (OR FROM Ryan (OR TO Joe CC Scott))]; $x = $mic->search(join(' ', @q)); is_deeply($x, [1], 'nested query works'); } is_deeply(scalar $mic->flags('1'), [], '->flags works'); { # RFC 3501 section 6.4.8 states: # Also note that a UID range of 559:* always includes the # UID of the last message in the mailbox, even if 559 is # higher than any assigned UID value. my $exp = $mic->fetch_hash(1, 'UID'); $ret = $mic->fetch_hash('559:*', 'UID'); is_deeply($ret, $exp, 'beginning range too big'); { my @w; # Mail::IMAPClient hits a warning via overload local $SIG{__WARN__} = sub { push @w, @_ }; $ret = $mic->fetch_hash(my $r = '559:558', 'UID'); is_deeply($ret, {}, "out-of-range UID FETCH $r"); @w = grep(!/\boverload\.pm\b/, @w); is_deeply(\@w, [], 'no unexpected warning'); } $ret = $mic->fetch_hash(my $r = '558:559', 'UID'); is_deeply($ret, {}, "out-of-range UID FETCH $r"); } for my $r ('1:*', '1') { $ret = $mic->fetch_hash($r, 'RFC822') or BAIL_OUT "FETCH $@"; is_deeply([keys %$ret], [1]); like($ret->{1}->{RFC822}, qr/\r\n\r\nThis is a test/, 'read full'); # ensure Mail::IMAPClient behaves my $str = $mic->message_string($r) or BAIL_OUT "->message_string: $@"; is($str, $ret->{1}->{RFC822}, '->message_string works as expected'); my $sz = $mic->fetch_hash($r, 'RFC822.size') or BAIL_OUT "FETCH $@"; is($sz->{1}->{'RFC822.SIZE'}, length($ret->{1}->{RFC822}), 'RFC822.SIZE'); $ret = $mic->fetch_hash($r, 'RFC822.HEADER') or BAIL_OUT "FETCH $@"; is_deeply([keys %$ret], [1]); like($ret->{1}->{'RFC822.HEADER'}, qr/^Message-ID: /ms, 'read header'); $ret = $mic->fetch_hash($r, 'INTERNALDATE') or BAIL_OUT "FETCH $@"; is($ret->{1}->{'INTERNALDATE'}, '01-Jan-1970 00:00:00 +0000', 'internaldate matches'); ok(!$mic->fetch_hash($r, 'INFERNALDATE'), 'bogus attribute fails'); my $envelope = $mic->get_envelope($r) or BAIL_OUT("get_envelope: $@"); is($envelope->{bcc}, 'NIL', 'empty bcc'); is($envelope->{messageid}, '', 'messageid'); is(scalar @{$envelope->{to}}, 1, 'one {to} header'); # *sigh* too much to verify... #use Data::Dumper; diag Dumper($envelope); $ret = $mic->fetch_hash($r, 'FLAGS') or BAIL_OUT "FETCH $@"; is_deeply($ret->{1}->{FLAGS}, '', 'no flags'); $ret = $mic->fetch_hash($r, 'BODY[1]') or BAIL_OUT "FETCH $@"; like($ret->{1}->{'BODY[1]'}, qr/\AThis is a test message/, 'BODY[1]'); $ret = $mic->fetch_hash($r, 'BODY[1]<1>') or BAIL_OUT "FETCH $@"; like($ret->{1}->{'BODY[1]<1>'}, qr/\Ahis is a test message/, 'BODY[1]<1>'); $ret = $mic->fetch_hash($r, 'BODY[1]<2.3>') or BAIL_OUT "FETCH $@"; is($ret->{1}->{'BODY[1]<2>'}, "is ", 'BODY[1]<2.3>'); $ret = $mic->bodypart_string($r, 1, 3, 2) or BAIL_OUT "bodypart_string $@"; is($ret, "is ", 'bodypart string'); $ret = $mic->fetch_hash($r, 'BODY[HEADER.FIELDS.NOT (Message-ID)]') or BAIL_OUT "FETCH $@"; $ret = $ret->{1}->{'BODY[HEADER.FIELDS.NOT (MESSAGE-ID)]'}; unlike($ret, qr/message-id/i, 'Message-ID excluded'); like($ret, qr/\r\n\r\n\z/s, 'got header end'); $ret = $mic->fetch_hash($r, 'BODY[HEADER.FIELDS (Message-ID)]') or BAIL_OUT "FETCH $@"; is($ret->{1}->{'BODY[HEADER.FIELDS (MESSAGE-ID)]'}, 'Message-ID: '."\r\n\r\n", 'got only Message-ID'); my $bs = $mic->get_bodystructure($r) or BAIL_OUT("bodystructure: $@"); ok($bs, 'got a bodystructure'); is(lc($bs->bodytype), 'text', '->bodytype'); is(lc($bs->bodyenc), '8bit', '->bodyenc'); } ok($mic->has_capability('COMPRESS') || $mic->has_capability('COMPRESS=DEFLATE'), 'deflate cap'); SKIP: { skip 'Mail::IMAPClient too old for ->compress', 2 if !$can_compress; my $c = $imap_client->new(%mic_opt); ok($c && $c->compress, 'compress enabled'); ok($c->examine($mailbox1), 'EXAMINE succeeds after COMPRESS'); $ret = $c->search('uid 1:*') or BAIL_OUT "SEARCH FAIL $@"; is_deeply($ret, [ 1 ], 'search UID 1:* works after compression'); } ok($mic->logout, 'logout works'); my $have_inotify = eval { require Linux::Inotify2; 1 }; my $pi_config = PublicInbox::Config->new; $pi_config->each_inbox(sub { my ($ibx) = @_; my $env = { ORIGINAL_RECIPIENT => $ibx->{-primary_address} }; my $name = $ibx->{name}; my $ng = $ibx->{newsgroup}; my $mic = $imap_client->new(%mic_opt); ok($mic && $mic->login && $mic->IsAuthenticated, "authed $name"); my $mb = "$ng.$first_range"; my $uidnext = $mic->uidnext($mb); # we'll fetch BODYSTRUCTURE on this ok($uidnext, 'got uidnext for later fetch'); ok($mic->has_capability('IDLE'), "IDLE capa $name"); ok(!$mic->idle, "IDLE fails w/o SELECT/EXAMINE $name"); ok($mic->examine($mb), "EXAMINE $ng succeeds"); ok(my $idle_tag = $mic->idle, "IDLE succeeds on $ng"); open(my $fh, '<', 't/data/message_embed.eml') or BAIL_OUT("open: $!"); run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or BAIL_OUT('-mda delivery'); my $t0 = Time::HiRes::time(); ok(my @res = $mic->idle_data(11), "IDLE succeeds on $ng"); is(grep(/\A\* [0-9] EXISTS\b/, @res), 1, 'got EXISTS message'); ok((Time::HiRes::time() - $t0) < 10, 'IDLE client notified'); my (@ino_info, $ino_fdinfo); SKIP: { skip 'no inotify support', 1 unless $have_inotify; skip 'missing /proc/$PID/fd', 1 if !-d "/proc/$td->{pid}/fd"; my @ino = grep { readlink($_) =~ /\binotify\b/ } glob("/proc/$td->{pid}/fd/*"); is(scalar(@ino), 1, 'only one inotify FD'); my $ino_fd = (split('/', $ino[0]))[-1]; $ino_fdinfo = "/proc/$td->{pid}/fdinfo/$ino_fd"; if (open my $fh, '<', $ino_fdinfo) { local $/ = "\n"; @ino_info = grep(/^inotify wd:/, <$fh>); ok(scalar(@ino_info), 'inotify has watches'); } else { skip "$ino_fdinfo missing: $!", 1; } }; # ensure IDLE persists across HUP, w/o extra watches or FDs $td->kill('HUP') or BAIL_OUT "failed to kill -imapd: $!"; for my $n (1..2) { # kick the event loop so we know HUP is done my $m = $imap_client->new(%mic_opt); ok($m->login && $m->IsAuthenticated && $m->logout, "connection $n works after HUP"); } open($fh, '<', 't/data/0001.patch') or BAIL_OUT("open: $!"); run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or BAIL_OUT('-mda delivery'); $t0 = Time::HiRes::time(); ok(@res = $mic->idle_data(11), "IDLE succeeds on $ng after HUP"); is(grep(/\A\* [0-9] EXISTS\b/, @res), 1, 'got EXISTS message'); ok((Time::HiRes::time() - $t0) < 10, 'IDLE client notified'); ok($mic->done($idle_tag), 'IDLE DONE'); my $bs = $mic->get_bodystructure($uidnext); ok($bs, 'BODYSTRUCTURE ok for deeply nested'); $ret = $mic->fetch_hash($uidnext, 'BODY') or BAIL_OUT "FETCH $@"; ok($ret->{$uidnext}->{BODY}, 'got something in BODY'); # this matches dovecot behavior $ret = $mic->fetch_hash($uidnext, 'BODY[1]') or BAIL_OUT "FETCH $@"; is($ret->{$uidnext}->{'BODY[1]'}, "testing embedded message harder\r\n", 'BODY[1]'); $ret = $mic->fetch_hash($uidnext, 'BODY[2]') or BAIL_OUT "FETCH $@"; like($ret->{$uidnext}->{'BODY[2]'}, qr/\ADate: Sat, 18 Apr 2020 22:20:20 /, 'BODY[2]'); $ret = $mic->fetch_hash($uidnext, 'BODY[2.1.1]') or BAIL_OUT "FETCH $@"; is($ret->{$uidnext}->{'BODY[2.1.1]'}, "testing embedded message\r\n", 'BODY[2.1.1]'); $ret = $mic->fetch_hash($uidnext, 'BODY[2.1.2]') or BAIL_OUT "FETCH $@"; like($ret->{$uidnext}->{'BODY[2.1.2]'}, qr/\AFrom: /, 'BODY[2.1.2] tip matched'); like($ret->{$uidnext}->{'BODY[2.1.2]'}, # trailing CRLF may vary depending on MIME parser qr/done_testing;(?:\r\n){1,2}\z/, 'BODY[2.1.2] tail matched'); $ret = $mic->fetch_hash("1:$uidnext", 'BODY[2.HEADER]') or BAIL_OUT "2.HEADER $@"; like($ret->{$uidnext}->{'BODY[2.HEADER]'}, qr/\ADate: Sat, 18 Apr 2020 22:20:20 /, '2.HEADER of message/rfc822'); $ret = $mic->fetch_hash($uidnext, 'BODY[2.MIME]') or BAIL_OUT "2.MIME $@"; is($ret->{$uidnext}->{'BODY[2.MIME]'}, <search('SENTON' => '18-Apr-2020'); is_deeply(\@hits, [ $uidnext ], 'search with date condition works'); ok($mic->examine($ng), 'EXAMINE on dummy'); @hits = $mic->search('SENTSINCE' => '18-Apr-2020'); is_deeply(\@hits, [], 'search on dummy with condition works'); ok(!$mic->search('SENTSINCE' => '18-Abr-2020'), 'bad month fails'); $mic->Uid(0); my $ret = $mic->fetch_hash(2, 'RFC822'); is_deeply($ret, {}, 'MSN FETCH on empty dummy will not trigger warnings, later'); }); # each_inbox # message sequence numbers :< is($mic->Uid(0), 0, 'disable UID on '.ref($mic)); ok($mic->reconnect, 'reconnected'); $ret = $mic->fetch_hash('1,2:3', 'RFC822') or BAIL_OUT "FETCH $@"; is(scalar keys %$ret, 3, 'got all 3 messages with comma-separated sequence'); $ret = $mic->fetch_hash('1:*', 'RFC822') or BAIL_OUT "FETCH $@"; is(scalar keys %$ret, 3, 'got all 3 messages'); SKIP: { # do any clients use non-UID IMAP SEARCH? skip 'Xapian missing', 2 if $level eq 'basic'; my $x = $mic->search('all'); is_deeply($x, [1, 2, 3], 'MSN SEARCH works before rm'); $x = $mic->search(qw(header subject embedded)); is_deeply($x, [2], 'MSN SEARCH on Subject works before rm'); } { my $rdr = { 0 => \($ret->{1}->{RFC822}) }; my $env = { HOME => $ENV{HOME} }; my @cmd = qw(-learn rm --all); run_script(\@cmd, $env, $rdr) or BAIL_OUT('-learn rm'); } SKIP: { # do any clients use non-UID IMAP SEARCH? We only ensure # MSN "SEARCH" can return a result which can be retrieved # via MSN "FETCH" skip 'Xapian missing', 3 if $level eq 'basic'; my $x = $mic->search(qw(header subject embedded)); is(scalar(@$x), 1, 'MSN SEARCH on Subject works after rm'); SKIP: { skip 'previous test failed', 1 if !scalar(@$x); $x = $mic->message_string($x->[0]); is($x, $ret->{2}->{RFC822}, 'message 2 unchanged'); } $x = $mic->search(qw(text embedded)); is(scalar(@$x), 1, 'MSN SEARCH on TEXT works after rm'); } # FIXME? no EXPUNGE response, yet my $r2 = $mic->fetch_hash('1:*', 'BODY.PEEK[]') or BAIL_OUT "FETCH $@"; is(scalar keys %$r2, 2, 'did not get all 3 messages'); is($r2->{2}->{'BODY[]'}, $ret->{2}->{RFC822}, 'message 2 unchanged'); is($r2->{3}->{'BODY[]'}, $ret->{3}->{RFC822}, 'message 3 unchanged'); $r2 = $mic->fetch_hash(2, 'BODY.PEEK[HEADER.FIELDS (message-id)]') or BAIL_OUT "FETCH $@"; is($r2->{2}->{'BODY[HEADER.FIELDS (MESSAGE-ID)]'}, 'Message-ID: <20200418222508.GA13918@dcvr>'."\r\n\r\n", 'BODY.PEEK[HEADER.FIELDS ...] drops .PEEK'); { my @new_list = $mic->list; # tag differs in [-1] like($orig_list[-1], qr/\A\S+ OK List done\r\n/, 'orig LIST'); like($new_list[-1], qr/\A\S+ OK List done\r\n/, 'new LIST'); pop @new_list; pop @orig_list; # TODO: not sure if sort order matters, imapd_refresh_finalize # doesn't sort, hopefully clients don't care... @new_list = sort @new_list; @orig_list = sort @orig_list; is_deeply(\@new_list, \@orig_list, 'LIST identical'); } ok($mic->close, 'CLOSE works'); ok(!$mic->close, 'CLOSE not idempotent'); ok($mic->logout, 'logged out'); { my $c = tcp_connect($sock); $c->autoflush(1); like(<$c>, qr/\* OK/, 'got a greeting'); print $c "\r\n"; like(<$c>, qr/\A\* BAD Error in IMAP command/, 'empty line'); print $c "tagonly\r\n"; like(<$c>, qr/\Atagonly BAD Error in IMAP command/, 'tag-only line'); } SKIP: { use_ok 'PublicInbox::Watch'; use_ok 'PublicInbox::InboxIdle'; require_git('1.8.5', 1) or skip('git 1.8.5+ needed for --urlmatch', 4); my $old_env = { HOME => $ENV{HOME} }; my $home = "$tmpdir/watch_home"; mkdir $home or BAIL_OUT $!; mkdir "$home/.public-inbox" or BAIL_OUT $!; local $ENV{HOME} = $home; my $name = 'watchimap'; my $addr = "i1\@example.com"; my $url = "http://example.com/i1"; my $inboxdir = "$tmpdir/watchimap"; my $cmd = ['-init', '-V2', '-Lbasic', $name, $inboxdir, $url, $addr]; my ($ihost, $iport) = ($sock->sockhost, $sock->sockport); my $imapurl = "imap://$ihost:$iport/inbox.i1.0"; run_script($cmd) or BAIL_OUT("init $name"); xsys(qw(git config), "--file=$home/.public-inbox/config", "publicinbox.$name.watch", $imapurl) == 0 or BAIL_OUT "git config $?"; my $cfg = PublicInbox::Config->new; PublicInbox::DS->Reset; my $ii = PublicInbox::InboxIdle->new($cfg); my $cb = sub { PublicInbox::DS->SetPostLoopCallback(sub {}) }; my $obj = bless \$cb, 'PublicInbox::TestCommon::InboxWakeup'; $cfg->each_inbox(sub { $_[0]->subscribe_unlock('ident', $obj) }); my $watcherr = "$tmpdir/watcherr"; open my $err_wr, '>>', $watcherr or BAIL_OUT $!; open my $err, '<', $watcherr or BAIL_OUT $!; my $w = start_script(['-watch'], undef, { 2 => $err_wr }); diag 'waiting for initial fetch...'; PublicInbox::DS->EventLoop; diag 'inbox unlocked on initial fetch, waiting for IDLE'; tick until (grep(/I: \S+ idling/, <$err>)); open my $fh, '<', 't/iso-2202-jp.eml' or BAIL_OUT $!; $old_env->{ORIGINAL_RECIPIENT} = $addr; ok(run_script([qw(-mda --no-precheck)], $old_env, { 0 => $fh }), 'delivered a message for IDLE to kick -watch'); diag 'waiting for IMAP IDLE wakeup'; PublicInbox::DS->SetPostLoopCallback(undef); PublicInbox::DS->EventLoop; diag 'inbox unlocked on IDLE wakeup'; # try again with polling xsys(qw(git config), "--file=$home/.public-inbox/config", 'imap.PollInterval', 0.11) == 0 or BAIL_OUT "git config $?"; $w->kill('HUP'); diag 'waiting for -watch reload + initial fetch'; tick until (grep(/I: will check/, <$err>)); open $fh, '<', 't/psgi_attach.eml' or BAIL_OUT $!; ok(run_script([qw(-mda --no-precheck)], $old_env, { 0 => $fh }), 'delivered a message for -watch PollInterval'); diag 'waiting for PollInterval wakeup'; PublicInbox::DS->SetPostLoopCallback(undef); PublicInbox::DS->EventLoop; diag 'inbox unlocked (poll)'; $w->kill; $w->join; is($?, 0, 'no error in exited -watch process'); $cfg->each_inbox(sub { shift->unsubscribe_unlock('ident') }); $ii->close; PublicInbox::DS->Reset; seek($err, 0, 0); my @err = grep(!/^I:/, <$err>); is(@err, 0, 'no warnings/errors from -watch'.join(' ', @err)); if ($ENV{TEST_KILL_IMAPD}) { # not sure how reliable this test can be xsys(qw(git config), "--file=$home/.public-inbox/config", qw(--unset imap.PollInterval)) == 0 or BAIL_OUT "git config $?"; truncate($err_wr, 0) or BAIL_OUT $!; my @t0 = times; $w = start_script(['-watch'], undef, { 2 => $err_wr }); seek($err, 0, 0); tick until (grep(/I: \S+ idling/, <$err>)); diag 'killing imapd, waiting for CPU spins'; my $delay = 0.11; $td->kill(9); tick $delay; $w->kill; $w->join; is($?, 0, 'no error in exited -watch process'); my @t1 = times; my $c = $t1[2] + $t1[3] - $t0[2] - $t0[3]; my $thresh = (0.9 * $delay); diag "c=$c, threshold=$thresh"; ok($c < $thresh, 'did not burn much CPU'); is_deeply([grep(/ line \d+$/m, <$err>)], [], 'no backtraces from errors'); } } $td->kill; $td->join; is($?, 0, 'no error in exited process') if !$ENV{TEST_KILL_IMAPD}; open my $fh, '<', $err or BAIL_OUT("open $err failed: $!"); my $eout = do { local $/; <$fh> }; unlike($eout, qr/wide/i, 'no Wide character warnings'); unlike($eout, qr/uninitialized/i, 'no uninitialized warnings'); done_testing; public-inbox-1.6.1/t/import.t000066400000000000000000000104711377346120300160650ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::Smsg; use PublicInbox::Git; use PublicInbox::Import; use PublicInbox::Spawn qw(spawn); use Fcntl qw(:DEFAULT SEEK_SET); use PublicInbox::TestCommon; use MIME::Base64 3.05; # Perl 5.10.0 / 5.9.2 my ($dir, $for_destroy) = tmpdir(); my $git = PublicInbox::Git->new($dir); my $im = PublicInbox::Import->new($git, 'testbox', 'test@example'); $im->init_bare; my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: b@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF my $v2 = require_git(2.6, 1); my $smsg = bless {}, 'PublicInbox::Smsg' if $v2; like($im->add($mime, undef, $smsg), qr/\A:[0-9]+\z/, 'added one message'); if ($v2) { like($smsg->{blob}, qr/\A[a-f0-9]{40}\z/, 'got last object_id'); my $raw_email = $smsg->{-raw_email}; is($mime->as_string, $$raw_email, 'string matches'); is($smsg->{raw_bytes}, length($$raw_email), 'length matches'); my @cmd = ('git', "--git-dir=$git->{git_dir}", qw(hash-object --stdin)); open my $in, '+<', undef or BAIL_OUT "open(+<): $!"; print $in $mime->as_string or die "write failed: $!"; $in->flush or die "flush failed: $!"; seek($in, 0, SEEK_SET); open my $out, '+<', undef or BAIL_OUT "open(+<): $!"; my $pid = spawn(\@cmd, {}, { 0 => $in, 1 => $out }); is(waitpid($pid, 0), $pid, 'waitpid succeeds on hash-object'); is($?, 0, 'hash-object'); seek($out, 0, SEEK_SET); chomp(my $hashed_obj = <$out>); is($hashed_obj, $smsg->{blob}, "blob object_id matches exp"); } $im->done; my @revs = $git->qx(qw(rev-list HEAD)); is(scalar @revs, 1, 'one revision created'); my $odd = '"=?iso-8859-1?Q?J_K=FCpper?= header_set('From', $odd); $mime->header_set('Message-ID', ''); $mime->header_set('Subject', 'msg2'); like($im->add($mime, sub { $mime }), qr/\A:\d+\z/, 'added 2nd message'); $im->done; @revs = $git->qx(qw(rev-list HEAD)); is(scalar @revs, 2, '2 revisions exist'); is($im->add($mime), undef, 'message only inserted once'); $im->done; @revs = $git->qx(qw(rev-list HEAD)); is(scalar @revs, 2, '2 revisions exist'); foreach my $c ('c'..'z') { $mime->header_set('Message-ID', "<$c\@example.com>"); $mime->header_set('Subject', "msg - $c"); like($im->add($mime), qr/\A:\d+\z/, "added $c message"); } $im->done; @revs = $git->qx(qw(rev-list HEAD)); is(scalar @revs, 26, '26 revisions exist after mass import'); my ($mark, $msg) = $im->remove($mime); like($mark, qr/\A:\d+\z/, 'got mark'); like(ref($msg), qr/\bPublicInbox::(?:Eml|MIME)\b/, 'got old message deleted'); is(undef, $im->remove($mime), 'remove is idempotent'); # mismatch on identical Message-ID $mime->header_set('Message-ID', ''); ($mark, $msg) = $im->remove($mime); is($mark, 'MISMATCH', 'mark == MISMATCH on mismatch'); is($msg->header('Message-ID'), '', 'Message-ID matches'); isnt($msg->header('Subject'), $mime->header('Subject'), 'subject mismatch'); $mime->header_set('Message-Id', ''); is($im->add($mime, sub { undef }), undef, 'check callback fails'); is($im->remove($mime), undef, 'message not added, so not removed'); is(undef, $im->checkpoint, 'checkpoint works before ->done'); $im->done; is(undef, $im->checkpoint, 'checkpoint works after ->done'); $im->checkpoint; my $nogit = PublicInbox::Git->new("$dir/non-existent/dir"); eval { my $nope = PublicInbox::Import->new($nogit, 'nope', 'no@example.com'); $nope->add($mime); }; ok($@, 'Import->add fails on non-existent dir'); my @cls = qw(PublicInbox::Eml); SKIP: { require_mods('PublicInbox::MIME', 1); push @cls, 'PublicInbox::MIME'; }; $main::badchars = "\n\0\r"; my $from = '=?UTF-8?B?'. encode_base64("B\ra\nd\0\$main::badchars", ''). '?='; for my $cls (@cls) { my $eml = $cls->new(< Message-ID: <$cls\@example.com> EOF ok($im->add($eml), "added $cls message with nasty char in From"); } $im->done; my $bref = $git->cat_file('HEAD'); like($$bref, qr/^author Ba d \$main::badchars /sm, 'latest commit accepted by spammer'); $git->qx(qw(fsck --no-progress --strict)); is($?, 0, 'fsck reported no errors'); $main::badchars = undef; done_testing(); public-inbox-1.6.1/t/inbox.t000066400000000000000000000027201377346120300156700ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use_ok 'PublicInbox::Inbox'; use File::Temp 0.19 (); my $x = PublicInbox::Inbox->new({url => [ '//example.com/test/' ]}); is($x->base_url, 'https://example.com/test/', 'expanded protocol-relative'); $x = PublicInbox::Inbox->new({url => [ 'http://example.com/test' ]}); is($x->base_url, 'http://example.com/test/', 'added trailing slash'); $x = PublicInbox::Inbox->new({}); is($x->base_url, undef, 'undef base_url allowed'); my $tmpdir = File::Temp->newdir('pi-inbox-XXXXXX', TMPDIR => 1); $x->{inboxdir} = $tmpdir->dirname; is_deeply($x->cloneurl, [], 'no cloneurls'); is($x->description, '($INBOX_DIR/description missing)', 'default description'); { open my $fh, '>', "$x->{inboxdir}/cloneurl" or die; print $fh "https://example.com/inbox\n" or die; close $fh or die; open $fh, '>', "$x->{inboxdir}/description" or die; print $fh "\xc4\x80blah\n" or die; close $fh or die; } is_deeply($x->cloneurl, ['https://example.com/inbox'], 'cloneurls update'); ok(utf8::valid($x->description), 'description is utf8::valid'); is($x->description, "\x{100}blah", 'description updated'); is(unlink(glob("$x->{inboxdir}/*")), 2, 'unlinked cloneurl & description'); is_deeply($x->cloneurl, ['https://example.com/inbox'], 'cloneurls memoized'); is($x->description, "\x{100}blah", 'description memoized'); done_testing(); public-inbox-1.6.1/t/inbox_idle.t000066400000000000000000000041221377346120300166630ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ use Test::More; use PublicInbox::TestCommon; use PublicInbox::Config; require_git 2.6; require_mods(qw(DBD::SQLite)); require PublicInbox::SearchIdx; use_ok 'PublicInbox::InboxIdle'; use PublicInbox::InboxWritable; my ($tmpdir, $for_destroy) = tmpdir(); for my $V (1, 2) { my $inboxdir = "$tmpdir/$V"; mkdir $inboxdir or BAIL_OUT("mkdir: $!"); my %opt = ( inboxdir => $inboxdir, name => 'inbox-idle', version => $V, -primary_address => 'test@example.com', indexlevel => 'basic', ); my $ibx = PublicInbox::Inbox->new({ %opt }); $ibx = PublicInbox::InboxWritable->new($ibx); my $obj = InboxIdleTestObj->new; $ibx->init_inbox(0); my $im = $ibx->importer(0); if ($V == 1) { my $sidx = PublicInbox::SearchIdx->new($ibx, 1); $sidx->idx_acquire; $sidx->set_metadata_once; $sidx->idx_release; # allow watching on lockfile } my $pi_config = PublicInbox::Config->new(\<each_inbox(sub { shift->subscribe_unlock($ident, $obj) }); my $ii = PublicInbox::InboxIdle->new($pi_config); ok($ii, 'InboxIdle created'); SKIP: { skip('inotify or kqueue missing', 1) unless $ii->{sock}; ok(fileno($ii->{sock}) >= 0, 'fileno() gave valid FD'); } ok($im->add(eml_load('t/utf8.eml')), "$V added"); $im->done; PublicInbox::SearchIdx->new($ibx)->index_sync if $V == 1; $ii->event_step; is(scalar @{$obj->{called}}, 1, 'called on unlock'); $pi_config->each_inbox(sub { shift->unsubscribe_unlock($ident) }); ok($im->add(eml_load('t/data/0001.patch')), "$V added #2"); $im->done; PublicInbox::SearchIdx->new($ibx)->index_sync if $V == 1; $ii->event_step; is(scalar @{$obj->{called}}, 1, 'not called when unsubbed'); $ii->close; } done_testing; package InboxIdleTestObj; use strict; sub new { bless {}, shift } sub on_inbox_unlock { my ($self, $ibx) = @_; push @{$self->{called}}, $ibx; } public-inbox-1.6.1/t/index-git-times.t000066400000000000000000000066111377346120300175630ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Import; use PublicInbox::Config; use PublicInbox::Admin; use File::Path qw(remove_tree); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::Over'; my ($tmpdir, $for_destroy) = tmpdir(); local $ENV{PI_CONFIG} = "$tmpdir/cfg"; my $v1dir = "$tmpdir/v1"; my $addr = 'x@example.com'; run_script(['-init', '--indexlevel=medium', 'v1', $v1dir, 'http://example.com/x', $addr]) or die "init failed"; { my $data = <<'EOF'; blob mark :1 data 133 From: timeless To: x Subject: can I haz the time? Message-ID: <19700101000000-1234@example.com> plz reset refs/heads/master commit refs/heads/master mark :2 author timeless 749520000 +0100 committer x 1285977600 -0100 data 20 can I haz the time? M 100644 :1 53/256f6177504c2878d3a302ef5090dacf5e752c EOF pipe(my($r, $w)) or die; length($data) <= 512 or die "data too large to fit in POSIX pipe"; print $w $data or die; close $w or die; my $cmd = ['git', "--git-dir=$v1dir", 'fast-import', '--quiet']; PublicInbox::Import::run_die($cmd, undef, { 0 => $r }); } run_script(['-index', '--skip-docdata', $v1dir]) or die 'v1 index failed'; my $smsg; { my $cfg = PublicInbox::Config->new; my $ibx = $cfg->lookup($addr); my $lvl = PublicInbox::Admin::detect_indexlevel($ibx); is($lvl, 'medium', 'indexlevel detected'); is($ibx->{-skip_docdata}, 1, '--skip-docdata flag set on -index'); $smsg = $ibx->over->get_art(1); is($smsg->{ds}, 749520000, 'datestamp from git author time'); is($smsg->{ts}, 1285977600, 'timestamp from git committer time'); my $mset = $ibx->search->mset("m:$smsg->{mid}"); is($mset->size, 1, 'got one result for m:'); my $res = $ibx->search->mset_to_smsg($ibx, $mset); is($res->[0]->{ds}, $smsg->{ds}, 'Xapian stored datestamp'); $mset = $ibx->search->mset('d:19931002..19931002'); $res = $ibx->search->mset_to_smsg($ibx, $mset); is(scalar @$res, 1, 'got one result for d:'); is($res->[0]->{ds}, $smsg->{ds}, 'Xapian search on datestamp'); } SKIP: { require_git(2.6, 1) or skip('git 2.6+ required for v2', 10); my $v2dir = "$tmpdir/v2"; run_script(['-convert', $v1dir, $v2dir]) or die 'v2 conversion failed'; my $check_v2 = sub { my $ibx = PublicInbox::Inbox->new({inboxdir => $v2dir, address => $addr}); my $lvl = PublicInbox::Admin::detect_indexlevel($ibx); is($lvl, 'medium', 'indexlevel detected after convert'); is($ibx->{-skip_docdata}, 1, '--skip-docdata preserved after convert'); my $v2smsg = $ibx->over->get_art(1); is($v2smsg->{ds}, $smsg->{ds}, 'v2 datestamp from git author time'); is($v2smsg->{ts}, $smsg->{ts}, 'v2 timestamp from git committer time'); my $mset = $ibx->search->mset("m:$smsg->{mid}"); my $res = $ibx->search->mset_to_smsg($ibx, $mset); is($res->[0]->{ds}, $smsg->{ds}, 'Xapian stored datestamp'); $mset = $ibx->search->mset('d:19931002..19931002'); $res = $ibx->search->mset_to_smsg($ibx, $mset); is(scalar @$res, 1, 'got one result for d:'); is($res->[0]->{ds}, $smsg->{ds}, 'Xapian search on datestamp'); }; $check_v2->(); remove_tree($v2dir); # test non-parallelized conversion run_script(['-convert', '-j0', $v1dir, $v2dir]) or die 'v2 conversion failed'; $check_v2->(); } done_testing; public-inbox-1.6.1/t/indexlevels-mirror-v1.t000066400000000000000000000003111377346120300207210ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ local $ENV{PI_TEST_VERSION} = 1; require './t/indexlevels-mirror.t'; public-inbox-1.6.1/t/indexlevels-mirror.t000066400000000000000000000137231377346120300204100ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::Inbox; use PublicInbox::InboxWritable; require PublicInbox::Admin; use PublicInbox::TestCommon; my $PI_TEST_VERSION = $ENV{PI_TEST_VERSION} || 2; require_git('2.6') if $PI_TEST_VERSION == 2; require_mods(qw(DBD::SQLite)); my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: test@example.com Subject: this is a subject Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF my $import_index_incremental = sub { my ($v, $level, $mime) = @_; my $err = ''; my $this = "pi-$v-$level-indexlevels"; my ($tmpdir, $for_destroy) = tmpdir(); local $ENV{PI_CONFIG} = "$tmpdir/config"; my $ibx = PublicInbox::Inbox->new({ inboxdir => "$tmpdir/testbox", name => $this, version => $v, -primary_address => 'test@example.com', indexlevel => $level, }); my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer(0); $mime->header_set('Message-ID', ''); ok($im->add($mime), 'first message added'); $im->done; # index master (required for v1) my @cmd = (qw(-index -j0), $ibx->{inboxdir}, "-L$level"); push @cmd, '-c' if have_xapian_compact; ok(run_script(\@cmd, undef, { 2 => \$err }), 'index master'); my $ro_master = PublicInbox::Inbox->new({ inboxdir => $ibx->{inboxdir}, indexlevel => $level }); my $msgs = $ro_master->recent; is(scalar(@$msgs), 1, 'only one message in master, so far'); is($msgs->[0]->{mid}, 'm@1', 'first message in master indexed'); # clone @cmd = (qw(git clone --mirror -q)); my $mirror = "$tmpdir/mirror-$v"; if ($v == 1) { push @cmd, $ibx->{inboxdir}, $mirror; } else { push @cmd, "$ibx->{inboxdir}/git/0.git", "$mirror/git/0.git"; } my $fetch_dir = $cmd[-1]; is(xsys(@cmd), 0, "v$v clone OK"); # inbox init local $ENV{PI_CONFIG} = "$tmpdir/.picfg"; @cmd = ('-init', '-L', $level, 'mirror', $mirror, '//example.com/test', 'test@example.com'); push @cmd, '-V2' if $v == 2; ok(run_script(\@cmd), "v$v init OK"); # index mirror ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror OK"); # read-only access my $ro_mirror = PublicInbox::Inbox->new({ inboxdir => $mirror, indexlevel => $level, }); $msgs = $ro_mirror->recent; is(scalar(@$msgs), 1, 'only one message, so far'); is($msgs->[0]->{mid}, 'm@1', 'read first message'); # update master $mime->header_set('Message-ID', ''); ok($im->add($mime), '2nd message added'); $im->done; # mirror updates is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK"); $msgs = $ro_mirror->recent; is(scalar(@$msgs), 2, '2nd message seen in mirror'); is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs], ['m@1','m@2'], 'got both messages in mirror'); # incremental index master (required for v1) ok(run_script([qw(-index -j0), $ibx->{inboxdir}, "-L$level"]), 'index master OK'); $msgs = $ro_master->recent; is(scalar(@$msgs), 2, '2nd message seen in master'); is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs], ['m@1','m@2'], 'got both messages in master'); my @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; is_deeply(\@rw_nums, [1, 2], 'master has expected NNTP articles'); my @ro_nums = map { $_->{num} } @{$ro_mirror->over->query_ts(0, 0)}; is_deeply(\@ro_nums, [1, 2], 'mirror has expected NNTP articles'); # remove message from master ok($im->remove($mime), '2nd message removed'); $im->done; @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; is_deeply(\@rw_nums, [1], 'unindex NNTP article'.$v.$level); if ($level ne 'basic') { ok(run_script(['-xcpdb', '-q', $mirror]), "v$v xcpdb OK"); is(PublicInbox::Admin::detect_indexlevel($ro_mirror), $level, 'indexlevel detectable by Admin after xcpdb v' .$v.$level); delete $ro_mirror->{$_} for (qw(over search)); my $mset = $ro_mirror->search->mset('m:m@2'); is($mset->size, 1, "v$v found m\@2 via Xapian on $level"); } # sync the mirror is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK"); $msgs = $ro_mirror->recent; is(scalar(@$msgs), 1, '2nd message gone from mirror'); is_deeply([map { $_->{mid} } @$msgs], ['m@1'], 'message unavailable in mirror'); if ($v == 2 && $level eq 'basic') { is_deeply([glob("$ibx->{inboxdir}/xap*/?/")], [], 'no Xapian shard directories for v2 basic'); } if ($level ne 'basic') { my $mset = $ro_mirror->search->reopen->mset('m:m@2'); is($mset->size, 0, "v$v m\@2 gone from Xapian in mirror on $level"); } # add another message to master and have the mirror # sync and reindex it my @expect = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; foreach my $i (3..5) { $mime->header_set('Message-ID', ""); ok($im->add($mime), "#$i message added"); push @expect, $i; } $im->done; is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); ok(run_script([qw(-index -j0 --reindex), $mirror]), "v$v index --reindex mirror OK"); @ro_nums = map { $_->{num} } @{$ro_mirror->over->query_ts(0, 0)}; @rw_nums = map { $_->{num} } @{$ibx->over->query_ts(0, 0)}; is_deeply(\@rw_nums, \@expect, "v$v master has expected NNTP articles"); is_deeply(\@ro_nums, \@expect, "v$v mirror matches master articles"); is(PublicInbox::Admin::detect_indexlevel($ro_mirror), $level, 'indexlevel detectable by Admin '.$v.$level); SKIP: { skip 'xapian-compact missing', 1 if !have_xapian_compact; my $cmd = [ qw(-compact), $mirror ]; ok(run_script($cmd, undef, { 2 => \$err}), "compact $level"); } }; # we can probably cull some other tests $import_index_incremental->($PI_TEST_VERSION, 'basic', $mime); SKIP: { require PublicInbox::Search; PublicInbox::Search::load_xapian() or skip('Xapian perl binding missing', 2); foreach my $l (qw(medium full)) { $import_index_incremental->($PI_TEST_VERSION, $l, $mime); } } done_testing(); public-inbox-1.6.1/t/init.t000066400000000000000000000154671377346120300155300ustar00rootroot00000000000000# Copyright (C) 2014-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Config; use PublicInbox::TestCommon; use PublicInbox::Admin; use File::Basename; my ($tmpdir, $for_destroy) = tmpdir(); sub quiet_fail { my ($cmd, $msg) = @_; my $err = ''; ok(!run_script($cmd, undef, { 2 => \$err, 1 => \$err }), $msg); } { local $ENV{PI_DIR} = "$tmpdir/.public-inbox/"; my $cfgfile = "$ENV{PI_DIR}/config"; my $cmd = [ '-init', 'blist', "$tmpdir/blist", qw(http://example.com/blist blist@example.com) ]; ok(run_script($cmd), 'public-inbox-init OK'); is(read_indexlevel('blist'), '', 'indexlevel unset by default'); ok(-e $cfgfile, "config exists, now"); ok(run_script($cmd), 'public-inbox-init OK (idempotent)'); chmod 0666, $cfgfile or die "chmod failed: $!"; $cmd = [ '-init', 'clist', "$tmpdir/clist", qw(http://example.com/clist clist@example.com)]; ok(run_script($cmd), 'public-inbox-init clist OK'); is((stat($cfgfile))[2] & 07777, 0666, "permissions preserved"); $cmd = [ '-init', 'clist', '-V2', "$tmpdir/clist", qw(http://example.com/clist clist@example.com) ]; quiet_fail($cmd, 'attempting to init V2 from V1 fails'); ok(!-e "$cfgfile.lock", 'no lock leftover after init'); open my $lock, '+>', "$cfgfile.lock" or die; $cmd = [ '-init', 'lock', "$tmpdir/lock", qw(http://example.com/lock lock@example.com) ]; ok(-e "$cfgfile.lock", 'lock exists'); # this calls exit(): my $err = ''; ok(!run_script($cmd, undef, {2 => \$err}), 'lock init failed'); is($? >> 8, 255, 'got expected exit code on lock failure'); ok(unlink("$cfgfile.lock"), '-init did not unlink lock on failure'); } { my $env = { PI_DIR => "$tmpdir/.public-inbox/" }; my $rdr = { 2 => \(my $err = '') }; my $cmd = [ '-init', 'alist', "$tmpdir/a\nlist", qw(http://example.com/alist alist@example.com) ]; ok(!run_script($cmd, $env, $rdr), 'public-inbox-init rejects LF in inboxdir'); like($err, qr/`\\n' not allowed in `/s, 'reported \\n'); is_deeply([glob("$tmpdir/.public-inbox/pi-init-*")], [], 'no junk files left behind'); # "git init" does this, too $cmd = [ '-init', 'deep-non-existent', "$tmpdir/a/b/c/d", qw(http://example.com/abcd abcd@example.com) ]; $err = ''; ok(run_script($cmd, $env, $rdr), 'initializes non-existent hierarchy'); ok(-d "$tmpdir/a/b/c/d", 'directory created'); open my $fh, '>', "$tmpdir/d" or BAIL_OUT "open: $!"; close $fh; $cmd = [ '-init', 'd-f-conflict', "$tmpdir/d/f/conflict", qw(http://example.com/conflict onflict@example.com) ]; ok(!run_script($cmd, $env, $rdr), 'fails on D/F conflict'); } SKIP: { require_mods(qw(DBD::SQLite Search::Xapian::WritableDatabase), 2); require_git(2.6, 1) or skip "git 2.6+ required", 2; use_ok 'PublicInbox::Msgmap'; local $ENV{PI_DIR} = "$tmpdir/.public-inbox/"; local $ENV{PI_EMERGENCY} = "$tmpdir/.public-inbox/emergency"; my $cfgfile = "$ENV{PI_DIR}/config"; my $cmd = [ '-init', '-V2', 'v2list', "$tmpdir/v2list", qw(http://example.com/v2list v2list@example.com) ]; ok(run_script($cmd), 'public-inbox-init -V2 OK'); ok(-d "$tmpdir/v2list", 'v2list directory exists'); ok(-f "$tmpdir/v2list/msgmap.sqlite3", 'msgmap exists'); ok(-d "$tmpdir/v2list/all.git", 'catch-all.git directory exists'); $cmd = [ '-init', 'v2list', "$tmpdir/v2list", qw(http://example.com/v2list v2list@example.com) ]; ok(run_script($cmd), 'public-inbox-init is idempotent'); ok(! -d "$tmpdir/public-inbox" && !-d "$tmpdir/objects", 'idempotent invocation w/o -V2 does not make inbox v1'); is(read_indexlevel('v2list'), '', 'indexlevel unset by default'); $cmd = [ '-init', 'v2list', "-V1", "$tmpdir/v2list", qw(http://example.com/v2list v2list@example.com) ]; quiet_fail($cmd, 'initializing V2 as V1 fails'); foreach my $lvl (qw(medium basic)) { my $dir = "$tmpdir/v2$lvl"; $cmd = [ '-init', "v2$lvl", '-V2', '-L', $lvl, $dir, "http://example.com/v2$lvl", "v2$lvl\@example.com" ]; ok(run_script($cmd), "-init -L $lvl"); is(read_indexlevel("v2$lvl"), $lvl, "indexlevel set to '$lvl'"); my $ibx = PublicInbox::Inbox->new({ inboxdir => $dir }); is(PublicInbox::Admin::detect_indexlevel($ibx), $lvl, 'detected expected level w/o config'); ok(!$ibx->{-skip_docdata}, 'docdata written by default'); } for my $v (1, 2) { my $name = "v$v-skip-docdata"; my $dir = "$tmpdir/$name"; $cmd = [ '-init', $name, "-V$v", '--skip-docdata', $dir, "http://example.com/$name", "$name\@example.com" ]; ok(run_script($cmd), "-init -V$v --skip-docdata"); my $ibx = PublicInbox::Inbox->new({ inboxdir => $dir }); is(PublicInbox::Admin::detect_indexlevel($ibx), 'full', "detected default indexlevel -V$v"); ok($ibx->{-skip_docdata}, "docdata skip set -V$v"); ok($ibx->search->has_threadid, 'has_threadid flag set on new inbox'); } # loop for idempotency for (1..2) { $cmd = [ '-init', '-V2', '-S1', 'skip1', "$tmpdir/skip1", qw(http://example.com/skip1 skip1@example.com) ]; ok(run_script($cmd), "--skip-epoch 1"); my $gits = [ glob("$tmpdir/skip1/git/*.git") ]; is_deeply($gits, ["$tmpdir/skip1/git/1.git"], 'skip OK'); } $cmd = [ '-init', '-V2', '--skip-epoch=2', 'skip2', "$tmpdir/skip2", qw(http://example.com/skip2 skip2@example.com) ]; ok(run_script($cmd), "--skip-epoch 2"); my $gits = [ glob("$tmpdir/skip2/git/*.git") ]; is_deeply($gits, ["$tmpdir/skip2/git/2.git"], 'skipping 2 works, too'); xsys(qw(git config), "--file=$ENV{PI_DIR}/config", 'publicinboxmda.spamcheck', 'none') == 0 or BAIL_OUT "git config $?"; my $addr = 'skip3@example.com'; $cmd = [ qw(-init -V2 -Lbasic --skip-artnum=12 skip3), "$tmpdir/skip3", qw(http://example.com/skip3), $addr ]; ok(run_script($cmd), '--skip-artnum -V2'); my $env = { ORIGINAL_RECIPIENT => $addr }; my $mid = 'skip-artnum@example.com'; my $msg = "Message-ID: <$mid>\n\n"; my $rdr = { 0 => \$msg, 2 => \(my $err = '') }; ok(run_script([qw(-mda --no-precheck)], $env, $rdr), 'deliver V1'); diag "err=$err" if $err; my $mm = PublicInbox::Msgmap->new_file("$tmpdir/skip3/msgmap.sqlite3"); my $n = $mm->num_for($mid); is($n, 13, 'V2 NNTP article numbers skipped via --skip-artnum'); $addr = 'skip4@example.com'; $env = { ORIGINAL_RECIPIENT => $addr }; $cmd = [ qw(-init -V1 --skip-artnum 12 -Lmedium skip4), "$tmpdir/skip4", qw(http://example.com/skip4), $addr ]; ok(run_script($cmd), '--skip-artnum -V1'); $err = ''; ok(run_script([qw(-mda --no-precheck)], $env, $rdr), 'deliver V1'); diag "err=$err" if $err; $mm = PublicInbox::Msgmap->new("$tmpdir/skip4"); $n = $mm->num_for($mid); is($n, 13, 'V1 NNTP article numbers skipped via --skip-artnum'); } done_testing(); sub read_indexlevel { my ($inbox) = @_; my $cmd = [ qw(git config), "publicinbox.$inbox.indexlevel" ]; my $env = { GIT_CONFIG => "$ENV{PI_DIR}/config" }; chomp(my $lvl = xqx($cmd, $env)); $lvl; } public-inbox-1.6.1/t/iso-2202-jp.eml000066400000000000000000000004071377346120300166470ustar00rootroot00000000000000Message-Id: <199707281508.AAA24167@hoyogw.example> Date: Tue, 29 Jul 97 00:08:29 +0900 From: matz@example.com Subject: [ruby-dev:4] To: ruby-dev@example Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-2022-JP |$B$1$$$8$e!w:#$O%U%j!<(B(^^;;;$B$G$9(B. public-inbox-1.6.1/t/kqnotify.t000066400000000000000000000026071377346120300164210ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ # # Ensure KQNotify can pick up rename(2) and link(2) operations # used by Maildir writing tools use strict; use Test::More; use PublicInbox::TestCommon; plan skip_all => 'KQNotify is only for *BSD systems' if $^O !~ /bsd/; require_mods('IO::KQueue'); use_ok 'PublicInbox::KQNotify'; my ($tmpdir, $for_destroy) = tmpdir(); mkdir "$tmpdir/new" or BAIL_OUT "mkdir: $!"; open my $fh, '>', "$tmpdir/tst" or BAIL_OUT "open: $!"; close $fh or BAIL_OUT "close: $!"; my $kqn = PublicInbox::KQNotify->new; my $mask = PublicInbox::KQNotify::MOVED_TO_OR_CREATE(); my $w = $kqn->watch("$tmpdir/new", $mask); rename("$tmpdir/tst", "$tmpdir/new/tst") or BAIL_OUT "rename: $!"; my $hit = [ map { $_->fullname } $kqn->read ]; is_deeply($hit, ["$tmpdir/new/tst"], 'rename(2) detected (via NOTE_EXTEND)'); open $fh, '>', "$tmpdir/tst" or BAIL_OUT "open: $!"; close $fh or BAIL_OUT "close: $!"; link("$tmpdir/tst", "$tmpdir/new/link") or BAIL_OUT "link: $!"; $hit = [ grep m!/link$!, map { $_->fullname } $kqn->read ]; is_deeply($hit, ["$tmpdir/new/link"], 'link(2) detected (via NOTE_WRITE)'); $w->cancel; link("$tmpdir/new/tst", "$tmpdir/new/link2") or BAIL_OUT "link: $!"; $hit = [ map { $_->fullname } $kqn->read ]; is_deeply($hit, [], 'link(2) not detected after cancel'); done_testing; public-inbox-1.6.1/t/linkify.t000066400000000000000000000075271377346120300162300ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Linkify; { my $l = PublicInbox::Linkify->new; my $u = 'http://example.com/url-with-trailing-period'; my $s = $u . '.'; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq($u.), 'trailing period not in URL'); } { my $l = PublicInbox::Linkify->new; my $u = 'http://i-forgot-trailing-slash.example.com'; my $s = $u; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq($u), 'missing trailing slash OK'); } # handle URLs in parenthesized statements { my $l = PublicInbox::Linkify->new; my $u = 'http://example.com/'; my $s = "(see: $u)"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq{(see: $u)}, 'trailing ) not in URL'); } { my $l = PublicInbox::Linkify->new; my $u = 'http://example.com/url-with-trailing-semicolon'; my $s = $u . ';'; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq($u;), 'trailing semicolon not in URL'); } { my $l = PublicInbox::Linkify->new; my $u = 'http://example.com/url-with-(parens)'; my $s = "hello $u world"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq(hello $u world), 'URL preserved'); $s = "$u. hi"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq($u. hi), 'paired () in URL OK'); $u .= "?query=a"; $s = "hello $u world"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq(hello $u world), 'query preserved'); $u .= "#fragment"; $s = "hello $u world"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq(hello $u world), 'query + fragment preserved'); $u = "http://example.com/"; $s = "hello $u world"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq(hello $u world), "root URL preserved"); $u = "http://example.com/#fragment"; $s = "hello $u world"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq(hello $u world), "root + fragment"); } # Markdown compatibility { my $l = PublicInbox::Linkify->new; my $u = 'http://example.com/'; my $s = "[markdown]($u)"; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq![markdown]($u)!, 'Markdown-compatible'); $s = qq![markdown]($u "title")!; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq![markdown]($u "title")!, 'Markdown title compatible'); $s = qq![markdown]($u).!; $s = $l->linkify_1($s); $s = $l->linkify_2($s); is($s, qq![markdown]($u).!, 'Markdown-compatible end of sentence'); } # Perl and Ruby code compatibility { my $l = PublicInbox::Linkify->new; my $u = 'http://example.com/'; foreach my $q ("'%s'", '"%s"', 'q!%s!', 'q(%s)') { # Perl my $s = sprintf("my \$var = $q;", $u); $s = $l->linkify_1($s); $s = $l->linkify_2($s); like($s, qr/>\Q$u\Elinkify_1($s); $s = $l->linkify_2($s); like($s, qr/>\Q$u\Enew; my $s = '(see http://example.com/).'; $s = $l->linkify_1($s); $s = $l->linkify_2($s); like($s, qr!\(see ]+>http://example\.com/\)\.!s, 'punctuation with unpaired ) OK') } if ('IDN example: ') { my $hc = '月'; my $u = "http://www.\x{6708}.example.com/"; my $s = $u; my $l = PublicInbox::Linkify->new; $s = $l->linkify_1($s); $s = $l->linkify_2($s); my $expect = qq{http://www.$hc.example.com/}; is($s, $expect, 'IDN message escaped properly'); } done_testing(); public-inbox-1.6.1/t/main-bin/000077500000000000000000000000001377346120300160555ustar00rootroot00000000000000public-inbox-1.6.1/t/main-bin/spamc000077500000000000000000000000501377346120300171010ustar00rootroot00000000000000#!/bin/sh # trivial spamc mock exec cat public-inbox-1.6.1/t/mda-mime.eml000066400000000000000000000006401377346120300165500ustar00rootroot00000000000000From: a@example.com Subject: blah Cc: test-public@example.com Message-ID: Content-Type: multipart/alternative; boundary="b" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit --b Content-Type: text/html; charset=UTF-8 Content-Transfer-Encoding: base64 PGh0bWw+PGJvZHk+aGk8L2JvZHk+PC9odG1sPg== --b Content-Type: text/plain Content-Transfer-Encoding: quoted-printable hi =3D "bye"= --b-- public-inbox-1.6.1/t/mda.t000066400000000000000000000221111377346120300153060ustar00rootroot00000000000000# Copyright (C) 2014-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use Cwd qw(getcwd); use PublicInbox::MID qw(mid2path); use PublicInbox::Git; use PublicInbox::InboxWritable; use PublicInbox::TestCommon; use PublicInbox::Import; my ($tmpdir, $for_destroy) = tmpdir(); my $home = "$tmpdir/pi-home"; my $pi_home = "$home/.public-inbox"; my $pi_config = "$pi_home/config"; my $maindir = "$tmpdir/main.git"; my $main_bin = getcwd()."/t/main-bin"; my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock my $fail_bin = getcwd()."/t/fail-bin"; my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc spam mock my $addr = 'test-public@example.com'; my $cfgpfx = "publicinbox.test"; my $faildir = "$home/faildir/"; my $git = PublicInbox::Git->new($maindir); my $fail_bad_header = sub ($$$) { my ($good_rev, $msg, $in) = @_; my @f = glob("$faildir/*/*"); unlink @f if @f; my ($out, $err) = ("", ""); my $opt = { 0 => \$in, 1 => \$out, 2 => \$err }; local $ENV{PATH} = $main_path; ok(run_script(['-mda'], undef, $opt), "no error on undeliverable ($msg)"); my $rev = $git->qx(qw(rev-list HEAD)); chomp $rev; is($rev, $good_rev, "bad revision not committed ($msg)"); @f = glob("$faildir/*/*"); is(scalar @f, 1, "faildir written to"); [ $in, $out, $err ]; }; { ok(-x "$main_bin/spamc", "spamc ham mock found (run in top of source tree"); ok(-x "$fail_bin/spamc", "spamc mock found (run in top of source tree"); is(1, mkdir($home, 0755), "setup ~/ for testing"); is(1, mkdir($pi_home, 0755), "setup ~/.public-inbox"); PublicInbox::Import::init_bare($maindir); open my $fh, '>>', $pi_config or die; print $fh <header('From'); my ($author) = PublicInbox::Address::names($from); my ($email) = PublicInbox::Address::emails($from); my $date = $msg->header('Date'); is('Eléanor', encode('us-ascii', my $tmp = $author, Encode::HTMLCREF), 'HTML conversion is correct'); is($email, 'e@example.com', 'email parsed correctly'); is($date, 'Thu, 01 Jan 1970 00:00:00 +0000', 'message date parsed correctly'); $author; }; die $@ if $@; { my $good_rev; local $ENV{PI_EMERGENCY} = $faildir; local $ENV{HOME} = $home; local $ENV{ORIGINAL_RECIPIENT} = $addr; my $in = < To: You Cc: $addr Message-Id: Subject: hihi Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF # ensure successful message delivery { local $ENV{PATH} = $main_path; ok(run_script(['-mda'], undef, { 0 => \$in })); my $rev = $git->qx(qw(rev-list HEAD)); like($rev, qr/\A[a-f0-9]{40}/, "good revision committed"); chomp $rev; my $cmt = $git->cat_file($rev); like($$cmt, qr/^author Me 0 \+0000\n/m, "author info set correctly"); like($$cmt, qr/^committer test /m, "committer info set correctly"); $good_rev = $rev; } # ensure failures work, fail with bad spamc { my @prev = <$faildir/new/*>; is(scalar @prev, 0 , "nothing in PI_EMERGENCY before"); local $ENV{PATH} = $fail_path; ok(run_script(['-mda'], undef, { 0 => \$in })); my @revs = $git->qx(qw(rev-list HEAD)); is(scalar @revs, 1, "bad revision not committed"); my @new = <$faildir/new/*>; is(scalar @new, 1, "PI_EMERGENCY is written to"); } $fail_bad_header->($good_rev, "bad recipient", <<""); From: Me To: You Message-Id: Subject: hihi Date: Thu, 01 Jan 1970 00:00:00 +0000 my $fail = $fail_bad_header->($good_rev, "duplicate Message-ID", <<""); From: Me To: You Cc: $addr Message-ID: Subject: hihi Date: Thu, 01 Jan 1970 00:00:00 +0000 like($fail->[2], qr/CONFLICT/, "duplicate Message-ID message"); $fail_bad_header->($good_rev, "missing From:", <<""); To: $addr Message-ID: Subject: hihi Date: Thu, 01 Jan 1970 00:00:00 +0000 $fail_bad_header->($good_rev, "short subject:", <<""); To: $addr From: cat\@example.com Message-ID: Subject: a Date: Thu, 01 Jan 1970 00:00:00 +0000 $fail_bad_header->($good_rev, "no date", <<""); To: $addr From: u\@example.com Message-ID: Subject: hihi $fail_bad_header->($good_rev, "bad date", <<""); To: $addr From: u\@example.com Message-ID: Subject: hihi Date: deadbeef } # spam training { local $ENV{PI_EMERGENCY} = $faildir; local $ENV{HOME} = $home; local $ENV{ORIGINAL_RECIPIENT} = $addr; local $ENV{PATH} = $main_path; my $mid = 'spam-train@example.com'; my $in = < To: You Cc: $addr Message-ID: <$mid> Subject: this message will be trained as spam Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF { # deliver the spam message, first ok(run_script(['-mda'], undef, { 0 => \$in })); my $path = mid2path($mid); my $msg = $git->cat_file("HEAD:$path"); like($$msg, qr/\Q$mid\E/, "message delivered"); # now train it local $ENV{GIT_AUTHOR_EMAIL} = 'trainer@example.com'; local $ENV{GIT_COMMITTER_EMAIL} = 'trainer@example.com'; local $ENV{GIT_COMMITTER_NAME}; delete $ENV{GIT_COMMITTER_NAME}; ok(run_script(['-learn', 'spam'], undef, { 0 => $msg }), "no failure from learning spam"); ok(run_script(['-learn', 'spam'], undef, { 0 => $msg }), "no failure from learning spam idempotently"); } } # train ham message { local $ENV{PI_EMERGENCY} = $faildir; local $ENV{HOME} = $home; local $ENV{ORIGINAL_RECIPIENT} = $addr; local $ENV{PATH} = $main_path; my $mid = 'ham-train@example.com'; my $in = < To: You Cc: $addr Message-ID: <$mid> Subject: this message will be trained as spam Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF # now train it # these should be overridden local $ENV{GIT_AUTHOR_EMAIL} = 'trainer@example.com'; local $ENV{GIT_COMMITTER_EMAIL} = 'trainer@example.com'; ok(run_script(['-learn', 'ham'], undef, { 0 => \$in }), "learned ham without failure"); my $path = mid2path($mid); my $msg = $git->cat_file("HEAD:$path"); like($$msg, qr/\Q$mid\E/, "ham message delivered"); ok(run_script(['-learn', 'ham'], undef, { 0 => \$in }), "learned ham idempotently "); # ensure trained email is filtered, too my $mime = eml_load 't/mda-mime.eml'; ($mid) = ($mime->header_raw('message-id') =~ /<([^>]+)>/); { $in = $mime->as_string; ok(run_script(['-learn', 'ham'], undef, { 0 => \$in }), "learned ham without failure"); my $path = mid2path($mid); $msg = $git->cat_file("HEAD:$path"); like($$msg, qr/<\Q$mid\E>/, "ham message delivered"); unlike($$msg, qr//i, ' filtered'); } } # List-ID based delivery { local $ENV{PI_EMERGENCY} = $faildir; local $ENV{HOME} = $home; local $ENV{ORIGINAL_RECIPIENT} = undef; delete $ENV{ORIGINAL_RECIPIENT}; local $ENV{PATH} = $main_path; my $list_id = 'foo.example.com'; my $mid = 'list-id-delivery@example.com'; my $in = < To: You Cc: $addr Message-ID: <$mid> List-Id: <$list_id> Subject: this message will be trained as spam Date: Thu, 01 Jan 1970 00:00:00 +0000 EOF xsys(qw(git config --file), $pi_config, "$cfgpfx.listid", uc $list_id); $? == 0 or die "failed to set listid $?"; ok(run_script(['-mda'], undef, { 0 => \$in }), 'mda OK with List-Id match'); my $path = mid2path($mid); my $msg = $git->cat_file("HEAD:$path"); like($$msg, qr/\Q$list_id\E/, 'delivered message w/ List-ID matches'); # try a message w/o precheck $in = < List-Id: <$list_id> this message would not be accepted without --no-precheck EOF my ($out, $err) = ('', ''); my $rdr = { 0 => \$in, 1 => \$out, 2 => \$err }; ok(run_script(['-mda', '--no-precheck'], undef, $rdr), 'mda OK with List-Id match and --no-precheck'); my $cur = $git->qx(qw(diff HEAD~1..HEAD)); like($cur, qr/this message would not be accepted without --no-precheck/, '--no-precheck delivered message anyways'); # try a message with multiple List-ID headers $in = < List-ID: <$list_id> Message-ID: <2lids\@example> Subject: two List-IDs From: user To: $addr Date: Fri, 02 Oct 1993 00:00:00 +0000 EOF ($out, $err) = ('', ''); ok(run_script(['-mda'], undef, $rdr), 'mda OK with multiple List-Id matches'); $cur = $git->qx(qw(diff HEAD~1..HEAD)); like($cur, qr/^\+Message-ID: <2lids\@example>/sm, 'multi List-ID match delivered'); like($err, qr/multiple List-ID/, 'warned about multiple List-ID'); # ensure -learn rm works after inbox address is updated ($out, $err) = ('', ''); xsys(qw(git config --file), $pi_config, "$cfgpfx.address", 'updated-address@example.com'); ok(run_script(['-learn', 'rm'], undef, $rdr), 'rm-ed via -learn'); $cur = $git->qx(qw(diff HEAD~1..HEAD)); like($cur, qr/^-Message-ID: <2lids\@example>/sm, 'changed in git'); } done_testing(); public-inbox-1.6.1/t/mda_filter_rubylang.t000066400000000000000000000036241377346120300205660ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::Config; use PublicInbox::TestCommon; require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::V2Writable'; my ($tmpdir, $for_destroy) = tmpdir(); my $pi_config = "$tmpdir/pi_config"; local $ENV{PI_CONFIG} = $pi_config; local $ENV{PI_EMERGENCY} = "$tmpdir/emergency"; my @cfg = ('git', 'config', "--file=$pi_config"); is(xsys(@cfg, 'publicinboxmda.spamcheck', 'none'), 0); for my $v (qw(V1 V2)) { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my $cfgpfx = "publicinbox.$v"; my $inboxdir = "$tmpdir/$v"; my $addr = "test-$v\@example.com"; my $cmd = [ '-init', "-$v", $v, $inboxdir, "http://example.com/$v", $addr ]; ok(run_script($cmd), 'public-inbox-init'); ok(run_script([qw(-index -j0), $inboxdir]), 'public-inbox-index'); is(xsys(@cfg, "$cfgpfx.filter", 'PublicInbox::Filter::RubyLang'), 0); is(xsys(@cfg, "$cfgpfx.altid", 'serial:alerts:file=msgmap.sqlite3'), 0); for my $i (1..2) { my $env = { ORIGINAL_RECIPIENT => $addr }; my $opt = { 0 => \(< Date: Sat, 05 Jan 2019 04:19:17 +0000 something EOF ok(run_script(['-mda'], $env, $opt), 'message delivered'); } my $config = PublicInbox::Config->new; my $ibx = $config->lookup_name($v); # make sure all serials are searchable: for my $i (1..2) { my $mset = $ibx->search->mset("alerts:$i"); is($mset->size, 1, "got one result for alerts:$i"); my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i"); } is_deeply([], \@warn, 'no warnings'); # TODO: public-inbox-learn doesn't know about filters # (but -watch does) } done_testing(); public-inbox-1.6.1/t/mid.t000066400000000000000000000045121377346120300153230ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::Eml; use PublicInbox::MID qw(mid_escape mids references mids_for_index id_compress); is(mid_escape('foo!@(bar)'), 'foo!@(bar)'); is(mid_escape('foo%!@(bar)'), 'foo%25!@(bar)'); is(mid_escape('foo%!@(bar)'), 'foo%25!@(bar)'); # n.b: this is probably invalid since we dropped CGI for PSGI: like(id_compress('foo%bar@wtf'), qr/\A[a-f0-9]{40}\z/, "percent always converted to sha1 to workaround buggy httpds"); is(id_compress('foobar-wtf'), 'foobar-wtf', 'regular ID not compressed'); { my $mime = PublicInbox::Eml->new("Message-ID: \n\n"); $mime->header_set('X-Alt-Message-ID', ''); is_deeply(['mid-1@a'], mids($mime->header_obj), 'mids in common case'); $mime->header_set('Message-Id', '', ''); is_deeply(['mid-1@a', 'mid-2@b'], mids($mime->header_obj), '2 mids'); $mime->header_set('Message-Id', '', ''); is_deeply(['mid-1@a'], mids($mime->header_obj), 'dup mids'); $mime->header_set('Message-Id', ' comment'); is_deeply(['mid-1@a'], mids($mime->header_obj), 'comment ignored'); $mime->header_set('Message-Id', 'bare-mid'); is_deeply(['bare-mid'], mids($mime->header_obj), 'bare mid OK'); $mime->header_set('References', ' '); $mime->header_set('In-Reply-To', ''); is_deeply(['hello', 'world', 'weld'], references($mime->header_obj), 'references combines with In-Reply-To'); $mime->header_set('References', "\n\t"); $mime->header_set('In-Reply-To'); is_deeply(references($mime->header_obj), ['hello', 'world'], 'multiline References OK'); $mime->header_set('References', ""); is_deeply(references($mime->header_obj), ['helloworld'], 'drop \t in References <656C30A1EFC89F6B2082D9B6@localhost>'); $mime->header_set('Message-ID', ""); is_deeply(mids($mime->header_obj), ['helloworld'], 'drop \t in Message-ID'); $mime->header_set('To', 'u@example.com'); $mime->header_set('References', ' '); is_deeply(references($mime->header_obj), [qw(hello world)]); is_deeply([qw(helloworld alt-id-for-nntp)], mids_for_index($mime->header_obj), 'X-Alt-Message-ID can be indexed'); } done_testing(); 1; public-inbox-1.6.1/t/mime.t000066400000000000000000000075021377346120300155030ustar00rootroot00000000000000#!perl -w # Copyright (C) 2017-2020 all contributors # This library is free software; you can redistribute it and/or modify # it under the same terms as Perl itself. # Artistic or GPL-1+ use strict; use Test::More; use PublicInbox::TestCommon; use PublicInbox::MsgIter; my @classes = qw(PublicInbox::Eml); SKIP: { require_mods('Email::MIME', 1); push @classes, 'PublicInbox::MIME'; }; use_ok $_ for @classes; local $SIG{__WARN__} = sub {}; # needed for old Email::Simple (used by E::M) for my $cls (@classes) { my $msg = $cls->new(<<'EOF'); From: Richard Hansen To: git@vger.kernel.org Cc: Richard Hansen Subject: [PATCH 0/2] minor diff orderfile documentation improvements Date: Mon, 9 Jan 2017 19:40:29 -0500 Message-Id: <20170110004031.57985-1-hansenr@google.com> X-Mailer: git-send-email 2.11.0.390.gc69c2f50cf-goog Content-Type: multipart/signed; protocol="application/pkcs7-signature"; micalg=sha-256; boundary="94eb2c0bc864b76ba30545b2bca9" --94eb2c0bc864b76ba30545b2bca9 Richard Hansen (2): diff: document behavior of relative diff.orderFile diff: document the pattern format for diff.orderFile Documentation/diff-config.txt | 5 ++++- Documentation/diff-options.txt | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) --94eb2c0bc864b76ba30545b2bca9 Content-Type: application/pkcs7-signature; name="smime.p7s" Content-Transfer-Encoding: base64 Content-Disposition: attachment; filename="smime.p7s" Content-Description: (truncated) S/MIME Cryptographic Signature dkTlB69771K2eXK4LcHSH/2LqX+VYa3K44vrx1ruzjXdNWzIpKBy0weFNiwnJCGofvCysM2RCSI1 --94eb2c0bc864b76ba30545b2bca9-- EOF my @parts = $msg->subparts; my $exp = <body, $exp, 'body matches expected'); my $raw = <<'EOF'; Date: Wed, 18 Jan 2017 13:28:32 -0500 From: Santiago Torres To: Junio C Hamano Cc: git@vger.kernel.org, peff@peff.net, sunshine@sunshineco.com, walters@verbum.org, Lukas Puehringer Subject: Re: [PATCH v6 4/6] builtin/tag: add --format argument for tag -v Message-ID: <20170118182831.pkhqu2np3bh2puei@LykOS.localdomain> References: <20170117233723.23897-1-santiago@nyu.edu> <20170117233723.23897-5-santiago@nyu.edu> MIME-Version: 1.0 Content-Type: multipart/signed; micalg=pgp-sha256; protocol="application/pgp-signature"; boundary="r24xguofrazenjwe" Content-Disposition: inline In-Reply-To: --r24xguofrazenjwe Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable your tree directly?=20 --r24xguofrazenjwe Content-Type: application/pgp-signature; name="signature.asc" -----BEGIN PGP SIGNATURE----- =7wIb -----END PGP SIGNATURE----- --r24xguofrazenjwe-- EOF $msg = $cls->new($raw); my $nr = 0; msg_iter($msg, sub { my ($part, $level, @ex) = @{$_[0]}; is($level, 1, 'at expected level'); if (join('fail if $#ex > 0', @ex) eq '1') { is($part->body_str, "your tree directly? \r\n", 'body OK'); } elsif (join('fail if $#ex > 0', @ex) eq '2') { is($part->body, "-----BEGIN PGP SIGNATURE-----\n\n" . "=7wIb\n" . "-----END PGP SIGNATURE-----\n", 'sig "matches"'); } else { fail "unexpected part\n"; } $nr++; }); is($nr, 2, 'got 2 parts'); is($msg->as_string, $raw, 'stringified sufficiently close to original'); } done_testing(); public-inbox-1.6.1/t/msg_iter-nested.eml000066400000000000000000000005451377346120300201570ustar00rootroot00000000000000From: root@localhost MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="outer" --outer From: sub@localhost MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="inner" --inner MIME-Version: 1.0 a --inner MIME-Version: 1.0 b --inner-- --outer MIME-Version: 1.0 sig --outer-- public-inbox-1.6.1/t/msg_iter-order.eml000066400000000000000000000002561377346120300200070ustar00rootroot00000000000000From: root@localhost MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="b" --b MIME-Version: 1.0 a --b MIME-Version: 1.0 b --b-- public-inbox-1.6.1/t/msg_iter.t000066400000000000000000000051641377346120300163670ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Hval qw(ascii_html); use_ok('PublicInbox::MsgIter'); { my $mime = eml_load 't/msg_iter-order.eml'; my @parts; msg_iter($mime, sub { my ($part, $level, @ex) = @{$_[0]}; my $s = $part->body_str; $s =~ s/\s+//s; push @parts, [ $s, $level, @ex ]; }); is_deeply(\@parts, [ [ qw(a 1 1) ], [ qw(b 1 2) ] ], 'order is fine'); } { my $mime = eml_load 't/msg_iter-nested.eml'; my @parts; msg_iter($mime, sub { my ($part, $level, @ex) = @{$_[0]}; my $s = $part->body_str; $s =~ s/\s+//s; push @parts, [ $s, $level, @ex ]; }); is_deeply(\@parts, [ [qw(a 2 1.1)], [qw(b 2 1.2)], [qw(sig 1 2)] ], 'nested part shows up properly'); } { my $mime = eml_load 't/iso-2202-jp.eml'; my $raw = ''; msg_iter($mime, sub { my ($part, $level, @ex) = @{$_[0]}; my ($s, $err) = msg_part_text($part, 'text/plain'); ok(!$err, 'no error'); $raw .= $s; }); ok(length($raw) > 0, 'got non-empty message'); is(index($raw, '$$$'), -1, 'no unescaped $$$'); } { my $mime = eml_load 't/x-unknown-alpine.eml'; my $raw = ''; msg_iter($mime, sub { my ($part, $level, @ex) = @{$_[0]}; my ($s, $err) = msg_part_text($part, 'text/plain'); $raw .= $s; }); like($raw, qr!^\thttps://!ms, 'tab expanded with X-UNKNOWN'); like(ascii_html($raw), qr/• bullet point/s, 'got bullet point when X-UNKNOWN assumes UTF-8'); } { # API not finalized my @warn; local $SIG{__WARN__} = sub { push @warn, [ @_ ] }; my $attr = "So and so wrote:\n"; my $q = "> hello world\n" x 10; my $nq = "hello world\n" x 10; my @sections = PublicInbox::MsgIter::split_quotes($attr . $q . $nq); is($sections[0], $attr, 'attribution matches'); is($sections[1], $q, 'quoted section matches'); is($sections[2], $nq, 'non-quoted section matches'); is(scalar(@sections), 3, 'only three sections for short message'); is_deeply(\@warn, [], 'no warnings'); $q x= 3300; $nq x= 3300; @sections = PublicInbox::MsgIter::split_quotes($attr . $q . $nq); is_deeply(\@warn, [], 'no warnings on giant message'); is(join('', @sections), $attr . $q . $nq, 'result matches expected'); is(shift(@sections), $attr, 'attribution is first section'); my @check = ('', ''); while (defined(my $l = shift @sections)) { next if $l eq ''; like($l, qr/\n\z/s, 'section ends with newline'); my $idx = ($l =~ /\A>/) ? 0 : 1; $check[$idx] .= $l; } is($check[0], $q, 'long quoted section matches'); is($check[1], $nq, 'long quoted section matches'); } done_testing(); 1; public-inbox-1.6.1/t/msgmap.t000066400000000000000000000043701377346120300160400ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; require_mods('DBD::SQLite'); use_ok 'PublicInbox::Msgmap'; my ($tmpdir, $for_destroy) = tmpdir(); my $d = PublicInbox::Msgmap->new($tmpdir, 1); my %mid2num; my %num2mid; my @mids = qw(a@b c@d e@f g@h aa@bb aa@cc); is_deeply([$d->minmax], [undef,undef], "empty min max on new DB"); foreach my $mid (@mids) { my $n = $d->mid_insert($mid); ok($n, "mid $mid inserted"); $mid2num{$mid} = $n; $num2mid{$n} = $mid; } $@ = undef; my $ret = $d->mid_insert('a@b'); is($ret, undef, 'duplicate mid_insert in undef result'); is($d->num_for('a@b'), $mid2num{'a@b'}, 'existing number not clobbered'); my $next = (sort(keys %num2mid))[-1]; is($d->mid_insert('ok@unique'), $next + 1, 'got expected num after failing mid_insert'); foreach my $n (keys %num2mid) { is($d->mid_for($n), $num2mid{$n}, "num:$n maps correctly"); } foreach my $mid (@mids) { is($d->num_for($mid), $mid2num{$mid}, "mid:$mid maps correctly"); } is(undef, $d->last_commit, "last commit not set"); my $lc = 'deadbeef' x 5; is(undef, $d->last_commit($lc), 'previous last commit (undef) returned'); is($lc, $d->last_commit, 'last commit was set correctly'); my $nc = 'deaddead' x 5; is($lc, $d->last_commit($nc), 'returned previously set commit'); is($nc, $d->last_commit, 'new commit was set correctly'); is($d->mid_delete('a@b'), 1, 'deleted a@b'); is($d->mid_delete('a@b') + 0, 0, 'delete again returns zero'); is(undef, $d->num_for('a@b'), 'num_for fails on deleted msg'); $d = undef; ok($d = PublicInbox::Msgmap->new($tmpdir, 1), 'idempotent DB creation'); my ($min, $max) = $d->minmax; ok($min > 0, "article min OK"); ok($max > 0 && $max < 10, "article max OK"); ok($min < $max, "article counts OK"); my $orig = $d->mid_insert('spam@1'); $d->mid_delete('spam@1'); is($d->mid_insert('spam@2'), 1 + $orig, "last number not recycled"); my $tmp = $d->tmp_clone($tmpdir); is_deeply([$d->minmax], [$tmp->minmax], 'Cloned temporary DB matches'); ok($tmp->mid_delete('spam@2'), 'temporary DB is writable'); is(eval { $tmp->atfork_prepare; $tmp->atfork_parent; 'ok' }, 'ok', 'atfork_* work on tmp_clone'); done_testing(); public-inbox-1.6.1/t/msgtime.t000066400000000000000000000103461377346120300162210ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::MsgTime; use PublicInbox::TestCommon; our $received_date = 'Mon, 22 Jan 2007 13:16:24 -0500'; sub datestamp ($) { my ($date) = @_; local $SIG{__WARN__} = sub {}; # Suppress warnings my $mime = PublicInbox::Eml->new(<<"EOF"); From: a\@example.com To: b\@example.com Subject: this is a subject Message-ID: Date: $date Received: (majordomo\@vger.kernel.org) by vger.kernel.org via listexpand id S932173AbXAVSQY (ORCPT ); $received_date hello world EOF my @ts = PublicInbox::MsgTime::msg_datestamp($mime->header_obj); return \@ts; } sub timestamp ($) { my ($received) = @_; local $SIG{__WARN__} = sub {}; # Suppress warnings my $mime = PublicInbox::Eml->new(<<"EOF"); From: a\@example.com To: b\@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 Received: (majordomo\@vger.kernel.org) by vger.kernel.org via listexpand id S932173AbXAVSQY (ORCPT ); $received hello world EOF my @ts = PublicInbox::MsgTime::msg_timestamp($mime->header_obj); return \@ts; } # Verify that the parser sucks up the timezone for dates for (my $min = -1440; $min <= 1440; $min += 30) { my $sign = ($min < 0) ? '-': '+'; my $h = abs(int($min / 60)); my $m = $min % 60; my $ts_expect = 749520000 - ($min * 60); my $tz_expect = sprintf('%s%02d%02d', $sign, $h, $m); if ($tz_expect >= 1400 || $tz_expect <= -1400) { $tz_expect = '+0000'; } my $date = sprintf("Fri, 02 Oct 1993 00:00:00 %s%02d%02d", $sign, $h, $m); my $result = datestamp($date); is_deeply($result, [ $ts_expect, $tz_expect ], $date); } # Verify that the parser sucks up the timezone and for received timestamps for (my $min = -1440; $min <= 1440; $min += 30) { my $sign = ($min < 0) ? '-' : '+'; my $h = abs(int($min / 60)); my $m = $min %60; my $ts_expect = 1169471784 - ($min * 60); my $tz_expect = sprintf('%s%02d%02d', $sign, $h, $m); if ($tz_expect >= 1400 || $tz_expect <= -1400) { $tz_expect = '+0000'; } my $received = sprintf('Mon, 22 Jan 2007 13:16:24 %s%02d%02d', $sign, $h, $m); is_deeply(timestamp($received), [ $ts_expect, $tz_expect ], $received); } sub is_datestamp ($$) { my ($date, $expect) = @_; is_deeply(datestamp($date), $expect, $date); } is_datestamp('Wed, 13 Dec 2006 10:26:38 +1', [1166001998, '+0100']); is_datestamp('Fri, 3 Feb 2006 18:11:22 -00', [1138990282, '+0000']); is_datestamp('Thursday, 20 Feb 2003 01:14:34 +000', [1045703674, '+0000']); is_datestamp('Fri, 28 Jun 2002 12:54:40 -700', [1025294080, '-0700']); is_datestamp('Sat, 12 Jan 2002 12:52:57 -200', [1010847177, '-0200']); is_datestamp('Mon, 05 Nov 2001 10:36:16 -800', [1004985376, '-0800']); is_datestamp('Tue, 3 Jun 2003 8:58:23 --500', [1054648703, '-0500']); is_datestamp('Thu, 18 May 100 10:40:43 +0200 (MET DST)', [958639243, '+0200']); is_datestamp('Thu, 18 May 2000 10:40:43 +0200', [958639243, '+0200']); is_datestamp('Tue, 27 Feb 2007 16:23:25 -0060', [1172597005, '-0100']); is_datestamp('Wed, 20 Dec 2006 05:32:58 -0420', [1166608378, '-0420']); is_datestamp('Wed, 20 Dec 2006 05:32:58 +0420', [1166577178, '+0420']); is_datestamp('Thu, 14 Dec 2006 00:20:24 +0480', [1166036424, '+0520']); is_datestamp('Thu, 14 Dec 2006 00:20:24 -0480', [1166074824, '-0520']); is_datestamp('Mon, 14 Apr 2014 07:59:01 -0007', [1397462761, '-0007']); SKIP: { require_mods('Date::Parse', 1); my $now = time; if (join("\0", gmtime($now)) ne join("\0", localtime($now))) { skip('needs TZ=UTC to test zone-less parsing', 1); } is_datestamp('Sat, 27 Sep 1997 10:02:32', [875354552, '+0000']); } # obsolete formats described in RFC2822 for (qw(UT GMT Z)) { is_datestamp('Fri, 02 Oct 1993 00:00:00 '.$_, [ 749520000, '+0000']); } is_datestamp('Fri, 02 Oct 1993 00:00:00 EDT', [ 749534400, '-0400']); # fallback to Received: header if Date: is out-of-range: is_datestamp('Fri, 1 Jan 1904 10:12:31 +0100', PublicInbox::MsgTime::str2date_zone($received_date)); is_datestamp('Fri, 9 Mar 71685 18:45:56 +0000', # Y10K is not my problem :P PublicInbox::MsgTime::str2date_zone($received_date)); done_testing(); public-inbox-1.6.1/t/multi-mid.t000066400000000000000000000046231377346120300164560ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::Eml; use PublicInbox::TestCommon; use PublicInbox::InboxWritable; require_git(2.6); require_mods(qw(DBD::SQLite)); require PublicInbox::SearchIdx; my $delay = $ENV{TEST_DELAY_CONVERT}; my $addr = 'test@example.com'; my $bad = PublicInbox::Eml->new(< Message-ID: From: a\@example.com To: $addr Subject: bad EOF my $good = PublicInbox::Eml->new(< From: b\@example.com To: $addr Subject: good EOF for my $order ([$bad, $good], [$good, $bad]) { my $before; my ($tmpdir, $for_destroy) = tmpdir(); my $ibx = PublicInbox::InboxWritable->new({ inboxdir => "$tmpdir/v1", name => 'test-v1', indexlevel => 'basic', -primary_address => $addr, }, my $creat_opt = {}); my @old; if ('setup v1 inbox') { my $im = $ibx->importer(0); for (@$order) { ok($im->add($_), 'added '.$_->header('Subject')); sleep($delay) if $delay; } $im->done; my $s = PublicInbox::SearchIdx->new($ibx, 1); $s->index_sync; $before = [ $ibx->mm->minmax ]; @old = ($ibx->over->get_art(1), $ibx->over->get_art(2)); $ibx->cleanup; } my $rdr = { 1 => \(my $out = ''), 2 => \(my $err = '') }; my $cmd = [ '-convert', $ibx->{inboxdir}, "$tmpdir/v2" ]; my $env = { PI_DIR => "$tmpdir/.public-inbox" }; ok(run_script($cmd, $env, $rdr), 'convert to v2'); $err =~ s!\AW: $tmpdir/v1 not configured[^\n]+\n!!s; is($err, '', 'no errors or warnings from -convert'); $ibx->{version} = 2; $ibx->{inboxdir} = "$tmpdir/v2"; is_deeply([$ibx->mm->minmax], $before, 'min, max article numbers unchanged'); my @v2 = ($ibx->over->get_art(1), $ibx->over->get_art(2)); is_deeply(\@v2, \@old, 'v2 conversion times match'); xsys(qw(git clone -sq --mirror), "$tmpdir/v2/git/0.git", "$tmpdir/v2-clone/git/0.git") == 0 or die "clone: $?"; $cmd = [ '-init', '-Lbasic', '-V2', 'v2c', "$tmpdir/v2-clone", 'http://example.com/v2c', 'v2c@example.com' ]; ok(run_script($cmd, $env), 'init clone'); $cmd = [ qw(-index -j0), "$tmpdir/v2-clone" ]; sleep($delay) if $delay; ok(run_script($cmd, $env), 'index the clone'); $ibx->cleanup; $ibx->{inboxdir} = "$tmpdir/v2-clone"; my @v2c = ($ibx->over->get_art(1), $ibx->over->get_art(2)); is_deeply(\@v2c, \@old, 'v2 clone times match'); } done_testing(); public-inbox-1.6.1/t/nntp.t000066400000000000000000000102531377346120300155300ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Eml; require_mods(qw(DBD::SQLite Data::Dumper)); use_ok 'PublicInbox::NNTP'; use_ok 'PublicInbox::Inbox'; { sub quote_str { my (undef, $s) = split(/ = /, Data::Dumper::Dumper($_[0]), 2); $s =~ s/;\n//; $s; } sub wm_prepare { my ($wm) = @_; my $orig = qq{'$wm'}; PublicInbox::NNTP::wildmat2re($_[0]); my $new = quote_str($_[0]); ($orig, $new); } sub wildmat_like { my ($str, $wm) = @_; my ($orig, $new) = wm_prepare($wm); like($str, $wm, "$orig matches '$str' using $new"); } sub wildmat_unlike { my ($str, $wm, $check_ex) = @_; if ($check_ex) { use re 'eval'; my $re = qr/$wm/; like($str, $re, "normal re with $wm matches, but ..."); } my ($orig, $new) = wm_prepare($wm); unlike($str, $wm, "$orig does not match '$str' using $new"); } wildmat_like('[foo]', '[\[foo\]]'); wildmat_like('any', '*'); wildmat_unlike('bar.foo.bar', 'foo.*'); # no code execution wildmat_unlike('HI', '(?{"HI"})', 1); wildmat_unlike('HI', '[(?{"HI"})]', 1); } { sub ngpat_like { my ($str, $pat) = @_; my $orig = $pat; PublicInbox::NNTP::ngpat2re($pat); like($str, $pat, "'$orig' matches '$str' using $pat"); } ngpat_like('any', '*'); ngpat_like('a.s.r', 'a.t,a.s.r'); ngpat_like('a.s.r', 'a.t,a.s.*'); } { use POSIX qw(strftime); sub time_roundtrip { my ($date, $time, $gmt) = @_; my $m = join(' ', @_); my $ts = PublicInbox::NNTP::parse_time(@_); my @t = $gmt ? gmtime($ts) : localtime($ts); my ($d, $t) = split(' ', strftime('%Y%m%d %H%M%S', @t)); if (length($date) != 8) { # Net::NNTP uses YYMMDD :< $d =~ s/^[0-9]{2}//; } is_deeply([$d, $t], [$date, $time], "roundtripped: $m"); $ts; } my $x1 = time_roundtrip(qw(20141109 060606 GMT)); my $x2 = time_roundtrip(qw(141109 060606 GMT)); my $x3 = time_roundtrip(qw(930724 060606 GMT)); my $x5 = time_roundtrip(qw(710101 000000)); my $x6 = time_roundtrip(qw(720101 000000)); SKIP: { skip('YYMMDD test needs updating', 6) if (time > 0x7fffffff); # our world probably ends in 2038, but if not we'll try to # remember to update the test then is($x1, $x2, 'YYYYMMDD and YYMMDD parse identically'); is(strftime('%Y', gmtime($x3)), '1993', '930724 was in 1993'); my $epoch = time_roundtrip(qw(700101 000000 GMT)); is($epoch, 0, 'epoch parsed correctly'); ok($x6 > $x5, '1972 > 1971'); ok($x5 > $epoch, '1971 > Unix epoch'); } } { # test setting NNTP headers in HEAD and ARTICLE requests my $u = 'https://example.com/a/'; my $ng = PublicInbox::Inbox->new({ name => 'test', inboxdir => 'test.git', address => 'a@example.com', -primary_address => 'a@example.com', newsgroup => 'test', domain => 'example.com', url => [ '//example.com/a' ]}); is($ng->base_url, $u, 'URL expanded'); my $mid = 'a@b'; my $mime = PublicInbox::Eml->new("Message-ID: <$mid>\r\n\r\n"); my $hdr = $mime->header_obj; my $mock_self = { nntpd => { grouplist => [], servername => 'example.com' }, ng => $ng, }; my $smsg = { num => 1, mid => $mid, nntp => $mock_self, -ibx => $ng }; PublicInbox::NNTP::set_nntp_headers($hdr, $smsg); is_deeply([ $mime->header('Message-ID') ], [ "<$mid>" ], 'Message-ID unchanged'); is_deeply([ $mime->header('Archived-At') ], [ "<${u}a\@b/>" ], 'Archived-At: set'); is_deeply([ $mime->header('List-Archive') ], [ "<$u>" ], 'List-Archive: set'); is_deeply([ $mime->header('List-Post') ], [ '' ], 'List-Post: set'); is_deeply([ $mime->header('Newsgroups') ], [ 'test' ], 'Newsgroups: set'); is_deeply([ $mime->header('Xref') ], [ 'example.com test:1' ], 'Xref: set'); $ng->{-base_url} = 'http://mirror.example.com/m/'; $smsg->{num} = 2; PublicInbox::NNTP::set_nntp_headers($hdr, $smsg); is_deeply([ $mime->header('Message-ID') ], [ "<$mid>" ], 'Message-ID unchanged'); is_deeply([ $mime->header('Archived-At') ], [ "<${u}a\@b/>", '' ], 'Archived-At: appended'); is_deeply([ $mime->header('Xref') ], [ 'example.com test:2' ], 'Old Xref: clobbered'); } done_testing(); public-inbox-1.6.1/t/nntpd-tls.t000066400000000000000000000156641377346120300165070ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use Socket qw(SOCK_STREAM IPPROTO_TCP SOL_SOCKET); use PublicInbox::TestCommon; # IO::Poll and Net::NNTP are part of the standard library, but # distros may split them off... require_mods(qw(DBD::SQLite IO::Socket::SSL Net::NNTP IO::Poll)); Net::NNTP->can('starttls') or plan skip_all => 'Net::NNTP does not support TLS'; my $cert = 'certs/server-cert.pem'; my $key = 'certs/server-key.pem'; unless (-r $key && -r $cert) { plan skip_all => "certs/ missing for $0, run $^X ./create-certs.perl in certs/"; } use_ok 'PublicInbox::TLS'; use_ok 'IO::Socket::SSL'; require PublicInbox::InboxWritable; require PublicInbox::Eml; require PublicInbox::SearchIdx; our $need_zlib; eval { require Compress::Raw::Zlib } or $need_zlib = 'Compress::Raw::Zlib missing'; my $version = 2; # v2 needs newer git require_git('2.6') if $version >= 2; my ($tmpdir, $for_destroy) = tmpdir(); my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $inboxdir = "$tmpdir"; my $pi_config = "$tmpdir/pi_config"; my $group = 'test-nntpd-tls'; my $addr = $group . '@example.com'; my $starttls = tcp_server(); my $nntps = tcp_server(); my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'nntpd-tls', version => $version, -primary_address => $addr, indexlevel => 'basic', }); $ibx = PublicInbox::InboxWritable->new($ibx, {nproc=>1}); $ibx->init_inbox(0); { open my $fh, '>', $pi_config or die "open: $!\n"; print $fh <importer(0); my $mime = eml_load 't/data/0001.patch'; ok($im->add($mime), 'message added'); $im->done; if ($version == 1) { my $s = PublicInbox::SearchIdx->new($ibx, 1); $s->index_sync; } } my $nntps_addr = $nntps->sockhost . ':' . $nntps->sockport; my $starttls_addr = $starttls->sockhost . ':' . $starttls->sockport; my $env = { PI_CONFIG => $pi_config }; my $td; for my $args ( [ "--cert=$cert", "--key=$key", "-lnntps://$nntps_addr", "-lnntp://$starttls_addr" ], ) { for ($out, $err) { open my $fh, '>', $_ or die "truncate: $!"; } my $cmd = [ '-nntpd', '-W0', @$args, "--stdout=$out", "--stderr=$err" ]; $td = start_script($cmd, $env, { 3 => $starttls, 4 => $nntps }); my %o = ( SSL_hostname => 'server.local', SSL_verifycn_name => 'server.local', SSL_verify_mode => SSL_VERIFY_PEER(), SSL_ca_file => 'certs/test-ca.pem', ); my $expect = { $group => [qw(1 1 n)] }; # start negotiating a slow TLS connection my $slow = tcp_connect($nntps, Blocking => 0); $slow = IO::Socket::SSL->start_SSL($slow, SSL_startHandshake => 0, %o); my $slow_done = $slow->connect_SSL; my @poll; if ($slow_done) { diag('W: connect_SSL early OK, slow client test invalid'); use PublicInbox::Syscall qw(EPOLLIN EPOLLOUT); @poll = (fileno($slow), EPOLLIN | EPOLLOUT); } else { @poll = (fileno($slow), PublicInbox::TLS::epollbit()); } # we should call connect_SSL much later... # NNTPS my $c = Net::NNTP->new($nntps_addr, %o, SSL => 1); my $list = $c->list; is_deeply($list, $expect, 'NNTPS LIST works'); unlike(get_capa($c), qr/\bSTARTTLS\r\n/, 'STARTTLS not advertised for NNTPS'); is($c->command('QUIT')->response(), Net::Cmd::CMD_OK(), 'QUIT works'); is(0, sysread($c, my $buf, 1), 'got EOF after QUIT'); # STARTTLS $c = Net::NNTP->new($starttls_addr, %o); $list = $c->list; is_deeply($list, $expect, 'plain LIST works'); ok($c->starttls, 'STARTTLS succeeds'); is($c->code, 382, 'got 382 for STARTTLS'); $list = $c->list; is_deeply($list, $expect, 'LIST works after STARTTLS'); unlike(get_capa($c), qr/\bSTARTTLS\r\n/, 'STARTTLS not advertised after STARTTLS'); # Net::NNTP won't let us do dumb things, but we need to test # dumb things, so use Net::Cmd directly: my $n = $c->command('STARTTLS')->response(); is($n, Net::Cmd::CMD_ERROR(), 'error attempting STARTTLS again'); is($c->code, 502, '502 according to RFC 4642 sec#2.2.1'); # STARTTLS with bad hostname $o{SSL_hostname} = $o{SSL_verifycn_name} = 'server.invalid'; $c = Net::NNTP->new($starttls_addr, %o); like(get_capa($c), qr/\bSTARTTLS\r\n/, 'STARTTLS advertised'); $list = $c->list; is_deeply($list, $expect, 'plain LIST works again'); ok(!$c->starttls, 'STARTTLS fails with bad hostname'); $c = Net::NNTP->new($starttls_addr, %o); $list = $c->list; is_deeply($list, $expect, 'not broken after bad negotiation'); # NNTPS with bad hostname $c = Net::NNTP->new($nntps_addr, %o, SSL => 1); is($c, undef, 'NNTPS fails with bad hostname'); $o{SSL_hostname} = $o{SSL_verifycn_name} = 'server.local'; $c = Net::NNTP->new($nntps_addr, %o, SSL => 1); ok($c, 'NNTPS succeeds again with valid hostname'); # slow TLS connection did not block the other fast clients while # connecting, finish it off: until ($slow_done) { IO::Poll::_poll(-1, @poll); $slow_done = $slow->connect_SSL and last; @poll = (fileno($slow), PublicInbox::TLS::epollbit()); } $slow->blocking(1); ok(sysread($slow, my $greet, 4096) > 0, 'slow got greeting'); like($greet, qr/\A201 /, 'got expected greeting'); is(syswrite($slow, "QUIT\r\n"), 6, 'slow wrote QUIT'); ok(sysread($slow, my $end, 4096) > 0, 'got EOF'); is(sysread($slow, my $eof, 4096), 0, 'got EOF'); $slow = undef; SKIP: { skip 'TCP_DEFER_ACCEPT is Linux-only', 2 if $^O ne 'linux'; my $var = eval { Socket::TCP_DEFER_ACCEPT() } // 9; defined(my $x = getsockopt($nntps, IPPROTO_TCP, $var)) or die; ok(unpack('i', $x) > 0, 'TCP_DEFER_ACCEPT set on NNTPS'); defined($x = getsockopt($starttls, IPPROTO_TCP, $var)) or die; is(unpack('i', $x), 0, 'TCP_DEFER_ACCEPT is 0 on plain NNTP'); }; SKIP: { skip 'SO_ACCEPTFILTER is FreeBSD-only', 2 if $^O ne 'freebsd'; if (system('kldstat -m accf_data >/dev/null')) { skip 'accf_data not loaded? kldload accf_data', 2; } require PublicInbox::Daemon; my $var = PublicInbox::Daemon::SO_ACCEPTFILTER(); my $x = getsockopt($nntps, SOL_SOCKET, $var); like($x, qr/\Adataready\0+\z/, 'got dataready accf for NNTPS'); $x = getsockopt($starttls, IPPROTO_TCP, $var); is($x, undef, 'no BSD accept filter for plain NNTP'); }; $c = undef; $td->kill; $td->join; is($?, 0, 'no error in exited process'); my $eout = eval { open my $fh, '<', $err or die "open $err failed: $!"; local $/; <$fh>; }; unlike($eout, qr/wide/i, 'no Wide character warnings'); } done_testing(); sub get_capa { my ($sock) = @_; syswrite($sock, "CAPABILITIES\r\n"); my $capa = ''; do { my $r = sysread($sock, $capa, 8192, length($capa)); die "unexpected: $!" unless defined($r); die 'unexpected EOF' if $r == 0; } until $capa =~ /\.\r\n\z/; my $deflate_capa = qr/\r\nCOMPRESS DEFLATE\r\n/; if ($need_zlib) { unlike($capa, $deflate_capa, 'COMPRESS DEFLATE NOT advertised '.$need_zlib); } else { like($capa, $deflate_capa, 'COMPRESS DEFLATE advertised'); } $capa; } 1; public-inbox-1.6.1/t/nntpd-v2.t000066400000000000000000000002741377346120300162230ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ local $ENV{PI_TEST_VERSION} = 2; require './t/nntpd.t'; public-inbox-1.6.1/t/nntpd.t000066400000000000000000000350351377346120300157010ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Spawn qw(which); require_mods(qw(DBD::SQLite)); require PublicInbox::InboxWritable; use PublicInbox::Eml; use IO::Socket; use Socket qw(IPPROTO_TCP TCP_NODELAY); use Net::NNTP; use Sys::Hostname; use POSIX qw(_exit); use Digest::SHA; use_ok 'PublicInbox::Msgmap'; # t/nntpd-v2.t wraps this for v2 my $version = $ENV{PI_TEST_VERSION} || 1; require_git('2.6') if $version == 2; my $lsof = which('lsof'); my $fast_idle = eval { require Linux::Inotify2; 1 } // eval { require IO::KQueue; 1 }; my ($tmpdir, $for_destroy) = tmpdir(); my $home = "$tmpdir/pi-home"; my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $inboxdir = "$tmpdir/main"; my $otherdir = "$tmpdir/other"; my $group = 'test-nntpd'; my $addr = $group . '@example.com'; my %opts; my $sock = tcp_server(); my $td; my $len; my $ibx = { inboxdir => $inboxdir, name => $group, version => $version, -primary_address => $addr, indexlevel => 'basic', }; $ibx = PublicInbox::Inbox->new($ibx); { local $ENV{HOME} = $home; my @cmd = ('-init', $group, $inboxdir, 'http://example.com/abc', $addr, "-V$version", '-Lbasic', '--newsgroup', $group); ok(run_script(\@cmd), "init $group"); @cmd = ('-init', 'xyz', $otherdir, 'http://example.com/xyz', 'e@example.com', "-V$version", qw(-Lbasic --newsgroup x.y.z)); ok(run_script(\@cmd), 'init xyz'); is(xsys([qw(git config -f), "$home/.public-inbox/config", qw(publicinboxmda.spamcheck none)]), 0, 'disable spamcheck'); open(my $fh, '<', 't/utf8.eml') or BAIL_OUT("open t/utf8.eml: $!"); my $env = { ORIGINAL_RECIPIENT => 'e@example.com' }; run_script([qw(-mda --no-precheck)], $env, { 0 => $fh }) or BAIL_OUT('-mda delivery'); my $len; $ibx = PublicInbox::InboxWritable->new($ibx); my $im = $ibx->importer(0); # ensure successful message delivery { my $mime = PublicInbox::Eml->new(< From: =?utf-8?Q?El=C3=A9anor?= Cc: $addr Message-Id: Content-Type: text/plain; charset=utf-8 Subject: Testing for =?utf-8?Q?El=C3=A9anor?= Date: Thu, 01 Jan 1970 06:06:06 +0000 Content-Transfer-Encoding: 8bit References: This is a test message for El\xc3\xa9anor EOF my $list_id = $addr; $list_id =~ s/@/./; $mime->header_set('List-Id', "<$list_id>"); my $str = $mime->as_string; $str =~ s/(?add($mime); $im->done; if ($version == 1) { ok(run_script(['-index', $ibx->{inboxdir}]), 'indexed v1'); } } ok($sock, 'sock created'); my $cmd = [ '-nntpd', '-W0', "--stdout=$out", "--stderr=$err" ]; $td = start_script($cmd, undef, { 3 => $sock }); my $host_port = $sock->sockhost . ':' . $sock->sockport; my $n = Net::NNTP->new($host_port); my $list = $n->list; ok(delete $list->{'x.y.z'}, 'deleted x.y.z group'); is_deeply($list, { $group => [ qw(1 1 n) ] }, 'LIST works'); is_deeply([$n->group($group)], [ qw(0 1 1), $group ], 'GROUP works'); is_deeply($n->listgroup($group), [1], 'listgroup OK'); # TODO: Net::NNTP::listgroup does not support range at the moment { my $expect = [ qw(Subject: From: Date: Message-ID: References: Bytes: Lines: Xref:full) ]; is_deeply($n->overview_fmt, $expect, 'RFC3977 8.4.2 compliant LIST OVERVIEW.FMT'); } SKIP: { $n->can('starttls') or skip('Net::NNTP too old to support STARTTLS', 2); require_mods('IO::Socket::SSL', 2); ok(!$n->starttls, 'STARTTLS fails when unconfigured'); is($n->code, 580, 'got 580 code on server w/o TLS'); }; my $mid = ''; my %xhdr = ( 'message-id' => $mid, subject => "Testing for El\xc3\xa9anor", 'date' => 'Thu, 01 Jan 1970 06:06:06 +0000', 'from' => "El\xc3\xa9anor ", 'to' => "El\xc3\xa9anor ", 'cc' => $addr, 'xref' => hostname . " $group:1", 'references' => '', ); my $s = tcp_connect($sock); sysread($s, my $buf, 4096); is($buf, "201 " . hostname . " ready - post via email\r\n", 'got greeting'); ok(syswrite($s, " \r\n"), 'wrote spaces'); ok(syswrite($s, "\r\n"), 'wrote nothing'); syswrite($s, "NEWGROUPS\t19990424 000000 \033GMT\007\r\n"); is(0, sysread($s, $buf, 4096), 'GOT EOF on cntrl'); $s = tcp_connect($sock); sysread($s, $buf, 4096); is($buf, "201 " . hostname . " ready - post via email\r\n", 'got greeting'); syswrite($s, "CAPABILITIES\r\n"); $buf = read_til_dot($s); like($buf, qr/\r\nVERSION 2\r\n/s, 'CAPABILITIES works'); unlike($buf, qr/STARTTLS/s, 'STARTTLS not advertised'); my $deflate_capa = qr/\r\nCOMPRESS DEFLATE\r\n/; if (eval { require Compress::Raw::Zlib }) { like($buf, $deflate_capa, 'DEFLATE advertised'); } else { unlike($buf, $deflate_capa, 'DEFLATE not advertised (Compress::Raw::Zlib missing)'); } syswrite($s, "NEWGROUPS 19990424 000000 GMT\r\n"); $buf = read_til_dot($s); like($buf, qr/\A231 list of /, 'newgroups OK'); while (my ($k, $v) = each %xhdr) { is_deeply($n->xhdr("$k $mid"), { $mid => $v }, "XHDR $k by message-id works"); is_deeply($n->xhdr("$k 1"), { 1 => $v }, "$k by article number works"); is_deeply($n->xhdr("$k 1-"), { 1 => $v }, "$k by article range works"); $buf = ''; syswrite($s, "HDR $k $mid\r\n"); $buf = read_til_dot($s); my @r = split("\r\n", $buf); like($r[0], qr/\A225 /, '225 response for HDR'); is($r[1], "0 $v", 'got expected response for HDR'); } { my $nogroup = Net::NNTP->new($host_port); while (my ($k, $v) = each %xhdr) { is_deeply($nogroup->xhdr("$k $mid"), { $mid => $v }, "$k by message-id works without group"); } } is_deeply($n->xover('1-'), { '1' => ["Testing for El\xc3\xa9anor", "El\xc3\xa9anor ", 'Thu, 01 Jan 1970 06:06:06 +0000', '', '', $len, '1', 'Xref: '. hostname . ' test-nntpd:1'] }, "XOVER range works"); is_deeply($n->xover('1'), { '1' => ["Testing for El\xc3\xa9anor", "El\xc3\xa9anor ", 'Thu, 01 Jan 1970 06:06:06 +0000', '', '', $len, '1', 'Xref: '. hostname . ' test-nntpd:1'] }, "XOVER by article works"); is_deeply($n->head(1), $n->head(''), 'HEAD OK'); is_deeply($n->body(1), $n->body(''), 'BODY OK'); is_deeply($n->nntpstat(1), '', 'STAT'); is($n->body(1)->[0], "This is a test message for El\xc3\xa9anor\n", 'body really matches'); my $art = $n->article(1); is(ref($art), 'ARRAY', 'got array for ARTICLE'); is_deeply($art, $n->article(''), 'ARTICLE OK'); is($n->article(999), undef, 'non-existent num'); is($n->article(''), undef, 'non-existent mid'); { syswrite($s, "OVER $mid\r\n"); $buf = read_til_dot($s); my @r = split("\r\n", $buf); like($r[0], qr/^224 /, 'got 224 response for OVER'); is($r[1], "0\tTesting for El\xc3\xa9anor\t" . "El\xc3\xa9anor \t" . "Thu, 01 Jan 1970 06:06:06 +0000\t" . "$mid\t\t$len\t1" . "\tXref: " . hostname . " test-nntpd:0", 'OVER by Message-ID works'); is($r[2], '.', 'correctly terminated response'); } is_deeply($n->xhdr(qw(Cc 1-)), { 1 => 'test-nntpd@example.com' }, 'XHDR Cc 1- works'); is_deeply($n->xhdr(qw(References 1-)), { 1 => '' }, 'XHDR References 1- works)'); is_deeply($n->xhdr(qw(list-id 1-)), {}, 'XHDR on invalid header returns empty'); my $mids = $n->newnews(0, $group); is_deeply($mids, [''], 'NEWNEWS works'); { my $t0 = time; my $date = $n->date; my $t1 = time; ok($date >= $t0, 'valid date after start'); ok($date <= $t1, 'valid date before stop'); } if ('leafnode interop') { my $for_leafnode = PublicInbox::Eml->new(<<""); From: longheader\@example.com To: $addr Subject: none Date: Fri, 02 Oct 1993 00:00:00 +0000 my $long_hdr = 'for-leafnode-'.('y'x200).'@example.com'; $for_leafnode->header_set('Message-ID', "<$long_hdr>"); $im->add($for_leafnode); $im->done; if ($version == 1) { ok(run_script(['-index', $ibx->{inboxdir}]), 'indexed v1'); } my $hdr = $n->head("<$long_hdr>"); my $expect = qr/\AMessage-ID: /i . qr/\Q<$long_hdr>\E/; ok(scalar(grep(/$expect/, @$hdr)), 'Message-ID not folded'); ok(scalar(grep(/^Path:/, @$hdr)), 'Path: header found'); # it's possible for v2 messages to have 2+ Message-IDs, # but leafnode can't handle it if ($version != 1) { my @mids = ("<$long_hdr>", '<2mid@wtf>'); $for_leafnode->header_set('Message-ID', @mids); $for_leafnode->body_set('not-a-dupe'); my $warn = ''; local $SIG{__WARN__} = sub { $warn .= join('', @_) }; $im->add($for_leafnode); $im->done; like($warn, qr/reused/, 'warned for reused MID'); $hdr = $n->head('<2mid@wtf>'); my @hmids = grep(/\AMessage-ID: /i, @$hdr); is(scalar(@hmids), 1, 'Single Message-ID in header'); like($hmids[0], qr/: <2mid\@wtf>/, 'got expected mid'); } } ok($n->article(''), 'cross newsgroup ARTICLE by Message-ID'); ok($n->body(''), 'cross newsgroup BODY by Message-ID'); ok($n->head(''), 'cross newsgroup HEAD by Message-ID'); is($n->xpath(''), 'x.y.z/1', 'xpath hit'); is($n->xpath(''), undef, 'xpath miss'); # pipelined requests: { my $nreq = 90; my $nart = 2; syswrite($s, "GROUP $group\r\n"); my $res = <$s>; my $rdr = fork; if ($rdr == 0) { for (1..$nreq) { <$s> =~ /\A224 / or _exit(1); <$s> =~ /\A1/ or _exit(2); <$s> eq ".\r\n" or _exit(3); } my %sums; for (1..$nart) { <$s> =~ /\A220 / or _exit(4); my $dig = Digest::SHA->new(1); while (my $l = <$s>) { last if $l eq ".\r\n"; $dig->add($l); } $dig = $dig->hexdigest; $sums{$dig}++; } if ($nart) { scalar(keys(%sums)) == 1 or _exit(5); (values(%sums))[0] == $nart or _exit(6); } _exit(0); } for (1..$nreq) { syswrite($s, "XOVER 1\r\n"); } syswrite($s, "ARTICLE 1\r\n" x $nart); is($rdr, waitpid($rdr, 0), 'reader done'); is($? >> 8, 0, 'no errors'); } my $noerr = { 2 => \(my $null) }; SKIP: { if ($INC{'Search/Xapian.pm'} && ($ENV{TEST_RUN_MODE}//2)) { skip 'Search/Xapian.pm pre-loaded (by t/run.perl?)', 1; } $lsof or skip 'lsof missing', 1; my @of = xqx([$lsof, '-p', $td->{pid}], undef, $noerr); skip('lsof broken', 1) if (!scalar(@of) || $?); my @xap = grep m!Search/Xapian!, @of; is_deeply(\@xap, [], 'Xapian not loaded in nntpd'); } # -compact requires Xapian SKIP: { require_mods('Search::Xapian', 2); have_xapian_compact or skip 'xapian-compact missing', 2; is(xsys(qw(git config), "--file=$home/.public-inbox/config", "publicinbox.$group.indexlevel", 'medium'), 0, 'upgraded indexlevel'); my $ex = eml_load('t/data/0001.patch'); is($n->article($ex->header('Message-ID')), undef, 'article did not exist'); $im->add($ex); $im->done; { my $f = $ibx->mm->{dbh}->sqlite_db_filename; my $tmp = "$tmpdir/tmp.sqlite3"; $ibx->mm->{dbh}->sqlite_backup_to_file($tmp); delete $ibx->{mm}; rename($tmp, $f) or BAIL_OUT "rename($tmp, $f): $!"; } ok(run_script([qw(-index -c -j0 --reindex), $ibx->{inboxdir}], undef, $noerr), '-compacted'); select(undef, undef, undef, $fast_idle ? 0.1 : 2.1); $art = $n->article($ex->header('Message-ID')); ok($art, 'new article retrieved after compact'); $lsof or skip 'lsof missing', 1; ($^O =~ /\A(?:linux)\z/) or skip "lsof /(deleted)/ check untested on $^O", 1; my @of = xqx([$lsof, '-p', $td->{pid}], undef, $noerr); is(scalar(grep(/\(deleted\)/, @of)), 0, 'no deleted files'); }; SKIP: { test_watch($tmpdir, $sock, $group) }; { setsockopt($s, IPPROTO_TCP, TCP_NODELAY, 1); syswrite($s, 'HDR List-id 1-'); select(undef, undef, undef, 0.15); ok($td->kill, 'killed nntpd'); select(undef, undef, undef, 0.15); syswrite($s, "\r\n"); $buf = ''; do { sysread($s, $buf, 4096, length($buf)); } until ($buf =~ /\r\n\z/); my @r = split("\r\n", $buf); like($r[0], qr/^5\d\d /, 'got 5xx response for unoptimized HDR'); is(scalar @r, 1, 'only one response line'); } $n = $s = undef; $td->join; is($?, 0, 'no error in exited process'); my $eout = do { open my $fh, '<', $err or die "open $err failed: $!"; local $/; <$fh>; }; unlike($eout, qr/wide/i, 'no Wide character warnings'); } $td = undef; done_testing(); sub read_til_dot { my ($s) = @_; my $buf = ''; do { sysread($s, $buf, 4096, length($buf)); } until ($buf =~ /\r\n\.\r\n\z/); $buf; } sub test_watch { my ($tmpdir, $sock, $group) = @_; use_ok 'PublicInbox::Watch'; use_ok 'PublicInbox::InboxIdle'; use_ok 'PublicInbox::Config'; require_git('1.8.5', 1) or skip('git 1.8.5+ needed for --urlmatch', 4); my $old_env = { HOME => $ENV{HOME} }; my $home = "$tmpdir/watch_home"; mkdir $home or BAIL_OUT $!; mkdir "$home/.public-inbox" or BAIL_OUT $!; local $ENV{HOME} = $home; my $name = 'watchnntp'; my $addr = "i1\@example.com"; my $url = "http://example.com/i1"; my $inboxdir = "$tmpdir/watchnntp"; my $cmd = ['-init', '-V1', '-Lbasic', $name, $inboxdir, $url, $addr]; my ($ihost, $iport) = ($sock->sockhost, $sock->sockport); my $nntpurl = "nntp://$ihost:$iport/$group"; run_script($cmd) or BAIL_OUT("init $name"); xsys(qw(git config), "--file=$home/.public-inbox/config", "publicinbox.$name.watch", $nntpurl) == 0 or BAIL_OUT "git config $?"; # try again with polling xsys(qw(git config), "--file=$home/.public-inbox/config", 'nntp.PollInterval', 0.11) == 0 or BAIL_OUT "git config $?"; my $cfg = PublicInbox::Config->new; PublicInbox::DS->Reset; my $ii = PublicInbox::InboxIdle->new($cfg); my $cb = sub { PublicInbox::DS->SetPostLoopCallback(sub {}) }; my $obj = bless \$cb, 'PublicInbox::TestCommon::InboxWakeup'; $cfg->each_inbox(sub { $_[0]->subscribe_unlock('ident', $obj) }); my $watcherr = "$tmpdir/watcherr"; open my $err_wr, '>', $watcherr or BAIL_OUT $!; open my $err, '<', $watcherr or BAIL_OUT $!; my $w = start_script(['-watch'], undef, { 2 => $err_wr }); diag 'waiting for initial fetch...'; PublicInbox::DS->EventLoop; diag 'inbox unlocked on initial fetch'; $w->kill; $w->join; is($?, 0, 'no error in exited -watch process'); $cfg->each_inbox(sub { shift->unsubscribe_unlock('ident') }); $ii->close; PublicInbox::DS->Reset; my @err = grep(!/^I:/, <$err>); is(@err, 0, 'no warnings/errors from -watch'.join(' ', @err)); my @ls = xqx(['git', "--git-dir=$inboxdir", qw(ls-tree -r HEAD)]); isnt(scalar(@ls), 0, 'imported something'); } 1; public-inbox-1.6.1/t/nodatacow.t000066400000000000000000000032711377346120300165320ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use File::Temp 0.19; use PublicInbox::TestCommon; use PublicInbox::Spawn qw(which); use_ok 'PublicInbox::NDC_PP'; SKIP: { my $nr = 2; skip 'test is Linux-only', $nr if $^O ne 'linux'; my $dir = $ENV{BTRFS_TESTDIR}; skip 'BTRFS_TESTDIR not defined', $nr unless defined $dir; skip 'chattr(1) not installed', $nr unless which('chattr'); my $lsattr = which('lsattr') or skip 'lsattr(1) not installed', $nr; my $tmp = File::Temp->newdir('nodatacow-XXXXX', DIR => $dir); my $dn = $tmp->dirname; my $name = "$dn/pp.f"; open my $fh, '>', $name or BAIL_OUT "open($name): $!"; my $pp_sub = \&PublicInbox::NDC_PP::nodatacow_fd; $pp_sub->(fileno($fh)); my $res = xqx([$lsattr, $name]); like($res, qr/C.*\Q$name\E/, "`C' attribute set on fd with pure Perl"); $name = "$dn/pp.d"; mkdir($name) or BAIL_OUT "mkdir($name) $!"; PublicInbox::NDC_PP::nodatacow_dir($name); $res = xqx([$lsattr, '-d', $name]); like($res, qr/C.*\Q$name\E/, "`C' attribute set on dir with pure Perl"); $name = "$dn/ic.f"; my $ic_sub = \&PublicInbox::Spawn::nodatacow_fd; $pp_sub == $ic_sub and skip 'Inline::C or Linux kernel headers missing', 2; open $fh, '>', $name or BAIL_OUT "open($name): $!"; $ic_sub->(fileno($fh)); $res = xqx([$lsattr, $name]); like($res, qr/C.*\Q$name\E/, "`C' attribute set on fd with Inline::C"); $name = "$dn/ic.d"; mkdir($name) or BAIL_OUT "mkdir($name) $!"; PublicInbox::Spawn::nodatacow_dir($name); $res = xqx([$lsattr, '-d', $name]); like($res, qr/C.*\Q$name\E/, "`C' attribute set on dir with Inline::C"); }; done_testing; public-inbox-1.6.1/t/nulsubject.t000066400000000000000000000014111377346120300167230ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use_ok 'PublicInbox::Import'; use_ok 'PublicInbox::Git'; my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/a.git"; { my $git = PublicInbox::Git->new($git_dir); my $im = PublicInbox::Import->new($git, 'testbox', 'test@example'); $im->init_bare; $im->add(PublicInbox::Eml->new(<<'EOF')); From: a@example.com To: b@example.com Subject: A subject line with a null =?iso-8859-1?q?=00?= see! Message-ID: hello world EOF $im->done; is(xsys(qw(git --git-dir), $git_dir, 'fsck', '--strict'), 0, 'git fsck ok'); } done_testing(); 1; public-inbox-1.6.1/t/over.t000066400000000000000000000052601377346120300155260ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use Compress::Zlib qw(compress); use PublicInbox::TestCommon; require_mods('DBD::SQLite'); use_ok 'PublicInbox::OverIdx'; my ($tmpdir, $for_destroy) = tmpdir(); my $over = PublicInbox::OverIdx->new("$tmpdir/over.sqlite3"); $over->dbh; # open file is($over->max, 0, 'max is zero on new DB (scalar context)'); is_deeply([$over->max], [0], 'max is zero on new DB (list context)'); my $x = $over->next_tid; is(int($x), $x, 'integer tid'); my $y = $over->next_tid; is($y, $x+1, 'tid increases'); $x = $over->sid('hello-world'); is(int($x), $x, 'integer sid'); $y = $over->sid('hello-WORLD'); is($y, $x+1, 'sid increases'); is($over->sid('hello-world'), $x, 'idempotent'); ok(!$over->{dbh}->{ReadOnly}, 'OverIdx is not ReadOnly'); $over->dbh_close; $over = PublicInbox::Over->new("$tmpdir/over.sqlite3"); ok($over->dbh->{ReadOnly}, 'Over is ReadOnly'); $over = PublicInbox::OverIdx->new("$tmpdir/over.sqlite3"); $over->dbh; is($over->sid('hello-world'), $x, 'idempotent across reopen'); $over->each_by_mid('never', sub { fail('should not be called') }); $x = $over->resolve_mid_to_tid('never'); is(int($x), $x, 'integer tid for ghost'); $y = $over->resolve_mid_to_tid('NEVAR'); is($y, $x + 1, 'integer tid for ghost increases'); my $ddd = compress(''); my $msg = sub { { ts => 0, ds => 0, num => $_[0] } }; foreach my $s ('', undef) { $over->add_over($msg->(98), [ 'a' ], [], $s, $ddd); $over->add_over($msg->(99), [ 'b' ], [], $s, $ddd); my $msgs = [ map { $_->{num} } @{$over->get_thread('a')} ]; is_deeply([98], $msgs, 'messages not linked by empty subject'); } $over->add_over($msg->(98), [ 'a' ], [], 's', $ddd); $over->add_over($msg->(99), [ 'b' ], [], 's', $ddd); foreach my $mid (qw(a b)) { my $msgs = [ map { $_->{num} } @{$over->get_thread('a')} ]; is_deeply([98, 99], $msgs, 'linked messages by subject'); } $over->add_over($msg->(98), [ 'a' ], [], 's', $ddd); $over->add_over($msg->(99), [ 'b' ], ['a'], 'diff', $ddd); foreach my $mid (qw(a b)) { my $msgs = [ map { $_->{num} } @{$over->get_thread($mid)} ]; is_deeply([98, 99], $msgs, "linked messages by Message-ID: <$mid>"); } isnt($over->max, 0, 'max is non-zero'); $over->rollback_lazy; # L my $v = eval 'v'.$over->{dbh}->{sqlite_version}; SKIP: { skip("no WAL in SQLite version $v < 3.7.0", 1) if $v lt v3.7.0; $over->{dbh}->do('PRAGMA journal_mode = WAL'); $over = PublicInbox::OverIdx->new("$tmpdir/over.sqlite3"); is($over->dbh->selectrow_array('PRAGMA journal_mode'), 'wal', 'WAL journal_mode not clobbered if manually set'); } done_testing(); public-inbox-1.6.1/t/plack-2-txt-bodies.eml000066400000000000000000000004171377346120300203750ustar00rootroot00000000000000From: a@example.com Subject: blargh Message-ID: In-Reply-To: MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="b" --b Content-Type: text/plain hi --b Content-Type: text/plain bye --b-- public-inbox-1.6.1/t/plack-attached-patch.eml000066400000000000000000000005321377346120300210240ustar00rootroot00000000000000From: a@example.com Subject: [PATCH] asdf Message-ID: MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="b" --b Content-Type: text/plain hi, see attached --b Content-Type: text/plain Content-Disposition: inline; filename="foo&.patch" --- a/file +++ b/file @@ -49, 7 +49,34 @@ --b-- public-inbox-1.6.1/t/plack-qp.eml000066400000000000000000000002421377346120300165700ustar00rootroot00000000000000From: qp@example.com Subject: QP Message-ID: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain hi =3D bye= public-inbox-1.6.1/t/plack.t000066400000000000000000000213641377346120300156500ustar00rootroot00000000000000# Copyright (C) 2014-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; my $psgi = "./examples/public-inbox.psgi"; my ($tmpdir, $for_destroy) = tmpdir(); my $pi_config = "$tmpdir/config"; my $inboxdir = "$tmpdir/main.git"; my $addr = 'test-public@example.com'; my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape); require_mods(@mods); use_ok 'PublicInbox::Import'; use_ok 'PublicInbox::Git'; my @ls; foreach my $mod (@mods) { use_ok $mod; } local $ENV{PI_CONFIG} = $pi_config; ok(-f $psgi, "psgi example file found"); my $pfx = 'http://example.com/test'; ok(run_script(['-init', 'test', $inboxdir, "$pfx/", $addr]), 'initialized repo'); PublicInbox::Import::run_die([qw(git config -f), $pi_config, 'publicinbox.test.newsgroup', 'inbox.test']); open my $fh, '>', "$inboxdir/description" or die "open: $!\n"; print $fh "test for public-inbox\n"; close $fh or die "close: $!\n"; my $app = require $psgi; my $git = PublicInbox::Git->new($inboxdir); my $im = PublicInbox::Import->new($git, 'test', $addr); # ensure successful message delivery { my $mime = PublicInbox::Eml->new(< To: You Cc: $addr Message-Id: Subject: hihi Date: Fri, 02 Oct 1993 00:00:00 +0000 > quoted text zzzzzz EOF $im->add($mime); $im->done; my $rev = $git->qx(qw(rev-list HEAD)); like($rev, qr/\A[a-f0-9]{40,}/, "good revision committed"); @ls = $git->qx(qw(ls-tree -r --name-only HEAD)); chomp @ls; # multipart with two text bodies $mime = eml_load 't/plack-2-txt-bodies.eml'; $im->add($mime); # multipart with attached patch + filename $mime = eml_load 't/plack-attached-patch.eml'; $im->add($mime); # multipart collapsed to single quoted-printable text/plain $mime = eml_load 't/plack-qp.eml'; like($mime->body_raw, qr/hi =3D bye=/, 'our test used QP correctly'); $im->add($mime); my $crlf = < To: $addr Message-Id: Subject: carriage return in long subject Date: Fri, 02 Oct 1993 00:00:00 +0000 :( EOF $crlf =~ s/\n/\r\n/sg; $im->add(PublicInbox::Eml->new($crlf)); $im->done; } test_psgi($app, sub { my ($cb) = @_; foreach my $u (qw(robots.txt favicon.ico .well-known/foo)) { my $res = $cb->(GET("http://example.com/$u")); is($res->code, 404, "$u is missing"); } }); test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(GET('http://example.com/test/crlf@example.com/')); is($res->code, 200, 'retrieved CRLF as HTML'); unlike($res->content, qr/\r/, 'no CR in HTML'); $res = $cb->(GET('http://example.com/test/crlf@example.com/raw')); is($res->code, 200, 'retrieved CRLF raw'); like($res->content, qr/\r/, 'CR preserved in raw message'); }); # redirect with newsgroup test_psgi($app, sub { my ($cb) = @_; my $from = 'http://example.com/inbox.test'; my $to = 'http://example.com/test/'; my $res = $cb->(GET($from)); is($res->code, 301, 'newsgroup name is permanent redirect'); is($to, $res->header('Location'), 'redirect location matches'); $from .= '/'; is($res->code, 301, 'newsgroup name/ is permanent redirect'); is($to, $res->header('Location'), 'redirect location matches'); }); # redirect with trailing / test_psgi($app, sub { my ($cb) = @_; my $from = 'http://example.com/test'; my $to = "$from/"; my $res = $cb->(GET($from)); is(301, $res->code, 'is permanent redirect'); is($to, $res->header('Location'), 'redirect location matches with trailing slash'); }); foreach my $t (qw(t T)) { test_psgi($app, sub { my ($cb) = @_; my $u = $pfx . "/blah\@example.com/$t"; my $res = $cb->(GET($u)); is(301, $res->code, "redirect for missing /"); my $location = $res->header('Location'); like($location, qr!/\Q$t\E/#u\z!, 'redirected with missing /'); }); } foreach my $t (qw(f)) { test_psgi($app, sub { my ($cb) = @_; my $u = $pfx . "/blah\@example.com/$t"; my $res = $cb->(GET($u)); is(301, $res->code, "redirect for legacy /f"); my $location = $res->header('Location'); like($location, qr!/blah\@example\.com/\z!, 'redirected with missing /'); }); } test_psgi($app, sub { my ($cb) = @_; my $atomurl = 'http://example.com/test/new.atom'; my $res = $cb->(GET('http://example.com/test/new.html')); is(200, $res->code, 'success response received'); like($res->content, qr!href="new\.atom"!, 'atom URL generated'); like($res->content, qr!href="blah\@example\.com/"!, 'index generated'); like($res->content, qr!1993-10-02!, 'date set'); }); test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(GET($pfx . '/atom.xml')); is(200, $res->code, 'success response received for atom'); my $body = $res->content; like($body, qr!link\s+href="\Q$pfx\E/blah\@example\.com/"!s, 'atom feed generated correct URL'); like($body, qr/test for public-inbox/, "set title in XML feed"); like($body, qr/zzzzzz/, 'body included'); $res = $cb->(GET($pfx . '/description')); like($res->content, qr/test for public-inbox/, 'got description'); }); test_psgi($app, sub { my ($cb) = @_; my $path = '/blah@example.com/'; my $res = $cb->(GET($pfx . $path)); is(200, $res->code, "success for $path"); my $html = $res->content; like($html, qr!<title>hihi - Me!, 'HTML returned'); like($html, qr!(GET($pfx . $path)); is(301, $res->code, "redirect for $path"); my $location = $res->header('Location'); like($location, qr!/blah\@example\.com/\z!, '/$MESSAGE_ID/f/ redirected to /$MESSAGE_ID/'); $res = $cb->(GET($pfx . '/multipart@example.com/')); like($res->content, qr/hi\n.*-- Attachment #2.*\nbye\n/s, 'multipart split'); $res = $cb->(GET($pfx . '/patch@example.com/')); $html = $res->content; like($html, qr!see attached!, 'original body'); like($html, qr!.*Attachment #2: foo&(?:amp|#38);\.patch --!, 'parts split with filename'); $res = $cb->(GET($pfx . '/qp@example.com/')); like($res->content, qr/\bhi = bye\b/, "HTML output decoded QP"); }); test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(GET($pfx . '/blah@example.com/raw')); is(200, $res->code, 'success response received for /*/raw'); like($res->content, qr!^From !sm, "mbox returned"); $res = $cb->(GET($pfx . '/blah@example.com/t.mbox.gz')); is(501, $res->code, '501 when overview missing'); like($res->content, qr!\bOverview\b!, 'overview omission noted'); }); # legacy redirects foreach my $t (qw(m f)) { test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(GET($pfx . "/$t/blah\@example.com.txt")); is(301, $res->code, "redirect for old $t .txt link"); my $location = $res->header('Location'); like($location, qr!/blah\@example\.com/raw\z!, ".txt redirected to /raw"); }); } my %umap = ( 'm' => '', 'f' => '', 't' => 't/', ); while (my ($t, $e) = each %umap) { test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(GET($pfx . "/$t/blah\@example.com.html")); is(301, $res->code, "redirect for old $t .html link"); my $location = $res->header('Location'); like($location, qr!/blah\@example\.com/$e(?:#u)?\z!, ".html redirected to new location"); }); } foreach my $sfx (qw(mbox mbox.gz)) { test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(GET($pfx . "/t/blah\@example.com.$sfx")); is(301, $res->code, 'redirect for old thread link'); my $location = $res->header('Location'); like($location, qr!/blah\@example\.com/t\.mbox(?:\.gz)?\z!, "$sfx redirected to /mbox.gz"); }); } test_psgi($app, sub { my ($cb) = @_; # for a while, we used to support /$INBOX/$X40/ # when we "compressed" long Message-IDs to SHA-1 # Now we're stuck supporting them forever :< foreach my $path (@ls) { $path =~ tr!/!!d; my $from = "http://example.com/test/$path/"; my $res = $cb->(GET($from)); is(301, $res->code, 'is permanent redirect'); like($res->header('Location'), qr!/test/blah\@example\.com/!, 'redirect from x40 MIDs works'); } }); # dumb HTTP clone/fetch support test_psgi($app, sub { my ($cb) = @_; my $path = '/test/info/refs'; my $req = HTTP::Request->new('GET' => $path); my $res = $cb->($req); is(200, $res->code, 'refs readable'); my $orig = $res->content; $req->header('Range', 'bytes=5-10'); $res = $cb->($req); is(206, $res->code, 'got partial response'); is($res->content, substr($orig, 5, 6), 'partial body OK'); $req->header('Range', 'bytes=5-'); $res = $cb->($req); is(206, $res->code, 'got partial another response'); is($res->content, substr($orig, 5), 'partial body OK past end'); }); # things which should fail test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(PUT('/')); is(405, $res->code, 'no PUT to / allowed'); $res = $cb->(PUT('/test/')); is(405, $res->code, 'no PUT /$INBOX allowed'); # TODO # $res = $cb->(GET('/')); }); done_testing(); public-inbox-1.6.1/t/precheck.t000066400000000000000000000036201377346120300163350ustar00rootroot00000000000000# Copyright (C) 2014-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::MDA; sub do_checks { my ($s) = @_; my $recipient = 'foo@example.com'; ok(!PublicInbox::MDA->precheck($s, $recipient), "wrong ORIGINAL_RECIPIENT rejected"); $recipient = 'b@example.com'; ok(PublicInbox::MDA->precheck($s, $recipient), "ORIGINAL_RECIPIENT in To: is OK"); $recipient = 'c@example.com'; ok(PublicInbox::MDA->precheck($s, $recipient), "ORIGINAL_RECIPIENT in Cc: is OK"); $recipient = [ 'c@example.com', 'd@example.com' ]; ok(PublicInbox::MDA->precheck($s, $recipient), "alias list is OK"); } { my $s = PublicInbox::Eml->new(<<'EOF'); From: abc@example.com To: abc@example.com Cc: c@example.com, another-list@example.com Content-Type: text/plain Subject: list is fine Message-ID: Date: Wed, 09 Apr 2014 01:28:34 +0000 hello world EOF my $addr = [ 'c@example.com', 'd@example.com' ]; ok(PublicInbox::MDA->precheck($s, $addr), 'Cc list is OK'); } { do_checks(PublicInbox::Eml->new(<<'EOF')); From: a@example.com To: b@example.com Cc: c@example.com Content-Type: text/plain Subject: this is a subject Message-ID: Date: Wed, 09 Apr 2014 01:28:34 +0000 hello world EOF } { do_checks(PublicInbox::Eml->new(<<'EOF')); From: a@example.com To: b+plus@example.com Cc: John Doe Content-Type: text/plain Subject: this is a subject Message-ID: Date: Wed, 09 Apr 2014 01:28:34 +0000 hello world EOF } { my $recipient = 'b@example.com'; my $s = PublicInbox::Eml->new(<<'EOF'); To: b@example.com Cc: c@example.com Content-Type: text/plain Subject: this is a subject Message-ID: Date: Wed, 09 Apr 2014 01:28:34 +0000 hello world EOF ok(!PublicInbox::MDA->precheck($s, $recipient), "missing From: is rejected"); } done_testing(); public-inbox-1.6.1/t/psgi_attach.eml000066400000000000000000000011671377346120300173550ustar00rootroot00000000000000From: root@z Message-Id: Subject: hi MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="b" --b Content-Type: text/plain Content-Transfer-Encoding: quoted-printable Content-Disposition: inline; filename="queue-pee" abcdef=3Dg =3D=3Dblah --b Content-Type: appication/octet-stream Content-Transfer-Encoding: base64 Content-Disposition: inline; filename="bayce-sixty-four" YjY03q2+7wo= --b Content-Type: text/plain Content-Disposition: inline; filename="noop.txt" plain text pass through --b Content-Type: text/plain Content-Disposition: inline; filename=".dotfile" dotfile --b-- public-inbox-1.6.1/t/psgi_attach.t000066400000000000000000000103371377346120300170420ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; my ($tmpdir, $for_destroy) = tmpdir(); my $inboxdir = "$tmpdir/main.git"; my $addr = 'test-public@example.com'; my @mods = qw(HTTP::Request::Common Plack::Builder Plack::Test URI::Escape); require_mods(@mods); use_ok $_ foreach @mods; use_ok 'PublicInbox::WWW'; use PublicInbox::Import; use PublicInbox::Git; use PublicInbox::Config; use PublicInbox::Eml; use_ok 'PublicInbox::WwwAttach'; my $cfgpath = "$tmpdir/config"; open my $fh, '>', $cfgpath or BAIL_OUT $!; print $fh <new($cfgpath); my $git = PublicInbox::Git->new($inboxdir); my $im = PublicInbox::Import->new($git, 'test', $addr); $im->init_bare; my $qp = "abcdef=g\n==blah\n"; my $b64 = "b64\xde\xad\xbe\xef\n"; my $txt = "plain\ntext\npass\nthrough\n"; my $dot = "dotfile\n"; $im->add(eml_load('t/psgi_attach.eml')); $im->add(eml_load('t/data/message_embed.eml')); $im->done; my $www = PublicInbox::WWW->new($config); my $client = sub { my ($cb) = @_; my $res; $res = $cb->(GET('/test/Z%40B/')); my @href = ($res->content =~ /^href="([^"]+)"/gms); @href = grep(/\A[\d\.]+-/, @href); is_deeply([qw(1-queue-pee 2-bayce-sixty-four 3-noop.txt 4-a.txt)], \@href, 'attachment links generated'); $res = $cb->(GET('/test/Z%40B/1-queue-pee')); my $qp_res = $res->content; ok(length($qp_res) >= length($qp), 'QP length is close'); like($qp_res, qr/\n\z/s, 'trailing newline exists'); # is(index($qp_res, $qp), 0, 'QP trailing newline is there'); $qp_res =~ s/\r\n/\n/g; is(index($qp_res, $qp), 0, 'QP trailing newline is there'); $res = $cb->(GET('/test/Z%40B/2-base-sixty-four')); is(quotemeta($res->content), quotemeta($b64), 'Base64 matches exactly'); $res = $cb->(GET('/test/Z%40B/3-noop.txt')); my $txt_res = $res->content; ok(length($txt_res) >= length($txt), 'plain text almost matches'); like($txt_res, qr/\n\z/s, 'trailing newline exists in text'); is(index($txt_res, $txt), 0, 'plain text not truncated'); $res = $cb->(GET('/test/Z%40B/4-a.txt')); my $dot_res = $res->content; ok(length($dot_res) >= length($dot), 'dot almost matches'); $res = $cb->(GET('/test/Z%40B/4-any-filename.txt')); is($res->content, $dot_res, 'user-specified filename is OK'); my $mid = '20200418222508.GA13918@dcvr'; my $irt = '20200418222020.GA2745@dcvr'; $res = $cb->(GET("/test/$mid/")); unlike($res->content, qr! multipart/mixed, Size: 0 bytes!, '0-byte download not offered'); like($res->content, qr/\bhref="2-embed2x\.eml"/s, 'href to message/rfc822 attachment visible'); like($res->content, qr/\bhref="2\.1\.2-test\.eml"/s, 'href to nested message/rfc822 attachment visible'); $res = $cb->(GET("/test/$mid/2-embed2x.eml")); my $eml = PublicInbox::Eml->new(\($res->content)); is_deeply([ $eml->header_raw('Message-ID') ], [ "<$irt>" ], 'got attached eml'); my @subs = $eml->subparts; is(scalar(@subs), 2, 'attachment had 2 subparts'); like($subs[0]->body_str, qr/^testing embedded message\n*\z/sm, '1st attachment is as expected'); is($subs[1]->header('Content-Type'), 'message/rfc822', '2nd attachment is as expected'); $res = $cb->(GET("/test/$mid/2.1.2-test.eml")); $eml = PublicInbox::Eml->new(\($res->content)); is_deeply([ $eml->header_raw('Message-ID') ], [ '<20200418214114.7575-1-e@yhbt.net>' ], 'nested eml retrieved'); }; test_psgi(sub { $www->call(@_) }, $client); SKIP: { diag 'testing with index indexed'; require_mods('DBD::SQLite', 19); my $env = { PI_CONFIG => $cfgpath }; ok(run_script(['-index', $inboxdir], $env), 'indexed'); test_psgi(sub { $www->call(@_) }, $client); require_mods(qw(Plack::Test::ExternalServer), 18); my $sock = tcp_server() or die; my ($out, $err) = map { "$inboxdir/std$_.log" } qw(out err); my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, $env, { 3 => $sock }); my ($h, $p) = ($sock->sockhost, $sock->sockport); local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; Plack::Test::ExternalServer::test_psgi(client => $client); } done_testing(); public-inbox-1.6.1/t/psgi_bad_mids.t000066400000000000000000000046131377346120300173400ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::Config; use PublicInbox::TestCommon; my @mods = qw(DBD::SQLite HTTP::Request::Common Plack::Test URI::Escape Plack::Builder PublicInbox::WWW); require_git 2.6; require_mods(@mods); use_ok($_) for @mods; use_ok 'PublicInbox::WWW'; use_ok 'PublicInbox::V2Writable'; my ($inboxdir, $for_destroy) = tmpdir(); my $cfgpfx = "publicinbox.bad-mids"; my $ibx = { inboxdir => $inboxdir, name => 'bad-mids', version => 2, -primary_address => 'test@example.com', indexlevel => 'basic', }; $ibx = PublicInbox::Inbox->new($ibx); my $im = PublicInbox::V2Writable->new($ibx, 1); $im->{parallel} = 0; my $msgs = <<''; F1V5OR6NMF.3M649JTLO9IXD@tux.localdomain/hehe1"'/foo F1V5NB0PTU.3U0DCVGAJ750Z@tux&.ampersand F1V5MIHGCU.2ABINKW6WBE8N@tux.localdomain/raw F1V5LF9D9C.2QT5PGXZQ050E@tux.localdomain/t.atom F1V58X3CMU.2DCCVAKQZGADV@tux.localdomain/../../../../foo F1TVKINT3G.2S6I36MXMHYG6@tux.localdomain" onclick="alert(1)" my @mids = split(/\n/, $msgs); my $i = 0; foreach my $mid (@mids) { my $data = << ""; Subject: test Message-ID: <$mid> From: a\@example.com To: b\@example.com Date: Fri, 02 Oct 1993 00:00:0$i +0000 my $mime = PublicInbox::Eml->new(\$data); ok($im->add($mime), "added $mid"); $i++ } $im->done; my $cfg = <{-primary_address} $cfgpfx.inboxdir=$inboxdir EOF my $config = PublicInbox::Config->new(\$cfg); my $www = PublicInbox::WWW->new($config); test_psgi(sub { $www->call(@_) }, sub { my ($cb) = @_; my $res = $cb->(GET('/bad-mids/')); is($res->code, 200, 'got 200 OK listing'); my $raw = $res->content; foreach my $mid (@mids) { ok(index($raw, $mid) < 0, "escaped $mid"); } my (@xmids) = ($raw =~ m!\bhref="([^"]+)/t\.mbox\.gz"!sg); is(scalar(@xmids), scalar(@mids), 'got escaped links to all messages'); @xmids = reverse @xmids; my %uxs = ( gt => '>', lt => '<' ); foreach my $i (0..$#xmids) { my $uri = $xmids[$i]; $uri =~ s/&#([0-9]+);/sprintf("%c", $1)/sge; $uri =~ s/&(lt|gt);/$uxs{$1}/sge; $res = $cb->(GET("/bad-mids/$uri/raw")); is($res->code, 200, 'got 200 OK raw message '.$uri); like($res->content, qr/Message-ID: <\Q$mids[$i]\E>/s, 'retrieved correct message'); } }); done_testing(); 1; public-inbox-1.6.1/t/psgi_mount.t000066400000000000000000000062251377346120300167410ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::TestCommon; my ($tmpdir, $for_destroy) = tmpdir(); my $maindir = "$tmpdir/main.git"; my $addr = 'test-public@example.com'; my $cfgpfx = "publicinbox.test"; my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape Plack::Builder Plack::App::URLMap); require_mods(@mods); use_ok $_ foreach @mods; use_ok 'PublicInbox::WWW'; use PublicInbox::Import; use PublicInbox::Git; use PublicInbox::Config; my $config = PublicInbox::Config->new(\<new($maindir); my $im = PublicInbox::Import->new($git, 'test', $addr); $im->init_bare; { my $mime = PublicInbox::Eml->new(< To: You Cc: $addr Message-Id: Subject: hihi Date: Thu, 01 Jan 1970 00:00:00 +0000 zzzzzz EOF $im->add($mime); $im->done; } my $www = PublicInbox::WWW->new($config); my $app = builder(sub { enable('Head'); mount('/a' => builder(sub { sub { $www->call(@_) } })); mount('/b' => builder(sub { sub { $www->call(@_) } })); }); test_psgi($app, sub { my ($cb) = @_; my $res; # Atom feed: $res = $cb->(GET('/a/test/new.atom')); like($res->content, qr!\bhttp://[^/]+/a/test/!, 'URLs which exist in Atom feed are mount-aware'); unlike($res->content, qr!\b\Qhttp://[^/]+/test/\E!, 'No URLs which are not mount-aware'); $res = $cb->(GET('/a/test/new.html')); like($res->content, qr!git clone --mirror http://[^/]+/a/test\b!, 'clone URL in new.html is mount-aware'); $res = $cb->(GET('/a/test/blah%40example.com/')); is($res->code, 200, 'OK with URLMap mount'); like($res->content, qr!git clone --mirror http://[^/]+/a/test\b!, 'clone URL in /$INBOX/$MESSAGE_ID/ is mount-aware'); $res = $cb->(GET('/a/test/blah%40example.com/raw')); is($res->code, 200, 'OK with URLMap mount'); like($res->content, qr!^List-Archive: !m, 'List-Archive set in /raw mboxrd'); like($res->content, qr!^Archived-At: !m, 'Archived-At set in /raw mboxrd'); # redirects $res = $cb->(GET('/a/test/m/blah%40example.com.html')); is($res->header('Location'), 'http://localhost/a/test/blah@example.com/', 'redirect functions properly under mount'); $res = $cb->(GET('/test/blah%40example.com/')); is($res->code, 404, 'intentional 404 with URLMap mount'); }); SKIP: { require_mods(qw(DBD::SQLite Search::Xapian IO::Uncompress::Gunzip), 3); my $ibx = $config->lookup_name('test'); require_ok 'PublicInbox::SearchIdx'; PublicInbox::SearchIdx->new($ibx, 1)->index_sync; test_psgi($app, sub { my ($cb) = @_; my $res = $cb->(GET('/a/test/blah@example.com/t.mbox.gz')); my $gz = $res->content; my $raw; IO::Uncompress::Gunzip::gunzip(\$gz => \$raw); like($raw, qr!^List-Archive: !m, 'List-Archive set in /t.mbox.gz mboxrd'); like($raw, qr!^Archived-At:\x20 !mx, 'Archived-At set in /t.mbox.gz mboxrd'); }); } done_testing(); public-inbox-1.6.1/t/psgi_multipart_not.t000066400000000000000000000034231377346120300204750ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::Config; use PublicInbox::TestCommon; require_git 2.6; my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test URI::Escape Plack::Builder Plack::Test); require_mods(@mods); use_ok($_) for (qw(HTTP::Request::Common Plack::Test)); use_ok 'PublicInbox::WWW'; use_ok 'PublicInbox::V2Writable'; my ($repo, $for_destroy) = tmpdir(); my $ibx = PublicInbox::Inbox->new({ inboxdir => $repo, name => 'multipart-not', version => 2, -primary_address => 'test@example.com', }); my $im = PublicInbox::V2Writable->new($ibx, 1); $im->{parallel} = 0; my $mime = PublicInbox::Eml->new(<<'EOF'); Message-Id: <200308111450.h7BEoOu20077@mail.osdl.org> To: linux-kernel@vger.kernel.org Subject: [OSDL] linux-2.6.0-test3 reaim results Mime-Version: 1.0 Content-Type: multipart/mixed ; boundary="==_Exmh_120757360" Date: Mon, 11 Aug 2003 07:50:24 -0700 From: exmh user Freed^Wmultipart ain't what it used to be EOF ok($im->add($mime), 'added broken multipart message'); $im->done; my $cfgpfx = "publicinbox.v2test"; my $cfg = <{-primary_address} $cfgpfx.inboxdir=$repo EOF my $config = PublicInbox::Config->new(\$cfg); my $www = PublicInbox::WWW->new($config); my ($res, $raw); test_psgi(sub { $www->call(@_) }, sub { my ($cb) = @_; for my $u ('/v2test/?q=%22ain\'t what it used to be%22&x=t', '/v2test/new.atom', '/v2test/new.html') { $res = $cb->(GET($u)); $raw = $res->content; ok(index($raw, 'Freed^Wmultipart') >= 0, $u); ok(index($raw, 'Warning: decoded text') >= 0, $u.' warns'); } }); done_testing(); 1; public-inbox-1.6.1/t/psgi_scan_all.t000066400000000000000000000033751377346120300173560ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::Config; use PublicInbox::TestCommon; my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape DBD::SQLite); require_git 2.6; require_mods(@mods); use_ok 'PublicInbox::V2Writable'; foreach my $mod (@mods) { use_ok $mod; } my ($tmp, $for_destroy) = tmpdir(); my $cfg = ''; foreach my $i (1..2) { my $cfgpfx = "publicinbox.test-$i"; my $addr = "test-$i\@example.com"; my $inboxdir = "$tmp/$i"; $cfg .= "$cfgpfx.address=$addr\n"; $cfg .= "$cfgpfx.inboxdir=$inboxdir\n"; $cfg .= "$cfgpfx.url=http://example.com/$i\n"; my $opt = { inboxdir => $inboxdir, name => "test-$i", version => 2, indexlevel => 'basic', -primary_address => $addr, }; my $ibx = PublicInbox::Inbox->new($opt); my $im = PublicInbox::V2Writable->new($ibx, 1); $im->{parallel} = 0; $im->init_inbox(0); my $mime = PublicInbox::Eml->new(< Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF ok($im->add($mime), "added message to $i"); $im->done; } my $config = PublicInbox::Config->new(\$cfg); use_ok 'PublicInbox::WWW'; my $www = PublicInbox::WWW->new($config); test_psgi(sub { $www->call(@_) }, sub { my ($cb) = @_; foreach my $i (1..2) { foreach my $end ('', '/') { my $res = $cb->(GET("/a-mid-$i\@b$end")); is($res->code, 302, 'got 302'); is($res->header('Location'), "http://example.com/$i/a-mid-$i\@b/", "redirected OK to $i"); } } foreach my $x (qw(inv@lid inv@lid/ i/v/a l/i/d/)) { my $res = $cb->(GET("/$x")); is($res->code, 404, "404 on $x"); } }); done_testing(); public-inbox-1.6.1/t/psgi_search.t000066400000000000000000000122511377346120300170400ustar00rootroot00000000000000# Copyright (C) 2017-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use IO::Uncompress::Gunzip qw(gunzip); use PublicInbox::Eml; use PublicInbox::Config; use PublicInbox::Inbox; use PublicInbox::InboxWritable; use bytes (); # only for bytes::length use PublicInbox::TestCommon; my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test URI::Escape Plack::Builder); require_mods(@mods); use_ok($_) for (qw(HTTP::Request::Common Plack::Test)); use_ok 'PublicInbox::WWW'; use_ok 'PublicInbox::SearchIdx'; my ($tmpdir, $for_destroy) = tmpdir(); my $ibx = PublicInbox::Inbox->new({ inboxdir => $tmpdir, address => 'git@vger.kernel.org', name => 'test', }); $ibx = PublicInbox::InboxWritable->new($ibx); $ibx->init_inbox(1); my $im = $ibx->importer(0); my $digits = '10010260936330'; my $ua = 'Pine.LNX.4.10'; my $mid = "$ua.$digits.2460-100000\@penguin.transmeta.com"; # n.b. these headers are not properly RFC2047-encoded my $mime = PublicInbox::Eml->new(< From: Ævar Arnfjörð Bjarmason To: git\@vger.kernel.org EOF $im->add($mime); $im->add(PublicInbox::Eml->new(<<"")); Message-ID: From: replier In-Reply-To: <$mid> Subject: mismatch $mime = PublicInbox::Eml->new(<<'EOF'); Subject: Message-ID: From: blank subject To: git@vger.kernel.org EOF $im->add($mime); $mime = PublicInbox::Eml->new(<<'EOF'); Message-ID: From: no subject at all To: git@vger.kernel.org EOF $im->add($mime); $im->done; PublicInbox::SearchIdx->new($ibx, 1)->index_sync; my $cfgpfx = "publicinbox.test"; my $config = PublicInbox::Config->new(\<new($config); test_psgi(sub { $www->call(@_) }, sub { my ($cb) = @_; my $res; $res = $cb->(GET('/test/?q=%C3%86var')); my $html = $res->content; like($html, qr/Ævar - /, 'HTML escaped in title'); my @res = ($html =~ m/\?q=(.+var)\b/g); ok(scalar(@res), 'saw query strings'); my %uniq = map { $_ => 1 } @res; is(1, scalar keys %uniq, 'all query values identical in HTML'); is('%C3%86var', (keys %uniq)[0], 'matches original query'); ok(index($html, 'by Ævar Arnfjörð Bjarmason') >= 0, "displayed Ævar's name properly in HTML"); like($html, qr/download mbox\.gz: .*?"full threads"/s, '"full threads" download option shown'); my $warn = []; local $SIG{__WARN__} = sub { push @$warn, @_ }; $res = $cb->(GET('/test/?q=s:test&l=5e')); is($res->code, 200, 'successful search result'); is_deeply([], $warn, 'no warnings from non-numeric comparison'); $res = $cb->(POST('/test/?q=s:bogus&x=m')); is($res->code, 404, 'failed search result gives 404'); is_deeply([], $warn, 'no warnings'); my $mid_re = qr/\Q$mid\E/o; while (length($digits) > 8) { $res = $cb->(GET("/test/$ua.$digits/")); is($res->code, 300, 'partial match found while truncated'); like($res->content, qr/\b1 partial match found\b/); like($res->content, $mid_re, 'found mid in response'); chop($digits); } $res = $cb->(GET('/test/')); $html = $res->content; like($html, qr/\bhref="no-subject-at-all[^>]+>\(no subject\)</, 'subject-less message linked from "/$INBOX/"'); like($html, qr/\bhref="blank-subject[^>]+>\(no subject\)</, 'blank subject message linked from "/$INBOX/"'); like($html, qr/test Ævar/, "displayed Ævar's name properly in topic view"); $res = $cb->(GET('/test/?q=tc:git')); like($html, qr/\bhref="no-subject-at-all[^>]+>\(no subject\)</, 'subject-less message linked from "/$INBOX/?q=..."'); like($html, qr/\bhref="blank-subject[^>]+>\(no subject\)</, 'blank subject message linked from "/$INBOX/?q=..."'); $res = $cb->(GET('/test/no-subject-at-all@example.com/raw')); like($res->header('Content-Disposition'), qr/filename=no-subject\.txt/); $res = $cb->(GET('/test/no-subject-at-all@example.com/t.mbox.gz')); like($res->header('Content-Disposition'), qr/filename=no-subject\.mbox\.gz/); # "full threads" mbox.gz download $res = $cb->(POST('/test/?q=s:test&x=m&t')); is($res->code, 200, 'successful mbox download with threads'); gunzip(\($res->content) => \(my $before)); is_deeply([ "Message-ID: <$mid>\n", "Message-ID: <reply\@asdf>\n" ], [ grep(/^Message-ID:/m, split(/^/m, $before)) ], 'got full thread'); # clobber has_threadid to emulate old versions: { my $sidx = PublicInbox::SearchIdx->new($ibx, 0); my $xdb = $sidx->idx_acquire; $xdb->set_metadata('has_threadid', '0'); $sidx->idx_release; } $config->each_inbox(sub { delete $_[0]->{search} }); $res = $cb->(GET('/test/?q=s:test')); is($res->code, 200, 'successful search w/o has_threadid'); unlike($html, qr/download mbox\.gz: .*?"full threads"/s, '"full threads" download option not shown w/o has_threadid'); # in case somebody uses curl to bypass <form> $res = $cb->(POST('/test/?q=s:test&x=m&t')); is($res->code, 200, 'successful mbox download w/ threads'); gunzip(\($res->content) => \(my $after)); isnt($before, $after); }); done_testing(); �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������public-inbox-1.6.1/t/psgi_text.t��������������������������������������������������������������������0000664�0000000�0000000�00000004036�13773461203�0016561�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::TestCommon; my ($tmpdir, $for_destroy) = tmpdir(); my $maindir = "$tmpdir/main.git"; my $addr = 'test-public@example.com'; my $cfgpfx = "publicinbox.test"; my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape Plack::Builder); require_mods(@mods, 'IO::Uncompress::Gunzip'); use_ok $_ foreach @mods; use PublicInbox::Import; use PublicInbox::Git; use PublicInbox::Config; use_ok 'PublicInbox::WWW'; use_ok 'PublicInbox::WwwText'; my $config = PublicInbox::Config->new(\<<EOF); $cfgpfx.address=$addr $cfgpfx.inboxdir=$maindir EOF PublicInbox::Import::init_bare($maindir); my $www = PublicInbox::WWW->new($config); test_psgi(sub { $www->call(@_) }, sub { my ($cb) = @_; my $gunzipped; my $req = GET('/test/_/text/help/'); my $res = $cb->($req); my $content = $res->content; like($content, qr!<title>public-inbox help.*!, 'default help'); $req->header('Accept-Encoding' => 'gzip'); $res = $cb->($req); is($res->header('Content-Encoding'), 'gzip', 'got gzip encoding'); is($res->header('Content-Type'), 'text/html; charset=UTF-8', 'got gzipped HTML'); IO::Uncompress::Gunzip::gunzip(\($res->content) => \$gunzipped); is($gunzipped, $content, 'gzipped content is correct'); $req = GET('/test/_/text/config/raw'); $res = $cb->($req); $content = $res->content; my $olen = $res->header('Content-Length'); my $f = "$tmpdir/cfg"; open my $fh, '>', $f or die; print $fh $content or die; close $fh or die; my $cfg = PublicInbox::Config->new($f); is($cfg->{"$cfgpfx.address"}, $addr, 'got expected address in config'); $req->header('Accept-Encoding' => 'gzip'); $res = $cb->($req); is($res->header('Content-Encoding'), 'gzip', 'got gzip encoding'); ok($res->header('Content-Length') < $olen, 'gzipped help is smaller'); IO::Uncompress::Gunzip::gunzip(\($res->content) => \$gunzipped); is($gunzipped, $content); }); done_testing(); public-inbox-1.6.1/t/psgi_v2-new.eml000066400000000000000000000004121377346120300172170ustar00rootroot00000000000000From: root@z Message-ID: Subject: hi MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="b" --b Content-Type: text/plain blah --b Content-Type: text/plain Content-Disposition: inline; filename="attach.txt" new --b-- public-inbox-1.6.1/t/psgi_v2-old.eml000066400000000000000000000004121377346120300172040ustar00rootroot00000000000000From: root@z Message-ID: Subject: hi MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="b" --b Content-Type: text/plain blah --b Content-Type: text/plain Content-Disposition: inline; filename="attach.txt" old --b-- public-inbox-1.6.1/t/psgi_v2.t000066400000000000000000000250011377346120300161170ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; require_git(2.6); use PublicInbox::Eml; use PublicInbox::Config; use PublicInbox::MID qw(mids); require_mods(qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test URI::Escape Plack::Builder)); use_ok($_) for (qw(HTTP::Request::Common Plack::Test)); use_ok 'PublicInbox::WWW'; use_ok 'PublicInbox::V2Writable'; my ($inboxdir, $for_destroy) = tmpdir(); my $cfgpath = "$inboxdir/$$.config"; SKIP: { require_mods(qw(Plack::Test::ExternalServer), 1); open my $fh, '>', $cfgpath or BAIL_OUT $!; print $fh < $cfgpath }; my $sock = tcp_server() or die; my ($out, $err) = map { "$inboxdir/std$_.log" } qw(out err); my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, $env, { 3 => $sock }); my ($h, $p) = ($sock->sockhost, $sock->sockport); local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; Plack::Test::ExternalServer::test_psgi(client => $client); $td->join('TERM'); open my $fh, '<', $err or BAIL_OUT $!; my $e = do { local $/; <$fh> }; if ($e =~ s/^Plack::Middleware::ReverseProxy missing,\n//gms) { $e =~ s/^URL generation for redirects .*\n//gms; } is($e, '', 'no errors'); } }; my $ibx = { inboxdir => $inboxdir, name => 'test-v2writable', version => 2, -primary_address => 'test@example.com', }; $ibx = PublicInbox::Inbox->new($ibx); my $new_mid; my $im = PublicInbox::V2Writable->new($ibx, 1); $im->{parallel} = 0; my $mime = PublicInbox::Eml->new(<<'EOF'); From oldbug-pre-a0c07cba0e5d8b6a Fri Oct 2 00:00:00 1993 From: a@example.com To: test@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF ok($im->add($mime), 'added one message'); $mime->body_set("hello world!\n"); my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; $mime->header_set(Date => 'Fri, 02 Oct 1993 00:01:00 +0000'); ok($im->add($mime), 'added duplicate-but-different message'); is(scalar(@warn), 1, 'got one warning'); my $mids = mids($mime->header_obj); $new_mid = $mids->[1]; $im->done; my $msg = $ibx->msg_by_mid('a-mid@b'); like($$msg, qr/\AFrom oldbug/s, '"From_" line stored to test old bug workaround'); my $cfgpfx = "publicinbox.v2test"; my $cfg = <{-primary_address} $cfgpfx.inboxdir=$inboxdir EOF my $config = PublicInbox::Config->new(\$cfg); my $www = PublicInbox::WWW->new($config); my ($res, $raw, @from_); my $client0 = sub { my ($cb) = @_; $res = $cb->(GET('/v2test/description')); like($res->content, qr!\$INBOX_DIR/description missing!, 'got v2 description missing message'); $res = $cb->(GET('/v2test/a-mid@b/raw')); $raw = $res->content; unlike($raw, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($raw, qr/^hello world$/m, 'got first message'); like($raw, qr/^hello world!$/m, 'got second message'); @from_ = ($raw =~ m/^From /mg); is(scalar(@from_), 2, 'two From_ lines'); $res = $cb->(GET("/v2test/$new_mid/raw")); $raw = $res->content; like($raw, qr/^hello world!$/m, 'second message with new Message-Id'); @from_ = ($raw =~ m/^From /mg); is(scalar(@from_), 1, 'only one From_ line'); # Atom feed should sort by Date: (if Received is missing) $res = $cb->(GET('/v2test/new.atom')); my @bodies = ($res->content =~ />(hello [^<]+)(GET('/v2test/new.html')); @bodies = ($res->content =~ /^(hello [^<]+)$/mg); is_deeply(\@bodies, [ "hello world!\n", "hello world\n" ], 'new.html ordering is chronological'); }; test_psgi(sub { $www->call(@_) }, $client0); $run_httpd->($client0, 9); $mime->header_set('Message-Id', 'a-mid@b'); $mime->body_set("hello ghosts\n"); ok($im->add($mime), 'added 3rd duplicate-but-different message'); is(scalar(@warn), 2, 'got another warning'); like($warn[0], qr/mismatched/, 'warned about mismatched messages'); is($warn[0], $warn[1], 'both warnings are the same'); $mids = mids($mime->header_obj); my $third = $mids->[-1]; $im->done; my $client1 = sub { my ($cb) = @_; $res = $cb->(GET("/v2test/$third/raw")); $raw = $res->content; like($raw, qr/^hello ghosts$/m, 'got third message'); @from_ = ($raw =~ m/^From /mg); is(scalar(@from_), 1, 'one From_ line'); $res = $cb->(GET('/v2test/a-mid@b/raw')); $raw = $res->content; like($raw, qr/^hello world$/m, 'got first message'); like($raw, qr/^hello world!$/m, 'got second message'); like($raw, qr/^hello ghosts$/m, 'got third message'); @from_ = ($raw =~ m/^From /mg); is(scalar(@from_), 3, 'three From_ lines'); $config->each_inbox(sub { $_[0]->search->reopen }); SKIP: { eval { require IO::Uncompress::Gunzip }; skip 'IO::Uncompress::Gunzip missing', 6 if $@; my ($in, $out, $status); my $req = GET('/v2test/a-mid@b/raw'); $req->header('Accept-Encoding' => 'gzip'); $res = $cb->($req); is($res->header('Content-Encoding'), 'gzip', 'gzip encoding'); $in = $res->content; IO::Uncompress::Gunzip::gunzip(\$in => \$out); is($out, $raw, 'gzip response matches'); $res = $cb->(GET('/v2test/a-mid@b/t.mbox.gz')); $in = $res->content; $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out); unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($out, qr/^hello world$/m, 'got first in t.mbox.gz'); like($out, qr/^hello world!$/m, 'got second in t.mbox.gz'); like($out, qr/^hello ghosts$/m, 'got third in t.mbox.gz'); @from_ = ($out =~ m/^From /mg); is(scalar(@from_), 3, 'three From_ lines in t.mbox.gz'); # search interface $res = $cb->(POST('/v2test/?q=m:a-mid@b&x=m')); $in = $res->content; $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out); unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($out, qr/^hello world$/m, 'got first in mbox POST'); like($out, qr/^hello world!$/m, 'got second in mbox POST'); like($out, qr/^hello ghosts$/m, 'got third in mbox POST'); @from_ = ($out =~ m/^From /mg); is(scalar(@from_), 3, 'three From_ lines in mbox POST'); # all.mbox.gz interface $res = $cb->(GET('/v2test/all.mbox.gz')); $in = $res->content; $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out); unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($out, qr/^hello world$/m, 'got first in all.mbox'); like($out, qr/^hello world!$/m, 'got second in all.mbox'); like($out, qr/^hello ghosts$/m, 'got third in all.mbox'); @from_ = ($out =~ m/^From /mg); is(scalar(@from_), 3, 'three From_ lines in all.mbox'); }; $res = $cb->(GET('/v2test/?q=m:a-mid@b&x=t')); is($res->code, 200, 'success with threaded search'); my $raw = $res->content; ok($raw =~ s/\A.*>Results 1-3 of 3\b//s, 'got all results'); my @over = ($raw =~ m/\d{4}-\d+-\d+\s+\d+:\d+ +(?:\d+\% )?(.+)$/gm); is_deeply(\@over, [ '(GET('/v2test/?q=m:a-mid@b&x=A')); is($res->code, 200, 'success with Atom search'); SKIP: { require_mods(qw(XML::TreePP), 2); my $t = XML::TreePP->new->parse($res->content); like($t->{feed}->{-xmlns}, qr/\bAtom\b/, 'looks like an an Atom feed'); is(scalar @{$t->{feed}->{entry}}, 3, 'parsed three entries'); }; local $SIG{__WARN__} = 'DEFAULT'; $res = $cb->(GET('/v2test/a-mid@b/')); $raw = $res->content; like($raw, qr/^hello world$/m, 'got first message'); like($raw, qr/^hello world!$/m, 'got second message'); like($raw, qr/^hello ghosts$/m, 'got third message'); @from_ = ($raw =~ m/>From: /mg); is(scalar(@from_), 3, 'three From: lines'); foreach my $mid ('a-mid@b', $new_mid, $third) { like($raw, qr!>\Q$mid\E!s, "Message-ID $mid shown"); } like($raw, qr/\b3\+ messages\b/, 'thread overview shown'); }; test_psgi(sub { $www->call(@_) }, $client1); $run_httpd->($client1, 38); { my $exp = [ qw( ) ]; $mime->header_set('Message-Id', @$exp); $mime->header_set('Subject', '4th dupe'); local $SIG{__WARN__} = sub {}; ok($im->add($mime), 'added one message'); $im->done; my @h = $mime->header('Message-ID'); is_deeply($exp, \@h, 'reused existing Message-ID'); $config->each_inbox(sub { $_[0]->search->reopen }); } my $client2 = sub { my ($cb) = @_; my $res = $cb->(GET('/v2test/new.atom')); my @ids = ($res->content =~ m!urn:uuid:([^<]+)!sg); my %ids; $ids{$_}++ for @ids; is_deeply([qw(1 1 1 1)], [values %ids], 'feed ids unique'); $res = $cb->(GET('/v2test/reuse@mid/T/')); $raw = $res->content; like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /T/'); my @over = ($raw =~ m/^\d{4}-\d+-\d+\s+\d+:\d+ (.+)$/gm); is_deeply(\@over, [ '(GET('/v2test/reuse@mid/t/')); $raw = $res->content; like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /t/'); $res = $cb->(GET('/v2test/0/info/refs')); is($res->code, 200, 'got info refs for dumb clones'); $res = $cb->(GET('/v2test/0.git/info/refs')); is($res->code, 200, 'got info refs for dumb clones w/ .git suffix'); $res = $cb->(GET('/v2test/info/refs')); is($res->code, 404, 'v2 git URL w/o shard fails'); }; test_psgi(sub { $www->call(@_) }, $client2); $run_httpd->($client2, 8); { # ensure conflicted attachments can be resolved foreach my $body (qw(old new)) { $mime = eml_load "t/psgi_v2-$body.eml"; ok($im->add($mime), "added attachment $body"); } $im->done; $config->each_inbox(sub { $_[0]->search->reopen }); } my $client3 = sub { my ($cb) = @_; my $res = $cb->(GET('/v2test/a@dup/')); my @links = ($res->content =~ m!"\.\./([^/]+/2-attach\.txt)\"!g); is(scalar(@links), 2, 'both attachment links exist'); isnt($links[0], $links[1], 'attachment links are different'); { my $old = $cb->(GET('/v2test/' . $links[0])); my $new = $cb->(GET('/v2test/' . $links[1])); is($old->content, 'old', 'got expected old content'); is($new->content, 'new', 'got expected new content'); } $res = $cb->(GET('/v2test/?t=1970'.'01'.'01'.'000000')); is($res->code, 404, '404 for out-of-range t= param'); @warn = (); $res = $cb->(GET('/v2test/?t=1970'.'01'.'01')); is_deeply(\@warn, [], 'no warnings on YYYYMMDD only'); }; test_psgi(sub { $www->call(@_) }, $client3); $run_httpd->($client3, 4); done_testing(); 1; public-inbox-1.6.1/t/purge.t000066400000000000000000000051341377346120300156750ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; require_git(2.6); require_mods(qw(DBD::SQLite)); use Cwd qw(abs_path); # we need this since we chdir below local $ENV{HOME} = abs_path('t'); my $purge = abs_path('blib/script/public-inbox-purge'); my ($tmpdir, $for_destroy) = tmpdir(); use_ok 'PublicInbox::V2Writable'; my $inboxdir = "$tmpdir/v2"; my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'test-v2purge', version => 2, -primary_address => 'test@example.com', indexlevel => 'basic', }); my $raw = <<'EOF'; From: a@example.com To: test@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 Hello World EOF my $cfgfile = "$tmpdir/config"; local $ENV{PI_CONFIG} = $cfgfile; open my $cfg_fh, '>', $cfgfile or die "open: $!"; my $v2w = PublicInbox::V2Writable->new($ibx, {nproc => 1}); my $mime = PublicInbox::Eml->new($raw); ok($v2w->add($mime), 'add message to be purged'); $v2w->done; # failing cases, first: my $in = "$raw\nMOAR\n"; my ($out, $err) = ('', ''); my $opt = { 0 => \$in, 1 => \$out, 2 => \$err }; ok(run_script([$purge, '-f', $inboxdir], undef, $opt), 'purge -f OK'); $out = $err = ''; ok(!run_script([$purge, $inboxdir], undef, $opt), 'mismatch fails without -f'); is($? >> 8, 1, 'missed purge exits with 1'); # a successful case: $opt->{0} = \$raw; ok(run_script([$purge, $inboxdir], undef, $opt), 'match OK'); like($out, qr/\b[a-f0-9]{40,}/m, 'removed commit noted'); # add (old) vger filter to config file print $cfg_fh <add($mime), 'add vger-signatured message to be purged'); $v2w->done; my $pre_scrub = $raw . <<'EOF'; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ EOF $out = $err = ''; ok(chdir('/'), "chdir / OK for --all test"); $opt->{0} = \$pre_scrub; ok(run_script([$purge, '--all'], undef, $opt), 'scrub purge OK'); like($out, qr/\b[a-f0-9]{40,}/m, 'removed commit noted'); # diag "out: $out"; diag "err: $err"; $out = $err = ''; ok(!run_script([$purge, '--all' ], undef, $opt), 'scrub purge not idempotent without -f'); # diag "out: $out"; diag "err: $err"; done_testing(); public-inbox-1.6.1/t/qspawn.t000066400000000000000000000037541377346120300160720ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use_ok 'PublicInbox::Qspawn'; { my $cmd = [qw(sh -c), 'echo >&2 err; echo out']; my $qsp = PublicInbox::Qspawn->new($cmd, {}, { 2 => 1 }); my $res; $qsp->psgi_qx({}, undef, sub { $res = ${$_[0]} }); is($res, "err\nout\n", 'captured stderr and stdout'); $res = undef; $qsp = PublicInbox::Qspawn->new($cmd, {}, { 2 => \*STDOUT }); $qsp->psgi_qx({}, undef, sub { $res = ${$_[0]} }); is($res, "err\nout\n", 'captured stderr and stdout'); } sub finish_err ($) { my ($qsp) = @_; $qsp->finish; $qsp->{err}; } my $limiter = PublicInbox::Qspawn::Limiter->new(1); { my $x = PublicInbox::Qspawn->new([qw(true)]); my $run = 0; $x->start($limiter, sub { my ($self) = @_; is(0, sysread($self->{rpipe}, my $buf, 1), 'read zero bytes'); ok(!finish_err($self), 'no error on finish'); $run = 1; }); is($run, 1, 'callback ran alright'); } { my $x = PublicInbox::Qspawn->new([qw(false)]); my $run = 0; $x->start($limiter, sub { my ($self) = @_; is(0, sysread($self->{rpipe}, my $buf, 1), 'read zero bytes from false'); ok(finish_err($self), 'error on finish'); $run = 1; }); is($run, 1, 'callback ran alright'); } foreach my $cmd ([qw(sleep 1)], [qw(sh -c), 'sleep 1; false']) { my $s = PublicInbox::Qspawn->new($cmd); my @run; $s->start($limiter, sub { my ($self) = @_; push @run, 'sleep'; is(0, sysread($self->{rpipe}, my $buf, 1), 'read zero bytes'); }); my $n = 0; my @t = map { my $i = $n++; my $x = PublicInbox::Qspawn->new([qw(true)]); $x->start($limiter, sub { my ($self) = @_; push @run, $i; }); [$x, $i] } (0..2); if ($cmd->[-1] =~ /false\z/) { ok(finish_err($s), 'got error on false after sleep'); } else { ok(!finish_err($s), 'no error on sleep'); } ok(!finish_err($_->[0]), "true $_->[1] succeeded") foreach @t; is_deeply([qw(sleep 0 1 2)], \@run, 'ran in order'); } done_testing(); 1; public-inbox-1.6.1/t/replace.t000066400000000000000000000141561377346120300161720ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::InboxWritable; use PublicInbox::TestCommon; use Cwd qw(abs_path); require_git(2.6); # replace is v2 only, for now... require_mods(qw(DBD::SQLite)); local $ENV{HOME} = abs_path('t'); sub test_replace ($$$) { my ($v, $level, $opt) = @_; diag "v$v $level replace"; my $this = "pi-$v-$level-replace"; my ($tmpdir, $for_destroy) = tmpdir($this); my $ibx = PublicInbox::Inbox->new({ inboxdir => "$tmpdir/testbox", name => $this, version => $v, -primary_address => 'test@example.com', indexlevel => $level, }); my $orig = PublicInbox::Eml->new(<<'EOF'); From: Barbra Streisand To: test@example.com Subject: confidential Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 Top secret info about my house in Malibu... EOF my $im = PublicInbox::InboxWritable->new($ibx, {nproc=>1})->importer(0); # fake a bunch of epochs $im->{rotate_bytes} = $opt->{rotate_bytes} if $opt->{rotate_bytes}; if ($opt->{pre}) { $opt->{pre}->($im, 1, 2); $orig->header_set('References', '<1@example.com>'); } ok($im->add($orig), 'add message to be replaced'); if ($opt->{post}) { $opt->{post}->($im, 3, { 4 => 'replace@example.com' }); } $im->done; my $thread_a = $ibx->over->get_thread('replace@example.com'); my %before = map {; delete($_->{blob}) => $_ } @{$ibx->recent}; my $reject = PublicInbox::Eml->new($orig->as_string); foreach my $mid (['', ''], [], ['']) { $reject->header_set('Message-ID', @$mid); my $ok = eval { $im->replace($orig, $reject) }; like($@, qr/Message-ID.*may not be changed/, '->replace died on Message-ID change'); ok(!$ok, 'no replacement happened'); } # prepare the replacement my $expect = "Move along, nothing to see here\n"; my $repl = PublicInbox::Eml->new($orig->as_string); $repl->header_set('From', ''); $repl->header_set('Subject', 'redacted'); $repl->header_set('Date', 'Sat, 02 Oct 2010 00:00:00 +0000'); $repl->body_str_set($expect); my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; ok(my $cmts = $im->replace($orig, $repl), 'replaced message'); my $changed_epochs = 0; for my $tip (@$cmts) { next if !defined $tip; $changed_epochs++; like($tip, qr/\A[a-f0-9]{40,}\z/, 'replace returned current commit'); } is($changed_epochs, 1, 'only one epoch changed'); $im->done; my $m = PublicInbox::Eml->new($ibx->msg_by_mid('replace@example.com')); is($m->body, $expect, 'replaced message'); is_deeply(\@warn, [], 'no warnings on noop'); my @cat = qw(cat-file --buffer --batch --batch-all-objects); my $git = $ibx->git; my @all = $git->qx(@cat); is_deeply([grep(/confidential/, @all)], [], 'nothing confidential'); is_deeply([grep(/Streisand/, @all)], [], 'Streisand who?'); is_deeply([grep(/\bOct 1993\b/, @all)], [], 'nothing from Oct 1993'); my $t19931002 = qr/ 749520000 /; is_deeply([grep(/$t19931002/, @all)], [], "nothing matches $t19931002"); for my $dir (glob("$ibx->{inboxdir}/git/*.git")) { my ($bn) = ($dir =~ m!([^/]+)\z!); is(xsys(qw(git --git-dir), $dir, qw(fsck --strict --no-progress)), 0, "git fsck is clean in epoch $bn"); } my $thread_b = $ibx->over->get_thread('replace@example.com'); is_deeply([sort map { $_->{mid} } @$thread_b], [sort map { $_->{mid} } @$thread_a], 'threading preserved'); if (my $srch = $ibx->search) { for my $q ('f:streisand', 's:confidential', 'malibu') { my $mset = $srch->mset($q); is($mset->size, 0, "no match for $q"); } my @ok = ('f:redactor', 's:redacted', 'nothing to see'); if ($opt->{pre}) { push @ok, 'm:1@example.com', 'm:2@example.com', 's:message2', 's:message1'; } if ($opt->{post}) { push @ok, 'm:3@example.com', 'm:4@example.com', 's:message3', 's:message4'; } for my $q (@ok) { my $mset = $srch->mset($q); ok($mset->size, "got match for $q"); } } # check overview matches: my %after = map {; delete($_->{blob}) => $_ } @{$ibx->recent}; my @before_blobs = keys %before; foreach my $blob (@before_blobs) { delete $before{$blob} if delete $after{$blob}; } is(scalar keys %before, 1, 'one unique blob from before left'); is(scalar keys %after, 1, 'one unique blob from after left'); foreach my $blob (keys %before) { is($git->check($blob), undef, 'old blob not found'); my $smsg = $before{$blob}; is($smsg->{subject}, 'confidential', 'before subject'); is($smsg->{mid}, 'replace@example.com', 'before MID'); } foreach my $blob (keys %after) { ok($git->check($blob), 'new blob found'); my $smsg = $after{$blob}; is($smsg->{subject}, 'redacted', 'after subject'); is($smsg->{mid}, 'replace@example.com', 'before MID'); } # $git->cleanup; # needed if $im->{parallel}; @warn = (); is($im->replace($orig, $repl), undef, 'no-op replace returns undef'); is($im->purge($orig), undef, 'no-op purge returns undef'); is_deeply(\@warn, [], 'no warnings on noop'); # $im->done; # needed if $im->{parallel} } sub pad_msgs { my ($im, @range) = @_; for my $i (@range) { my $irt; if (ref($i) eq 'HASH') { ($i, $irt) = each %$i; } my $sec = sprintf('%0d', $i); my $mime = PublicInbox::Eml->new(< Date: Fri, 02, Jan 1970 00:00:$sec +0000 Subject: message$i message number$i EOF if (defined($irt)) { $mime->header_set('References', "<$irt>"); } $im->add($mime); } } my $opt = { pre => \&pad_msgs }; test_replace(2, 'basic', {}); test_replace(2, 'basic', $opt); test_replace(2, 'basic', $opt = { %$opt, post => \&pad_msgs }); test_replace(2, 'basic', $opt = { %$opt, rotate_bytes => 1 }); SKIP: { require PublicInbox::Search; PublicInbox::Search::load_xapian() or skip 'Search::Xapian missing', 8; for my $l (qw(medium)) { test_replace(2, $l, {}); $opt = { pre => \&pad_msgs }; test_replace(2, $l, $opt); test_replace(2, $l, $opt = { %$opt, post => \&pad_msgs }); test_replace(2, $l, $opt = { %$opt, rotate_bytes => 1 }); } }; done_testing(); public-inbox-1.6.1/t/reply.t000066400000000000000000000045431377346120300157110ustar00rootroot00000000000000# Copyright (C) 2017-2020 all contributors # License: AGPL-3+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use_ok 'PublicInbox::Reply'; my @q = ( 'foo@bar', 'foo@bar', 'a b', "'a b'", "a'b", "'a'\\''b'", ); while (@q) { my $input = shift @q; my $expect = shift @q; my $res = PublicInbox::Reply::squote_maybe($input); is($res, $expect, "quote $input => $res"); } my $mime = PublicInbox::Eml->new(<<'EOF'); From: from To: to Cc: cc@example.com Message-Id: Subject: hihi EOF my $hdr = $mime->header_obj; my $ibx = { -primary_address => 'primary@example.com' }; my ($arg, $link) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); my $exp = [ '--in-reply-to=blah@example.com', '--to=from@example.com', '--cc=cc@example.com', '--cc=to@example.com' ]; is_deeply($arg, $exp, 'default reply is to :all'); $ibx->{replyto} = ':all'; ($arg, $link) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); is_deeply($arg, $exp, '":all" also works'); $exp = [ '--in-reply-to=blah@example.com', '--to=primary@example.com' ]; $ibx->{replyto} = ':list'; ($arg, $link) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); is_deeply($arg, $exp, '":list" works for centralized lists'); $exp = [ '--in-reply-to=blah@example.com', '--to=primary@example.com', '--cc=cc@example.com', '--cc=to@example.com' ]; $ibx->{replyto} = ':list,Cc,To'; ($arg, $link) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); is_deeply($arg, $exp, '":list,Cc,To" works for kinda centralized lists'); $ibx->{replyto} = 'new@example.com'; ($arg, $link) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); $exp = [ '--in-reply-to=blah@example.com', '--to=new@example.com' ]; is_deeply($arg, $exp, 'explicit address works, too'); $ibx->{replyto} = ':all'; $ibx->{obfuscate} = 1; ($arg, $link) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); $exp = [ '--in-reply-to=blah@example.com', '--to=from@example$(echo .)com', '--cc=cc@example$(echo .)com', '--cc=to@example$(echo .)com' ]; is_deeply($arg, $exp, 'address obfuscation works'); is($link, '', 'no mailto: link given'); $ibx->{replyto} = ':none=dead list'; $ibx->{obfuscate} = 1; ($arg, $link) = PublicInbox::Reply::mailto_arg_link($ibx, $hdr); is($$arg, 'dead list', ':none= works'); done_testing(); public-inbox-1.6.1/t/run.perl000077500000000000000000000134411377346120300160610ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ # # Parallel test runner which preloads code and reuses worker processes # to give a nice speedup over prove(1). It also generates per-test # .log files (similar to automake tests). # # *.t files run by this should not rely on global state. # # Usage: $PERL -I lib -w t/run.perl -j4 # Or via prove(1): prove -lvw t/run.perl :: -j4 use strict; use v5.10.1; use IO::Handle; # ->autoflush use PublicInbox::TestCommon; use Cwd qw(getcwd); use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use Errno qw(EINTR); use Fcntl qw(:seek); use POSIX qw(_POSIX_PIPE_BUF WNOHANG); my $jobs = 1; my $repeat = 1; $| = 1; our $log_suffix = '.log'; my ($shuffle, %pids, @err); GetOptions('j|jobs=i' => \$jobs, 'repeat=i' => \$repeat, 'log=s' => \$log_suffix, 's|shuffle' => \$shuffle, ) or die "Usage: $0 [-j JOBS] [--log=SUFFIX] [--repeat RUNS]"; if (($ENV{TEST_RUN_MODE} // 2) == 0) { die "$0 is not compatible with TEST_RUN_MODE=0\n"; } my @tests = scalar(@ARGV) ? @ARGV : glob('t/*.t'); my $cwd = getcwd(); open my $OLDOUT, '>&STDOUT' or die "dup STDOUT: $!"; open my $OLDERR, '>&STDERR' or die "dup STDERR: $!"; $OLDOUT->autoflush(1); $OLDERR->autoflush(1); key2sub($_) for @tests; # precache if ($shuffle) { require List::Util; } elsif (open(my $prove_state, '<', '.prove') && eval { require YAML::XS }) { # reuse "prove --state=save" data to start slowest tests, first my $state = YAML::XS::Load(do { local $/; <$prove_state> }); my $t = $state->{tests}; @tests = sort { ($t->{$b}->{elapsed} // 0) <=> ($t->{$a}->{elapsed} // 0) } @tests; } our $tb = Test::More->builder; sub DIE (;$) { print $OLDERR @_; exit(1); } our ($worker, $worker_test); sub test_status () { $? = 255 if $? == 0 && !$tb->is_passing; my $status = $? ? 'not ok' : 'ok'; chdir($cwd) or DIE "chdir($cwd): $!"; if ($log_suffix ne '') { my $log = $worker_test; $log =~ s/\.t\z/$log_suffix/; my $skip = ''; if (open my $fh, '<', $log) { my @not_ok = grep(!/^(?:ok |[ \t]*#)/ms, <$fh>); pop @not_ok if $not_ok[-1] =~ /^[0-9]+\.\.[0-9]+$/; my $pfx = "# $log: "; print $OLDERR map { $pfx.$_ } @not_ok; seek($fh, 0, SEEK_SET) or die "seek: $!"; # show unique skip texts and the number of times # each text was skipped local $/; my @sk = (<$fh> =~ m/^ok [0-9]+ (# skip [^\n]+)/mgs); if (@sk) { my %nr; my @err = grep { !$nr{$_}++ } @sk; print $OLDERR "$pfx$_ ($nr{$_})\n" for @err; $skip = ' # total skipped: '.scalar(@sk); } } else { print $OLDERR "could not open: $log: $!\n"; } print $OLDOUT "$status $worker_test$skip\n"; } } # Test::Builder or Test2::Hub may call exit() from plan(skip_all => ...) END { test_status() if (defined($worker_test) && $worker == $$) } sub run_test ($) { my ($test) = @_; my $log_fh; if ($log_suffix ne '') { my $log = $test; $log =~ s/\.[^\.]+\z/$log_suffix/ or DIE "can't log for $test"; open $log_fh, '>', $log or DIE "open $log: $!"; $log_fh->autoflush(1); $tb->output($log_fh); $tb->failure_output($log_fh); $tb->todo_output($log_fh); open STDOUT, '>&', $log_fh or DIE "1>$log: $!"; open STDERR, '>&', $log_fh or DIE "2>$log: $!"; } $worker_test = $test; run_script([$test]); test_status(); $worker_test = undef; push @err, "$test ($?)" if $?; } sub UINT_SIZE () { 4 } # worker processes will SIGUSR1 the producer process when it # sees EOF on the pipe. On FreeBSD 11.2 and Perl 5.30.0, # sys/ioctl.ph gives the wrong value for FIONREAD(). my $producer = $$; my $eof; # we stop respawning if true my $start_worker = sub { my ($i, $j, $rd, $todo) = @_; defined(my $pid = fork) or DIE "fork: $!"; if ($pid == 0) { $worker = $$; while (1) { my $r = sysread($rd, my $buf, UINT_SIZE); if (!defined($r)) { next if $! == EINTR; DIE "sysread: $!"; } last if $r == 0; DIE "short read $r" if $r != UINT_SIZE; my $t = unpack('I', $buf); run_test($todo->[$t]); $tb->reset; } kill 'USR1', $producer if !$eof; # sets $eof in $producer DIE join('', map { "E: $_\n" } @err) if @err; exit(0); } else { $pids{$pid} = $j; } }; # negative $repeat means loop forever: for (my $i = $repeat; $i != 0; $i--) { my @todo = $shuffle ? List::Util::shuffle(@tests) : @tests; # single-producer, multi-consumer queue relying on POSIX semantics pipe(my ($rd, $wr)) or DIE "pipe: $!"; # fill the queue before forking so children can start earlier my $n = (_POSIX_PIPE_BUF / UINT_SIZE); if ($n >= $#todo) { print $wr join('', map { pack('I', $_) } (0..$#todo)) or DIE; close $wr or die; $wr = undef; } else { # write what we can... $wr->autoflush(1); print $wr join('', map { pack('I', $_) } (0..$n)) or DIE; $n += 1; # and send more ($n..$#todo), later } $eof = undef; local $SIG{USR1} = sub { $eof = 1 }; my $sigchld = sub { my ($sig) = @_; my $flags = $sig ? WNOHANG : 0; while (1) { my $pid = waitpid(-1, $flags) or return; return if $pid < 0; my $j = delete $pids{$pid}; if (!defined($j)) { push @err, "reaped unknown $pid ($?)"; next; } push @err, "job[$j] ($?)" if $?; # skip_all can exit(0), respawn if needed: if (!$eof) { print $OLDERR "# respawning job[$j]\n"; $start_worker->($i, $j, $rd, \@todo); } } }; # start the workers to consume the queue for (my $j = 0; $j < $jobs; $j++) { $start_worker->($i, $j, $rd, \@todo); } if ($wr) { local $SIG{CHLD} = $sigchld; # too many tests to fit in the pipe before starting workers, # send the rest now the workers are running print $wr join('', map { pack('I', $_) } ($n..$#todo)) or DIE; close $wr or die; } $sigchld->(0) while scalar(keys(%pids)); DIE join('', map { "E: $_\n" } @err) if @err; } print $OLDOUT "1..".($repeat * scalar(@tests))."\n" if $repeat >= 0; public-inbox-1.6.1/t/search-amsg.eml000066400000000000000000000010571377346120300172570ustar00rootroot00000000000000Subject: see attachment Message-ID: From: "John Smith" To: list@example.com MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: multipart/mixed; boundary="b" --b Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: quoted-printable Content-Disposition: attachment; filename="attached_fart.txt" inside the attachment= --b Content-Type: text/plain; charset="US-ASCII" Content-Disposition: attachment; filename="part_deux.txt" Content-Transfer-Encoding: quoted-printable inside another= --b-- public-inbox-1.6.1/t/search-thr-index.t000066400000000000000000000054601377346120300177220ustar00rootroot00000000000000# Copyright (C) 2017-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use bytes (); # only for bytes::length use Test::More; use PublicInbox::TestCommon; use PublicInbox::MID qw(mids); use PublicInbox::Eml; require_mods(qw(DBD::SQLite Search::Xapian)); require PublicInbox::SearchIdx; require PublicInbox::Smsg; require PublicInbox::Inbox; use PublicInbox::Import; my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/a.git"; PublicInbox::Import::init_bare($git_dir); my $ibx = PublicInbox::Inbox->new({inboxdir => $git_dir}); my $rw = PublicInbox::SearchIdx->new($ibx, 1); ok($rw, "search indexer created"); my $data = <<'EOF'; Subject: [RFC 00/14] Message-Id: <1-bw@g> Subject: [RFC 09/14] Message-Id: <10-bw@g> In-Reply-To: <1-bw@g> References: <1-bw@g> Subject: [RFC 03/14] Message-Id: <4-bw@g> In-Reply-To: <1-bw@g> References: <1-bw@g> EOF my $num = 0; # nb. using internal API, fragile! my $xdb = $rw->begin_txn_lazy; my @mids; foreach (reverse split(/\n\n/, $data)) { $_ .= "\n"; my $mime = PublicInbox::Eml->new(\$_); $mime->header_set('From' => 'bw@g'); $mime->header_set('To' => 'git@vger.kernel.org'); my $bytes = bytes::length($mime->as_string); my $mid = mids($mime->header_obj)->[0]; my $smsg = bless { bytes => $bytes, num => ++$num, mid => $mid, blob => '', }, 'PublicInbox::Smsg'; my $doc_id = $rw->add_message($mime, $smsg); push @mids, $mid; ok($doc_id, 'message added: '. $mid); } my $prev; my %tids; my $dbh = $rw->{oidx}->dbh; foreach my $mid (@mids) { my $msgs = $rw->{oidx}->get_thread($mid); is(3, scalar(@$msgs), "got all messages from $mid"); foreach my $m (@$msgs) { my $tid = $dbh->selectrow_array(<<'', undef, $m->{num}); SELECT tid FROM over WHERE num = ? LIMIT 1 $tids{$tid}++; } } is(scalar keys %tids, 1, 'all messages have the same tid'); $rw->commit_txn_lazy; $xdb = $rw->begin_txn_lazy; { my $mime = PublicInbox::Eml->new(<<''); Subject: [RFC 00/14] Message-Id: <1-bw@g> From: bw@g To: git@vger.kernel.org my $dbh = $rw->{oidx}->dbh; my ($id, $prev); my $reidx = $rw->{oidx}->next_by_mid('1-bw@g', \$id, \$prev); ok(defined $reidx); my $num = $reidx->{num}; my $tid0 = $dbh->selectrow_array(<<'', undef, $num); SELECT tid FROM over WHERE num = ? LIMIT 1 my $bytes = bytes::length($mime->as_string); my $mid = mids($mime->header_obj)->[0]; my $smsg = bless { bytes => $bytes, num => $num, mid => $mid, blob => '', }, 'PublicInbox::Smsg'; my $doc_id = $rw->add_message($mime, $smsg); ok($doc_id, 'message reindexed'. $mid); is($doc_id, $num, "article number unchanged: $num"); my $tid1 = $dbh->selectrow_array(<<'', undef, $num); SELECT tid FROM over WHERE num = ? LIMIT 1 is($tid1, $tid0, 'tid unchanged on reindex'); } $rw->commit_txn_lazy; done_testing(); 1; public-inbox-1.6.1/t/search.t000066400000000000000000000366151377346120300160300ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; require_mods(qw(DBD::SQLite Search::Xapian)); require PublicInbox::SearchIdx; require PublicInbox::Inbox; require PublicInbox::InboxWritable; use PublicInbox::Eml; my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/a.git"; my $ibx = PublicInbox::Inbox->new({ inboxdir => $git_dir }); my ($root_id, $last_id); is(0, xsys(qw(git init --shared -q --bare), $git_dir), "git init (main)") or BAIL_OUT("`git init --shared' failed, weird FS or seccomp?"); eval { PublicInbox::Search->new($ibx)->xdb }; ok($@, "exception raised on non-existent DB"); my $rw = PublicInbox::SearchIdx->new($ibx, 1); $ibx->with_umask(sub { $rw->idx_acquire; $rw->idx_release; }); $rw = undef; my $rw_commit = sub { $rw->commit_txn_lazy if $rw; $rw = PublicInbox::SearchIdx->new($ibx, 1); $rw->{qp_flags} = 0; # quiet a warning $rw->begin_txn_lazy; $ibx->search->reopen; }; sub oct_is ($$$) { my ($got, $exp, $msg) = @_; is(sprintf('0%03o', $got), sprintf('0%03o', $exp), $msg); } { # git repository perms oct_is($ibx->_git_config_perm(), &PublicInbox::InboxWritable::PERM_GROUP, 'undefined permission is group'); my @t = ( [ '0644', 0022, '644 => umask(0022)' ], [ '0600', 0077, '600 => umask(0077)' ], [ '0640', 0027, '640 => umask(0027)' ], [ 'group', 0007, 'group => umask(0007)' ], [ 'everybody', 0002, 'everybody => umask(0002)' ], [ 'umask', umask, 'umask => existing umask' ], ); for (@t) { my ($perm, $exp, $msg) = @$_; my $got = PublicInbox::InboxWritable::_umask_for( PublicInbox::InboxWritable->_git_config_perm($perm)); oct_is($got, $exp, $msg); } } { my $crlf_adjust = \&PublicInbox::SearchIdx::crlf_adjust; is($crlf_adjust->("hi\r\nworld\r\n"), 0, 'no adjustment needed'); is($crlf_adjust->("hi\nworld\n"), 2, 'LF-only counts two CR'); is($crlf_adjust->("hi\r\nworld\n"), 1, 'CRLF/LF-mix 1 counts 1 CR'); is($crlf_adjust->("hi\nworld\r\n"), 1, 'CRLF/LF-mix 2 counts 1 CR'); } $ibx->with_umask(sub { my $root = PublicInbox::Eml->new(<<'EOF'); Date: Fri, 02 Oct 1993 00:00:00 +0000 Subject: Hello world Message-ID: From: John Smith To: list@example.com List-Id: I'm not mad \m/ EOF my $last = PublicInbox::Eml->new(<<'EOF'); Date: Sat, 02 Oct 2010 00:00:00 +0000 Subject: Re: Hello world In-Reply-To: Message-ID: From: John Smith To: list@example.com Cc: foo@example.com List-Id: there's nothing goodbye forever :< EOF my $rv; $rw_commit->(); $root_id = $rw->add_message($root); is($root_id, int($root_id), "root_id is an integer: $root_id"); $last_id = $rw->add_message($last); is($last_id, int($last_id), "last_id is an integer: $last_id"); }); sub filter_mids { my ($msgs) = @_; sort(map { $_->{mid} } @$msgs); } my $query = sub { my ($query_string, $opt) = @_; my $mset = $ibx->search->mset($query_string, $opt); $ibx->search->mset_to_smsg($ibx, $mset); }; { $rw_commit->(); my $found = $query->('m:root@s'); is(scalar(@$found), 1, "message found"); is($found->[0]->{mid}, 'root@s', 'mid set correctly') if @$found; my ($res, @res); my @exp = sort qw(root@s last@s); $res = $query->('s:(Hello world)'); @res = filter_mids($res); is_deeply(\@res, \@exp, 'got expected results for s:() match'); $res = $query->('s:"Hello world"'); @res = filter_mids($res); is_deeply(\@res, \@exp, 'got expected results for s:"" match'); $res = $query->('s:"Hello world"', {limit => 1}); is(scalar @$res, 1, "limit works"); my $first = $res->[0]; $res = $query->('s:"Hello world"', {offset => 1}); is(scalar @$res, 1, "offset works"); my $second = $res->[0]; isnt($first, $second, "offset returned different result from limit"); } # ghost vivication $ibx->with_umask(sub { $rw_commit->(); my $rmid = ''; my $reply_to_ghost = PublicInbox::Eml->new(<<"EOF"); Date: Sat, 02 Oct 2010 00:00:00 +0000 Subject: Re: ghosts Message-ID: In-Reply-To: $rmid From: Time Traveler To: list\@example.com -_- EOF my $rv; my $reply_id = $rw->add_message($reply_to_ghost); is($reply_id, int($reply_id), "reply_id is an integer: $reply_id"); my $was_ghost = PublicInbox::Eml->new(<<"EOF"); Date: Sat, 02 Oct 2010 00:00:01 +0000 Subject: ghosts Message-ID: $rmid From: Laggy Sender To: list\@example.com are real EOF my $ghost_id = $rw->add_message($was_ghost); is($ghost_id, int($ghost_id), "ghost_id is an integer: $ghost_id"); my $msgs = $rw->{oidx}->get_thread('ghost-message@s'); is(scalar(@$msgs), 2, 'got both messages in ghost thread'); foreach (qw(sid tid)) { is($msgs->[0]->{$_}, $msgs->[1]->{$_}, "{$_} match"); } isnt($msgs->[0]->{num}, $msgs->[1]->{num}, "num do not match"); ok($_->{num} > 0, 'positive art num') foreach @$msgs }); # search thread on ghost { $rw_commit->(); # subject my $res = $query->('ghost'); my @exp = sort qw(ghost-message@s ghost-reply@s); my @res = filter_mids($res); is_deeply(\@res, \@exp, 'got expected results for Subject match'); # body $res = $query->('goodbye'); is(scalar(@$res), 1, "goodbye message found"); is($res->[0]->{mid}, 'last@s', 'got goodbye message body') if @$res; # datestamp $res = $query->('dt:20101002000001..20101002000001'); @res = filter_mids($res); is_deeply(\@res, ['ghost-message@s'], 'exact Date: match works'); $res = $query->('dt:20101002000002..20101002000002'); is_deeply($res, [], 'exact Date: match down to the second'); } # long message-id $ibx->with_umask(sub { $rw_commit->(); my $long_mid = 'last' . ('x' x 60). '@s'; my $long = PublicInbox::Eml->new(< In-Reply-To: Message-ID: <$long_mid>, From: "Long I.D." To: list\@example.com wut EOF my $long_id = $rw->add_message($long); is($long_id, int($long_id), "long_id is an integer: $long_id"); $rw_commit->(); my $res; my @res; my $long_reply_mid = 'reply-to-long@1'; my $long_reply = PublicInbox::Eml->new(< In-Reply-To: <$long_mid> From: no1 To: list\@example.com no References EOF ok($rw->add_message($long_reply) > $long_id, "inserted long reply"); $rw_commit->(); my $t = $ibx->over->get_thread('root@s'); is(scalar(@$t), 4, "got all 4 messages in thread"); my @exp = sort($long_reply_mid, 'root@s', 'last@s', $long_mid); @res = filter_mids($t); is_deeply(\@res, \@exp, "get_thread works"); }); # quote prioritization $ibx->with_umask(sub { $rw_commit->(); $rw->add_message(PublicInbox::Eml->new(<<'EOF')); Date: Sat, 02 Oct 2010 00:00:01 +0000 Subject: Hello Message-ID: From: Quoter To: list@example.com > theatre illusions fade EOF $rw->add_message(PublicInbox::Eml->new(<<'EOF')); Date: Sat, 02 Oct 2010 00:00:02 +0000 Subject: Hello Message-ID: From: Non-Quoter To: list@example.com theatre fade EOF $rw_commit->(); my $res = $query->("theatre"); is(scalar(@$res), 2, "got both matches"); if (@$res == 2) { is($res->[0]->{mid}, 'nquote@a', 'non-quoted scores higher'); is($res->[1]->{mid}, 'quote@a', 'quoted result still returned'); } $res = $query->("illusions"); is(scalar(@$res), 1, "got a match for quoted text"); is($res->[0]->{mid}, 'quote@a', "quoted result returned if nothing else") if scalar(@$res); }); # circular references $ibx->with_umask(sub { my $s = 'foo://'. ('Circle' x 15).'/foo'; my $doc_id = $rw->add_message(PublicInbox::Eml->new(< References: In-Reply-To: From: Circle To: list\@example.com LOOP! EOF ok($doc_id > 0, "doc_id defined with circular reference"); $rw_commit->(); my $smsg = $query->('m:circle@a', {limit=>1})->[0]; is(defined($smsg), 1, 'found m:circl@a'); if (defined $smsg) { is($smsg->{references}, '', "no references created"); is($smsg->{subject}, $s, 'long subject not rewritten'); } }); { my $msgs = $query->('d:19931002..20101002'); ok(scalar(@$msgs) > 0, 'got results within range'); $msgs = $query->('d:20101003..'); is(scalar(@$msgs), 0, 'nothing after 20101003'); $msgs = $query->('d:..19931001'); is(scalar(@$msgs), 0, 'nothing before 19931001'); } $ibx->with_umask(sub { my $mime = eml_load 't/utf8.eml'; my $doc_id = $rw->add_message($mime); ok($doc_id > 0, 'message indexed doc_id with UTF-8'); $rw_commit->(); my $msg = $query->('m:testmessage@example.com', {limit => 1})->[0]; is(defined($msg), 1, 'found testmessage@example.com'); if (defined $msg) { is($mime->header('Subject'), $msg->{subject}, 'UTF-8 subject preserved'); } }); # names and addresses { my $mset = $ibx->search->mset('t:list@example.com'); is($mset->size, 9, 'searched To: successfully'); foreach my $m ($mset->items) { my $smsg = $ibx->over->get_art($m->get_docid); like($smsg->{to}, qr/\blist\@example\.com\b/, 'to appears'); my $doc = $m->get_document; my $col = PublicInbox::Search::BYTES(); my $bytes = PublicInbox::Smsg::get_val($doc, $col); like($bytes, qr/\A[0-9]+\z/, '$bytes stored as digit'); ok($bytes > 0, '$bytes is > 0'); is($bytes, $smsg->{bytes}, 'bytes Xapian value matches Over'); $col = PublicInbox::Search::UID(); my $uid = PublicInbox::Smsg::get_val($doc, $col); is($uid, $smsg->{num}, 'UID column matches {num}'); is($uid, $m->get_docid, 'UID column matches docid'); } $mset = $ibx->search->mset('tc:list@example.com'); is($mset->size, 9, 'searched To+Cc: successfully'); foreach my $m ($mset->items) { my $smsg = $ibx->over->get_art($m->get_docid); my $tocc = join("\n", $smsg->{to}, $smsg->{cc}); like($tocc, qr/\blist\@example\.com\b/, 'tocc appears'); } foreach my $pfx ('tcf:', 'c:') { my $mset = $ibx->search->mset($pfx . 'foo@example.com'); is($mset->items, 1, "searched $pfx successfully for Cc:"); foreach my $m ($mset->items) { my $smsg = $ibx->over->get_art($m->get_docid); like($smsg->{cc}, qr/\bfoo\@example\.com\b/, 'cc appears'); } } foreach my $pfx ('', 'tcf:', 'f:') { my $res = $query->($pfx . 'Laggy'); is(scalar(@$res), 1, "searched $pfx successfully for From:"); foreach my $smsg (@$res) { like($smsg->{from_name}, qr/Laggy Sender/, "From appears with $pfx"); } } } { $rw_commit->(); my $res = $query->('b:hello'); is(scalar(@$res), 0, 'no match on body search only'); $res = $query->('bs:smith'); is(scalar(@$res), 0, 'no match on body+subject search for From'); $res = $query->('q:theatre'); is(scalar(@$res), 1, 'only one quoted body'); like($res->[0]->{from_name}, qr/\AQuoter/, 'got quoted body') if (scalar(@$res)); $res = $query->('nq:theatre'); is(scalar @$res, 1, 'only one non-quoted body'); like($res->[0]->{from_name}, qr/\ANon-Quoter/, 'got non-quoted body') if (scalar(@$res)); foreach my $pfx (qw(b: bs:)) { $res = $query->($pfx . 'theatre'); is(scalar @$res, 2, "searched both bodies for $pfx"); like($res->[0]->{from_name}, qr/\ANon-Quoter/, "non-quoter first for $pfx") if scalar(@$res); } } $ibx->with_umask(sub { my $amsg = eml_load 't/search-amsg.eml'; my $oid = ('0'x40); my $smsg = bless { blob => $oid }, 'PublicInbox::Smsg'; ok($rw->add_message($amsg, $smsg), 'added attachment'); $rw_commit->(); my $n = $query->('n:attached_fart.txt'); is(scalar @$n, 1, 'got result for n:'); my $res = $query->('part_deux.txt'); is(scalar @$res, 1, 'got result without n:'); is($n->[0]->{mid}, $res->[0]->{mid}, 'same result with and without') if scalar(@$res); my $txt = $query->('"inside another"'); is(scalar @$txt, 1, 'found inside another'); is($txt->[0]->{mid}, $res->[0]->{mid}, 'search inside text attachments works') if scalar(@$txt); my $art; if (scalar(@$n) >= 1) { my $mid = $n->[0]->{mid}; my ($id, $prev); $art = $ibx->over->next_by_mid($mid, \$id, \$prev); ok($art, 'article exists in OVER DB'); } $rw->_msgmap_init; $rw->unindex_eml($oid, $amsg); $rw->commit_txn_lazy; SKIP: { skip('$art not defined', 1) unless defined $art; is($ibx->over->get_art($art->{num}), undef, 'gone from OVER DB'); }; }); my $all_mask = 07777; my $dir_mask = 02770; # FreeBSD and apparently OpenBSD does not allow non-root users to set S_ISGID, # so git doesn't set it, either (see DIR_HAS_BSD_GROUP_SEMANTICS in git.git) if ($^O =~ /(?:free|open)bsd/i) { $all_mask = 0777; $dir_mask = 0770; } foreach my $f ("$git_dir/public-inbox/msgmap.sqlite3", "$git_dir/public-inbox", glob("$git_dir/public-inbox/xapian*/"), glob("$git_dir/public-inbox/xapian*/*")) { my @st = stat($f); my ($bn) = (split(m!/!, $f))[-1]; oct_is($st[2] & $all_mask, -f _ ? 0660 : $dir_mask, "sharedRepository respected for $bn"); } $ibx->with_umask(sub { $rw_commit->(); my $digits = '10010260936330'; my $ua = 'Pine.LNX.4.10'; my $mid = "$ua.$digits.2460-100000\@penguin.transmeta.com"; is($ibx->search->mset("m:$digits")->size, 0, 'no results yet'); my $pine = PublicInbox::Eml->new(< From: torvalds\@transmeta To: list\@example.com EOF my $x = $rw->add_message($pine); $rw->commit_txn_lazy; $ibx->search->reopen; is($ibx->search->mset("m:$digits")->size, 1, 'searching only digit yielded result'); my $wild = $digits; for my $i (1..6) { chop($wild); is($ibx->search->mset("m:$wild*")->size, 1, "searching chopped($i) digit yielded result $wild "); } is($ibx->search->mset('m:Pine m:LNX m:10010260936330')->size, 1); }); { # List-Id searching my $found = $query->('lid:i.m.just.bored'); is_deeply([ filter_mids($found) ], [ 'root@s' ], 'got expected mid on exact lid: search'); $found = $query->('lid:just.bored'); is_deeply($found, [], 'got nothing on lid: search'); $found = $query->('lid:*.just.bored'); is_deeply($found, [], 'got nothing on lid: search'); $found = $query->('l:i.m.just.bored'); is_deeply([ filter_mids($found) ], [ 'root@s' ], 'probabilistic search works on full List-Id contents'); $found = $query->('l:just.bored'); is_deeply([ filter_mids($found) ], [ 'root@s' ], 'probabilistic search works on partial List-Id contents'); $found = $query->('lid:mad'); is_deeply($found, [], 'no match on phrase with lid:'); $found = $query->('lid:bored'); is_deeply($found, [], 'no match on partial List-Id with lid:'); $found = $query->('l:nothing'); is_deeply($found, [], 'matched on phrase with l:'); } $ibx->with_umask(sub { $rw_commit->(); my $doc_id = $rw->add_message(eml_load('t/data/message_embed.eml')); ok($doc_id > 0, 'messages within messages'); $rw->commit_txn_lazy; $ibx->search->reopen; my $n_test_eml = $query->('n:test.eml'); is(scalar(@$n_test_eml), 1, 'got a result'); my $n_embed2x_eml = $query->('n:embed2x.eml'); is_deeply($n_test_eml, $n_embed2x_eml, '.eml filenames searchable'); for my $m (qw(20200418222508.GA13918@dcvr 20200418222020.GA2745@dcvr 20200418214114.7575-1-e@yhbt.net)) { is($query->("m:$m")->[0]->{mid}, '20200418222508.GA13918@dcvr', 'probabilistic m:'.$m); is($query->("mid:$m")->[0]->{mid}, '20200418222508.GA13918@dcvr', 'boolean mid:'.$m); } is($query->('dfpost:4dc62c50')->[0]->{mid}, '20200418222508.GA13918@dcvr', 'diff search reaches inside message/rfc822'); is($query->('s:"mail header experiments"')->[0]->{mid}, '20200418222508.GA13918@dcvr', 'Subject search reaches inside message/rfc822'); }); done_testing(); 1; public-inbox-1.6.1/t/sigfd.t000066400000000000000000000042101377346120300156410ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors use strict; use Test::More; use IO::Handle; use POSIX qw(:signal_h); use Errno qw(ENOSYS); use PublicInbox::Syscall qw($SFD_NONBLOCK); require_ok 'PublicInbox::Sigfd'; SKIP: { if ($^O ne 'linux' && !eval { require IO::KQueue }) { skip 'signalfd requires Linux or IO::KQueue to emulate', 10; } my $new = POSIX::SigSet->new; $new->fillset or die "sigfillset: $!"; my $old = POSIX::SigSet->new; sigprocmask(SIG_SETMASK, $new, $old) or die "sigprocmask $!"; my $hit = {}; my $sig = {}; local $SIG{HUP} = sub { $hit->{HUP}->{normal}++ }; local $SIG{TERM} = sub { $hit->{TERM}->{normal}++ }; local $SIG{INT} = sub { $hit->{INT}->{normal}++ }; for my $s (qw(HUP TERM INT)) { $sig->{$s} = sub { $hit->{$s}->{sigfd}++ }; } my $sigfd = PublicInbox::Sigfd->new($sig, 0); if ($sigfd) { require PublicInbox::DS; ok($sigfd, 'Sigfd->new works'); kill('HUP', $$) or die "kill $!"; kill('INT', $$) or die "kill $!"; my $fd = fileno($sigfd->{sock}); ok($fd >= 0, 'fileno(Sigfd->{sock}) works'); my $rvec = ''; vec($rvec, $fd, 1) = 1; is(select($rvec, undef, undef, undef), 1, 'select() works'); ok($sigfd->wait_once, 'wait_once reported success'); for my $s (qw(HUP INT)) { is($hit->{$s}->{sigfd}, 1, "sigfd fired $s"); is($hit->{$s}->{normal}, undef, 'normal $SIG{$s} not fired'); } $sigfd = undef; my $nbsig = PublicInbox::Sigfd->new($sig, $SFD_NONBLOCK); ok($nbsig, 'Sigfd->new $SFD_NONBLOCK works'); is($nbsig->wait_once, undef, 'nonblocking ->wait_once'); ok($! == Errno::EAGAIN, 'got EAGAIN'); kill('HUP', $$) or die "kill $!"; PublicInbox::DS->SetPostLoopCallback(sub {}); # loop once PublicInbox::DS->EventLoop; is($hit->{HUP}->{sigfd}, 2, 'HUP sigfd fired in event loop'); kill('TERM', $$) or die "kill $!"; kill('HUP', $$) or die "kill $!"; PublicInbox::DS->EventLoop; PublicInbox::DS->Reset; is($hit->{TERM}->{sigfd}, 1, 'TERM sigfd fired in event loop'); is($hit->{HUP}->{sigfd}, 3, 'HUP sigfd fired in event loop'); } else { skip('signalfd disabled?', 10); } sigprocmask(SIG_SETMASK, $old) or die "sigprocmask $!"; } done_testing; public-inbox-1.6.1/t/solve/000077500000000000000000000000001377346120300155135ustar00rootroot00000000000000public-inbox-1.6.1/t/solve/0001-simple-mod.patch000066400000000000000000000011251377346120300211570ustar00rootroot00000000000000From: WEB DESIGN EXPERT To: meta@public-inbox.org Subject: [PATCH] TODO: take expert web design advice Date: Mon, 1 Apr 2019 08:15:20 +0000 Message-Id: <20190401081523.16213-1-BOFH@YHBT.net> Content-Type: text/plain; charset=utf-8 --- TODO | 2 ++ Ω | 5 -- 1 file changed, 2 insertions(+) diff --git a/TODO b/TODO index 605013e..69df7d5 100644 --- a/TODO +++ b/TODO @@ -109,3 +109,5 @@ all need to be considered for everything we introduce) * Optional history squashing to reduce commit and intermediate tree objects + + * Make use of and tags public-inbox-1.6.1/t/solve/0002-rename-with-modifications.patch000066400000000000000000000024511377346120300241630ustar00rootroot00000000000000From: POLITICAL CORRECTNESS EXPERT To: meta@public-inbox.org Subject: [PATCH] POLITICALLY CORRECT FILE NAMING Date: Mon, 1 Apr 2019 08:15:20 +0000 Message-Id: <20190401081523.16213-2-BOFH@YHBT.net> HACKING MIGHT GET US REPORTED TO EFF-BEE-EYE AND USE MARKDOWN CUZ MOAR FLAVORS == BETTER --- HACKING => CONTRIBUTING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename HACKING => CONTRIBUTING.md (94%) diff --git a/HACKING b/CONTRIBUTING.md similarity index 94% rename from HACKING rename to CONTRIBUTING.md index 3435775..0a92431 100644 --- a/HACKING +++ b/CONTRIBUTING.md @@ -1,5 +1,5 @@ -hacking public-inbox --------------------- +contributing to public-inbox +---------------------------- Send all patches and "git request-pull"-formatted emails to our self-hosting inbox at meta@public-inbox.org @@ -15,7 +15,7 @@ Please consider our goals in mind: Decentralization, Accessibility, Compatibility, Performance These goals apply to everyone: users viewing over the web or NNTP, -sysadmins running public-inbox, and other hackers working public-inbox. +sysadmins running public-inbox, and other contributors working public-inbox. We will reject any feature which advocates or contributes to any particular instance of a public-inbox becoming a single point of failure. public-inbox-1.6.1/t/solver_git.t000066400000000000000000000151271377346120300167330ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use Cwd qw(abs_path); use PublicInbox::TestCommon; require_git(2.6); use PublicInbox::Spawn qw(popen_rd); require_mods(qw(DBD::SQLite Search::Xapian Plack::Util)); my $git_dir = xqx([qw(git rev-parse --git-dir)], undef, {2 => \(my $null)}); $? == 0 or plan skip_all => "$0 must be run from a git working tree"; chomp $git_dir; # needed for alternates, and --absolute-git-dir is only in git 2.13+ $git_dir = abs_path($git_dir); use_ok "PublicInbox::$_" for (qw(Inbox V2Writable Git SolverGit WWW)); my ($inboxdir, $for_destroy) = tmpdir(); my $opts = { inboxdir => $inboxdir, name => 'test-v2writable', version => 2, -primary_address => 'test@example.com', }; my $ibx = PublicInbox::Inbox->new($opts); my $im = PublicInbox::V2Writable->new($ibx, 1); $im->{parallel} = 0; my $deliver_patch = sub ($) { $im->add(eml_load($_[0])); $im->done; }; $deliver_patch->('t/solve/0001-simple-mod.patch'); my $v1_0_0_tag = 'cb7c42b1e15577ed2215356a2bf925aef59cdd8d'; my $v1_0_0_tag_short = substr($v1_0_0_tag, 0, 16); my $git = PublicInbox::Git->new($git_dir); $ibx->{-repo_objs} = [ $git ]; my $res; my $solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); open my $log, '+>>', "$inboxdir/solve.log" or die "open: $!"; my $psgi_env = { 'psgi.errors' => \*STDERR, 'psgi.url_scheme' => 'http', 'HTTP_HOST' => 'example.com' }; $solver->solve($psgi_env, $log, '69df7d5', {}); ok($res, 'solved a blob!'); my $wt_git = $res->[0]; is(ref($wt_git), 'PublicInbox::Git', 'got a git object for the blob'); my $expect = '69df7d565d49fbaaeb0a067910f03dc22cd52bd0'; is($res->[1], $expect, 'resolved blob to unabbreviated identifier'); is($res->[2], 'blob', 'type specified'); is($res->[3], 4405, 'size returned'); is(ref($wt_git->cat_file($res->[1])), 'SCALAR', 'wt cat-file works'); is_deeply([$expect, 'blob', 4405], [$wt_git->check($res->[1])], 'wt check works'); if (0) { # TODO: check this? seek($log, 0, 0); my $z = do { local $/; <$log> }; diag $z; } my $oid = $expect; for my $i (1..2) { my $more; my $s = PublicInbox::SolverGit->new($ibx, sub { $more = $_[0] }); $s->solve($psgi_env, $log, $oid, {}); is($more->[1], $expect, 'resolved blob to long OID '.$i); chop($oid); } $solver = undef; $res = undef; my $wt_git_dir = $wt_git->{git_dir}; $wt_git = undef; ok(!-d $wt_git_dir, 'no references to WT held'); $solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); $solver->solve($psgi_env, $log, '0'x40, {}); is($res, undef, 'no error on z40'); my $git_v2_20_1_tag = '7a95a1cd084cb665c5c2586a415e42df0213af74'; $solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); $solver->solve($psgi_env, $log, $git_v2_20_1_tag, {}); is($res, undef, 'no error on a tag not in our repo'); $deliver_patch->('t/solve/0002-rename-with-modifications.patch'); $solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); $solver->solve($psgi_env, $log, '0a92431', {}); ok($res, 'resolved without hints'); my $hints = { oid_a => '3435775', path_a => 'HACKING', path_b => 'CONTRIBUTING' }; $solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] }); $solver->solve($psgi_env, $log, '0a92431', $hints); my $hinted = $res; # don't compare ::Git objects: shift @$res; shift @$hinted; is_deeply($res, $hinted, 'hints work (or did not hurt :P'); my @psgi = qw(HTTP::Request::Common Plack::Test URI::Escape Plack::Builder); SKIP: { require_mods(@psgi, 7 + scalar(@psgi)); use_ok($_) for @psgi; my $binfoo = "$inboxdir/binfoo.git"; require PublicInbox::Import; PublicInbox::Import::init_bare($binfoo); require_ok 'PublicInbox::ViewVCS'; my $big_size = do { no warnings 'once'; $PublicInbox::ViewVCS::MAX_SIZE + 1; }; my %bin = (big => $big_size, small => 1); my %oid; # (small|big) => OID my $cmd = [ qw(git hash-object -w --stdin) ]; my $env = { GIT_DIR => $binfoo }; while (my ($label, $size) = each %bin) { pipe(my ($rin, $win)) or die; my $rout = popen_rd($cmd , $env, { 0 => $rin }); $rin = undef; print { $win } ("\0" x $size) or die; close $win or die; chomp($oid{$label} = <$rout>); close $rout or die "$?"; } # ensure the PSGI frontend (ViewVCS) works: my $name = $ibx->{name}; my $cfgpfx = "publicinbox.$name"; my $cfgpath = "$inboxdir/httpd-config"; open my $cfgfh, '>', $cfgpath or die; print $cfgfh <{address}; inboxdir = $inboxdir coderepo = public-inbox coderepo = binfoo url = http://example.com/$name [coderepo "public-inbox"] dir = $git_dir cgiturl = http://example.com/public-inbox [coderepo "binfoo"] dir = $binfoo cgiturl = http://example.com/binfoo EOF close $cfgfh or die; my $cfg = PublicInbox::Config->new($cfgpath); my $www = PublicInbox::WWW->new($cfg); my $non_existent = 'ee5e32211bf62ab6531bdf39b84b6920d0b6775a'; my $client = sub { my ($cb) = @_; my $mid = '20190401081523.16213-1-BOFH@YHBT.net'; my @warn; my $res = do { local $SIG{__WARN__} = sub { push @warn, @_ }; $cb->(GET("/$name/$mid/")); }; is_deeply(\@warn, [], 'no warnings from rendering diff'); like($res->content, qr!>Ω!, 'omega escaped'); $res = $cb->(GET("/$name/3435775/s/")); is($res->code, 200, 'success with existing blob'); $res = $cb->(GET("/$name/".('0'x40).'/s/')); is($res->code, 404, 'failure with null OID'); $res = $cb->(GET("/$name/$non_existent/s/")); is($res->code, 404, 'failure with null OID'); $res = $cb->(GET("/$name/$v1_0_0_tag/s/")); is($res->code, 200, 'shows commit (unabbreviated)'); $res = $cb->(GET("/$name/$v1_0_0_tag_short/s/")); is($res->code, 200, 'shows commit (abbreviated)'); while (my ($label, $size) = each %bin) { $res = $cb->(GET("/$name/$oid{$label}/s/")); is($res->code, 200, "$label binary file"); ok(index($res->content, "blob $size bytes") >= 0, "showed $label binary blob size"); $res = $cb->(GET("/$name/$oid{$label}/s/raw")); is($res->code, 200, "$label raw binary download"); is($res->content, "\0" x $size, "$label content matches"); } }; test_psgi(sub { $www->call(@_) }, $client); SKIP: { require_mods(qw(Plack::Test::ExternalServer), 7); my $env = { PI_CONFIG => $cfgpath }; my $sock = tcp_server() or die; my ($out, $err) = map { "$inboxdir/std$_.log" } qw(out err); my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, $env, { 3 => $sock }); my ($h, $p) = ($sock->sockhost, $sock->sockport); local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; Plack::Test::ExternalServer::test_psgi(client => $client); } } done_testing(); public-inbox-1.6.1/t/spamcheck_spamc.t000066400000000000000000000026351377346120300176770ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use IO::File; use Fcntl qw(:DEFAULT SEEK_SET); use PublicInbox::TestCommon; my ($tmpdir, $for_destroy) = tmpdir(); use_ok 'PublicInbox::Spamcheck::Spamc'; my $spamc = PublicInbox::Spamcheck::Spamc->new; $spamc->{checkcmd} = [qw(cat)]; { open my $fh, '+>', "$tmpdir/file" or die "open failed: $!"; ok(!$spamc->spamcheck($fh), 'empty '.ref($fh)); } ok(!$spamc->spamcheck(IO::File->new_tmpfile), 'IO::File->new_tmpfile'); my $dst = ''; my $src = <<'EOF'; Date: Thu, 01 Jan 1970 00:00:00 +0000 To: From: Subject: test Message-ID: EOF ok($spamc->spamcheck(PublicInbox::Eml->new($src), \$dst), 'PublicInbox::Eml works'); is($dst, $src, 'input == output'); $dst = ''; $spamc->{checkcmd} = ['sh', '-c', 'cat; false']; ok(!$spamc->spamcheck(PublicInbox::Eml->new($src), \$dst), 'Failed check works'); is($dst, $src, 'input == output for spammy example'); for my $l (qw(ham spam)) { my $file = "$tmpdir/$l.out"; $spamc->{$l.'cmd'} = ['tee', $file ]; my $method = $l.'learn'; ok($spamc->$method(PublicInbox::Eml->new($src)), "$method OK"); open my $fh, '<', $file or die "failed to open $file: $!"; is(eval { local $/, <$fh> }, $src, "$l command ran alright"); } done_testing(); public-inbox-1.6.1/t/spawn.t000066400000000000000000000066611377346120300157110ustar00rootroot00000000000000# Copyright (C) 2015-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Spawn qw(which spawn popen_rd); use PublicInbox::Sigfd; { my $true = which('true'); ok($true, "'true' command found with which()"); } { my $pid = spawn(['true']); ok($pid, 'spawned process'); is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process'); is($?, 0, 'true exited successfully'); } { # ensure waitpid(-1, 0) and SIGCHLD works in spawned process my $script = <<'EOF'; $| = 1; # unbuffer stdout defined(my $pid = fork) or die "fork: $!"; if ($pid == 0) { exit } elsif ($pid > 0) { my $waited = waitpid(-1, 0); $waited == $pid or die "mismatched child $pid != $waited"; $? == 0 or die "child err: $>"; $SIG{CHLD} = sub { print "HI\n"; exit }; print "RDY $$\n"; select(undef, undef, undef, 0.01) while 1; } EOF my $oldset = PublicInbox::Sigfd::block_signals(); my $rd = popen_rd([$^X, '-e', $script]); diag 'waiting for child to reap grandchild...'; chomp(my $line = readline($rd)); my ($rdy, $pid) = split(' ', $line); is($rdy, 'RDY', 'got ready signal, waitpid(-1) works in child'); ok(kill('CHLD', $pid), 'sent SIGCHLD to child'); is(readline($rd), "HI\n", '$SIG{CHLD} works in child'); ok(close $rd, 'popen_rd close works'); PublicInbox::Sigfd::sig_setmask($oldset); } { my ($r, $w); pipe $r, $w or die "pipe failed: $!"; my $pid = spawn(['echo', 'hello world'], undef, { 1 => fileno($w) }); close $w or die "close pipe[1] failed: $!"; is(<$r>, "hello world\n", 'read stdout of spawned from pipe'); is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process'); is($?, 0, 'true exited successfully'); } { my ($r, $w); pipe $r, $w or die "pipe failed: $!"; my $pid = spawn(['sh', '-c', 'echo $HELLO'], { 'HELLO' => 'world' }, { 1 => $w }); close $w or die "close pipe[1] failed: $!"; is(<$r>, "world\n", 'read stdout of spawned from pipe'); is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process'); is($?, 0, 'sh exited successfully'); } { my $fh = popen_rd([qw(echo hello)]); ok(fileno($fh) >= 0, 'tied fileno works'); my $l = <$fh>; is($l, "hello\n", 'tied readline works'); $l = <$fh>; ok(!$l, 'tied readline works for EOF'); } { my $fh = popen_rd([qw(printf foo\nbar)]); ok(fileno($fh) >= 0, 'tied fileno works'); my @line = <$fh>; is_deeply(\@line, [ "foo\n", 'bar' ], 'wantarray works on readline'); } { my $fh = popen_rd([qw(echo hello)]); my $buf; is(sysread($fh, $buf, 6), 6, 'sysread got 6 bytes'); is($buf, "hello\n", 'tied gets works'); is(sysread($fh, $buf, 6), 0, 'sysread got EOF'); $? = 1; ok(close($fh), 'close succeeds'); is($?, 0, '$? set properly'); } { my $fh = popen_rd([qw(false)]); ok(!close($fh), 'close fails on false'); isnt($?, 0, '$? set properly: '.$?); } SKIP: { eval { require BSD::Resource; defined(BSD::Resource::RLIMIT_CPU()) } or skip 'BSD::Resource::RLIMIT_CPU missing', 3; my ($r, $w); pipe($r, $w) or die "pipe: $!"; my $cmd = ['sh', '-c', 'while true; do :; done']; my $fd = fileno($w); my $opt = { RLIMIT_CPU => [ 1, 1 ], RLIMIT_CORE => [ 0, 0 ], 1 => $fd }; my $pid = spawn($cmd, undef, $opt); close $w or die "close(w): $!"; my $rset = ''; vec($rset, fileno($r), 1) = 1; ok(select($rset, undef, undef, 5), 'child died before timeout'); is(waitpid($pid, 0), $pid, 'XCPU child process reaped'); isnt($?, 0, 'non-zero exit status'); } done_testing(); 1; public-inbox-1.6.1/t/thread-cycle.t000066400000000000000000000076021377346120300171210ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; require_mods 'Email::Simple'; use_ok('PublicInbox::SearchThread'); my $mt = eval { require Mail::Thread; no warnings 'once'; $Mail::Thread::nosubject = 1; $Mail::Thread::noprune = 1; }; sub make_objs { my @simples; my $n = 0; my @msgs = map { my $msg = $_; $msg->{ds} ||= ++$n; $msg->{references} =~ s/\s+/ /sg if $msg->{references}; $msg->{blob} = '0'x40; # any dummy value will do, here my $simple = Email::Simple->create(header => [ 'Message-ID' => "<$msg->{mid}>", 'References' => $msg->{references}, ]); push @simples, $simple; bless $msg, 'PublicInbox::Smsg' } @_; (\@simples, \@msgs); } my ($simples, $smsgs) = make_objs( # data from t/testbox-6 in Mail::Thread 2.55: { mid => '20021124145312.GA1759@nlin.net' }, { mid => 'slrnau448m.7l4.markj+0111@cloaked.freeserve.co.uk', references => '<20021124145312.GA1759@nlin.net>', }, { mid => '15842.10677.577458.656565@jupiter.akutech-local.de', references => '<20021124145312.GA1759@nlin.net> ', }, { mid => '20021125171807.GK8236@somanetworks.com', references => '<20021124145312.GA1759@nlin.net> <15842.10677.577458.656565@jupiter.akutech-local.de>', }, { mid => '15843.12163.554914.469248@jupiter.akutech-local.de', references => '<20021124145312.GA1759@nlin.net> <15842.10677.577458.656565@jupiter.akutech-local.de> ', }, { mid => 'E18GPHf-0000zp-00@cloaked.freeserve.co.uk', references => '<20021124145312.GA1759@nlin.net> <15842.10677.577458.656565@jupiter.akutech-local.de>' } ); my $st = thread_to_s($smsgs); SKIP: { skip 'Mail::Thread missing', 1 unless $mt; check_mt($st, $simples, 'Mail::Thread output matches'); } my @backwards = ( { mid => 1, references => '<2> <3> <4>' }, { mid => 4, references => '<2> <3>' }, { mid => 5, references => '<6> <7> <8> <3> <2>' }, { mid => 9, references => '<6> <3>' }, { mid => 10, references => '<8> <7> <6>' }, { mid => 2, references => '<6> <7> <8> <3>' }, { mid => 3, references => '<6> <7> <8>' }, { mid => 6, references => '<8> <7>' }, { mid => 7, references => '<8>' }, { mid => 8, references => '' } ); ($simples, $smsgs) = make_objs(@backwards); my $backward = thread_to_s($smsgs); SKIP: { skip 'Mail::Thread missing', 1 unless $mt; check_mt($backward, $simples, 'matches Mail::Thread backwards'); } ($simples, $smsgs) = make_objs(reverse @backwards); my $forward = thread_to_s($smsgs); unless ('Mail::Thread sorts by Date') { SKIP: { skip 'Mail::Thread missing', 1 unless $mt; check_mt($forward, $simples, 'matches Mail::Thread forwards'); } } if ('sorting by Date') { is("\n".$backward, "\n".$forward, 'forward and backward matches'); } done_testing(); sub thread_to_s { my ($msgs) = @_; my $rootset = PublicInbox::SearchThread::thread($msgs, sub { [ sort { $a->{mid} cmp $b->{mid} } @{$_[0]} ] }); my $st = ''; my @q = map { (0, $_) } @$rootset; while (@q) { my $level = shift @q; my $node = shift @q or next; $st .= (" "x$level). "$node->{mid}\n"; my $cl = $level + 1; unshift @q, map { ($cl, $_) } @{$node->{children}}; } $st; } sub check_mt { my ($st, $simples, $msg) = @_; my $mt = Mail::Thread->new(@$simples); $mt->thread; $mt->order(sub { sort { $a->messageid cmp $b->messageid } @_ }); my $check = ''; my @q = map { (0, $_) } $mt->rootset; while (@q) { my $level = shift @q; my $node = shift @q or next; $check .= (" "x$level) . $node->messageid . "\n"; unshift @q, $level + 1, $node->child, $level, $node->next; } is("\n".$check, "\n".$st, $msg); } public-inbox-1.6.1/t/thread-index-gap.t000066400000000000000000000035041377346120300176730ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Eml; use PublicInbox::InboxWritable; use PublicInbox::Config; use List::Util qw(shuffle); require_mods(qw(DBD::SQLite)); require_git(2.6); chomp(my @msgs = split(/\n\n/, <<'EOF')); # "git log" order Subject: [bug#45000] [PATCH 1/9] References: <20201202045335.31096-1-j@example.com> Message-Id: <20201202045540.31248-1-j@example.com> Subject: [bug#45000] [PATCH 0/9] Message-Id: <20201202045335.31096-1-j@example.com> Subject: [bug#45000] [PATCH 0/9] References: <20201202045335.31096-1-j@example.com> Message-ID: <86sg8o1mou.fsf@example.com> Subject: [bug#45000] [PATCH 8/9] Message-Id: <20201202045540.31248-8-j@example.com> References: <20201202045540.31248-1-j@example.com> EOF my ($home, $for_destroy) = tmpdir(); local $ENV{HOME} = $home; for my $msgs (['orig', reverse @msgs], ['shuffle', shuffle(@msgs)]) { my $desc = shift @$msgs; my $n = "index-cap-$desc"; run_script([qw(-init -L basic -V2), $n, "$home/$n", "http://example.com/$n", "$n\@example.com"]) or BAIL_OUT 'init'; my $ibx = PublicInbox::Config->new->lookup_name($n); my $im = PublicInbox::InboxWritable->new($ibx)->importer(0); for my $m (@$msgs) { $im->add(PublicInbox::Eml->new("$m\nFrom: x\@example.com\n\n")); } $im->done; my $over = $ibx->over; my @tid = $over->dbh->selectall_array('SELECT DISTINCT(tid) FROM over'); is(scalar(@tid), 1, "only one thread initially ($desc)"); $over->dbh_close; run_script([qw(-index --reindex --rethread), $ibx->{inboxdir}]) or BAIL_OUT 'rethread'; @tid = $over->dbh->selectall_array('SELECT DISTINCT(tid) FROM over'); is(scalar(@tid), 1, "only one thread after rethread ($desc)"); } done_testing; public-inbox-1.6.1/t/time.t000066400000000000000000000015071377346120300155110ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use POSIX qw(strftime); use PublicInbox::Eml; use PublicInbox::MsgTime qw(msg_datestamp); my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: b@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF my $ts = msg_datestamp($mime->header_obj); is(strftime('%Y-%m-%d %H:%M:%S', gmtime($ts)), '1993-10-02 00:00:00', 'got expected date with 2 digit year'); $mime->header_set(Date => 'Fri, 02 Oct 101 01:02:03 +0000'); $ts = msg_datestamp($mime->header_obj); is(strftime('%Y-%m-%d %H:%M:%S', gmtime($ts)), '2001-10-02 01:02:03', 'got expected date with 3 digit year'); done_testing(); public-inbox-1.6.1/t/uri_imap.t000066400000000000000000000043301377346120300163550ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::TestCommon; require_mods 'URI::Split'; use_ok 'PublicInbox::URIimap'; is(PublicInbox::URIimap->new('https://example.com/'), undef, 'invalid scheme ignored'); my $uri = PublicInbox::URIimap->new('imaps://EXAMPLE.com/'); is($uri->host, 'EXAMPLE.com', 'host ok'); is($uri->canonical->host, 'example.com', 'host canonicalized'); is($uri->canonical->as_string, 'imaps://example.com/', 'URI canonicalized'); is($uri->port, 993, 'imaps port'); is($uri->auth, undef); is($uri->user, undef); $uri = PublicInbox::URIimap->new('imaps://foo@0/'); is($uri->host, '0', 'numeric host'); is($uri->user, 'foo', 'user extracted'); $uri = PublicInbox::URIimap->new('imap://0/INBOX.sub#frag')->canonical; is($uri->as_string, 'imap://0/INBOX.sub', 'no fragment'); is($uri->scheme, 'imap'); $uri = PublicInbox::URIimap->new('imaps://;AUTH=ANONYMOUS@0/'); is($uri->auth, 'ANONYMOUS', 'AUTH=ANONYMOUS accepted'); $uri = PublicInbox::URIimap->new('imaps://bar%40example.com;AUTH=99%25@0/'); is($uri->auth, '99%', 'decoded AUTH'); is($uri->user, 'bar@example.com', 'decoded user'); is($uri->mailbox, undef, 'mailbox is undef'); $uri = PublicInbox::URIimap->new('imaps://ipv6@[::1]'); is($uri->host, '::1', 'IPv6 host'); is($uri->mailbox, undef, 'mailbox is undef'); $uri = PublicInbox::URIimap->new('imaps://0:666/INBOX'); is($uri->port, 666, 'port read'); is($uri->mailbox, 'INBOX'); $uri = PublicInbox::URIimap->new('imaps://0/INBOX.sub'); is($uri->mailbox, 'INBOX.sub'); is($uri->scheme, 'imaps'); is(PublicInbox::URIimap->new('imap://0:143/')->canonical->as_string, 'imap://0/'); is(PublicInbox::URIimap->new('imaps://0:993/')->canonical->as_string, 'imaps://0/'); $uri = PublicInbox::URIimap->new('imap://NSA:Hunter2@0/INBOX'); is($uri->user, 'NSA'); is($uri->password, 'Hunter2'); $uri = PublicInbox::URIimap->new('imap://0/%'); is($uri->mailbox, '%', "RFC 2192 '%' supported"); $uri = PublicInbox::URIimap->new('imap://0/%25'); $uri = PublicInbox::URIimap->new('imap://0/*'); is($uri->mailbox, '*', "RFC 2192 '*' supported"); # TODO: support UIDVALIDITY and other params done_testing; public-inbox-1.6.1/t/utf8.eml000066400000000000000000000005611377346120300157520ustar00rootroot00000000000000Date: Thu, 01 Jan 1970 00:00:00 +0000 To: =?utf-8?Q?El=C3=A9anor?= From: =?utf-8?Q?El=C3=A9anor?= Subject: Testing for =?utf-8?Q?El=C3=A9anor?= Message-ID: MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline Content-Transfer-Encoding: 8bit This is a test message for Eléanor public-inbox-1.6.1/t/v1-add-remove-add.t000066400000000000000000000023241377346120300176460ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Import; use PublicInbox::TestCommon; use PublicInbox::Eml; require_mods(qw(DBD::SQLite Search::Xapian)); require PublicInbox::SearchIdx; my ($inboxdir, $for_destroy) = tmpdir(); my $ibx = { inboxdir => $inboxdir, name => 'test-add-remove-add', -primary_address => 'test@example.com', }; $ibx = PublicInbox::Inbox->new($ibx); my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: test@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); $im->init_bare; ok($im->add($mime), 'message added'); ok($im->remove($mime), 'message removed'); ok($im->add($mime), 'message added again'); $im->done; my $rw = PublicInbox::SearchIdx->new($ibx, 1); $rw->index_sync; my $msgs = $ibx->recent({limit => 10}); is($msgs->[0]->{mid}, 'a-mid@b', 'message exists in history'); is(scalar @$msgs, 1, 'only one message in history'); is($ibx->mm->num_for('a-mid@b'), 2, 'exists with second article number'); done_testing(); public-inbox-1.6.1/t/v1reindex.t000066400000000000000000000357461377346120300164740ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::ContentHash qw(content_digest); use File::Path qw(remove_tree); use PublicInbox::TestCommon; use PublicInbox::Eml; require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::SearchIdx'; use_ok 'PublicInbox::Import'; use_ok 'PublicInbox::OverIdx'; my ($inboxdir, $for_destroy) = tmpdir(); my $ibx_config = { inboxdir => $inboxdir, name => 'test-v1reindex', -primary_address => 'test@example.com', indexlevel => 'full', }; my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: test@example.com Subject: this is a subject Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF my $minmax; my $msgmap; my ($mark1, $mark2, $mark3, $mark4); { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); $im->init_bare; foreach my $i (1..10) { $mime->header_set('Message-Id', "<$i\@example.com>"); ok($im->add($mime), "message $i added"); if ($i == 4) { $mark1 = $im->get_mark($im->{tip}); $im->remove($mime); $mark2 = $im->get_mark($im->{tip}); } } if ('test remove later') { $mark3 = $im->get_mark($im->{tip}); $mime->header_set('Message-Id', "<5\@example.com>"); $im->remove($mime); $mark4 = $im->get_mark($im->{tip}); } $im->done; my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync() }; is($@, '', 'no error from indexing'); $minmax = [ $ibx->mm->minmax ]; ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined'); is_deeply($minmax, [ 1, 10 ], 'minmax as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = @$minmax; $msgmap = $ibx->mm->msg_range(\$min, $max); is_deeply($msgmap, [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected'); } { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing'); $im->done; is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } my $xap = "$inboxdir/public-inbox/xapian".PublicInbox::Search::SCHEMA_VERSION(); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed'); { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is(scalar(@warn), 0, 'no warnings from reindexing'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = @$minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; $config{indexlevel} = 'medium'; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my $mset = $ibx->search->mset('hello world'); isnt($mset->size, 0, 'got Xapian search results'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; $config{indexlevel} = 'basic'; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); isnt($ibx->search, 'no search for basic'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } # upgrade existing basic to medium # note: changing indexlevels is not yet supported in v2, # and may not be without more effort # no removals { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; $config{indexlevel} = 'medium'; my $ibx = PublicInbox::Inbox->new(\%config); my $rw = PublicInbox::SearchIdx->new($ibx, 1); eval { $rw->index_sync({reindex => 1}) }; is($@, '', 'no error from indexing'); is_deeply(\@warn, [], 'no warnings'); my $mset = $ibx->search->reopen->mset('hello world'); isnt($mset->size, 0, 'search OK after basic -> medium'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } # An incremental indexing test ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); # mark1 4 simple additions in the same index_sync eval { $rw->index_sync({ref => $mark1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 4, 'max as expected'); is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [4, '4@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); # mark2 A delete separated form and add in the same index_sync eval { $rw->index_sync({ref => $mark2}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 3, 'max as expected'); is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); # mark3 adds following the delete at mark2 eval { $rw->index_sync({ref => $mark3}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [5, '5@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); # mark4 A delete of an older message eval { $rw->index_sync({ref => $mark4}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } # Another incremental indexing test ok(unlink "$inboxdir/public-inbox/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); # mark2 an add and it's delete in the same index_sync eval { $rw->index_sync({ref => $mark2}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 3, 'max as expected'); is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); # mark3 adds following the delete at mark2 eval { $rw->index_sync({ref => $mark3}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [5, '5@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::Import->new($ibx->git, undef, undef, $ibx); my $rw = PublicInbox::SearchIdx->new($ibx, 1); # mark4 A delete of an older message eval { $rw->index_sync({ref => $mark4}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my $ibx = PublicInbox::Inbox->new({ %$ibx_config }); my $f = $ibx->over->{dbh}->sqlite_db_filename; my $over = PublicInbox::OverIdx->new($f); my $dbh = $over->dbh; my $non_ghost_tids = sub { $dbh->selectall_arrayref(<<''); SELECT tid FROM over WHERE num > 0 ORDER BY tid ASC }; my $before = $non_ghost_tids->(); # mess up threading: my $tid = PublicInbox::OverIdx::get_counter($dbh, 'thread'); my $nr = $dbh->do('UPDATE over SET tid = ?', undef, $tid); my $rw = PublicInbox::SearchIdx->new($ibx, 1); my @pr; my $pr = sub { push @pr, @_ }; $rw->index_sync({reindex => 1, rethread => 1, -progress => $pr }); my @n = $dbh->selectrow_array(<(); ok($after->[0]->[0] > $before->[-1]->[0], 'all tids greater than before'); is(scalar @$after, scalar @$before, 'thread count unchanged'); is_deeply([], \@warn, 'no warnings'); # diag "@pr"; # XXX do we care? } done_testing(); public-inbox-1.6.1/t/v2-add-remove-add.t000066400000000000000000000021121377346120300176420ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::TestCommon; require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::V2Writable'; my ($inboxdir, $for_destroy) = tmpdir(); my $ibx = { inboxdir => "$inboxdir/v2", name => 'test-v2writable', version => 2, -primary_address => 'test@example.com', }; $ibx = PublicInbox::Inbox->new($ibx); my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: test@example.com Subject: this is a subject Date: Fri, 02 Oct 1993 00:00:00 +0000 Message-ID: hello world EOF my $im = PublicInbox::V2Writable->new($ibx, 1); $im->{parallel} = 0; ok($im->add($mime), 'message added'); ok($im->remove($mime), 'message removed'); ok($im->add($mime), 'message added again'); $im->done; my $msgs = $ibx->recent({limit => 1000}); is($msgs->[0]->{mid}, 'a-mid@b', 'message exists in history'); is(scalar @$msgs, 1, 'only one message in history'); done_testing(); public-inbox-1.6.1/t/v2dupindex.t000066400000000000000000000040571377346120300166460ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ # we can index a message from a mirror which bypasses dedupe. use strict; use Test::More; use PublicInbox::TestCommon; require_git(2.6); require_mods(qw(DBD::SQLite)); my ($tmpdir, $for_destroy) = tmpdir(); use_ok 'PublicInbox::Import'; use_ok 'PublicInbox::Git'; use_ok 'PublicInbox::InboxWritable'; my $ibx = PublicInbox::InboxWritable->new({ inboxdir => $tmpdir, name => 'test-v2dupindex', version => 2, indexlevel => 'basic', -primary_address => 'test@example.com', }, { nproc => 1 }); $ibx->init_inbox(1); my $v2w = $ibx->importer; $v2w->add(eml_load('t/plack-qp.eml')); $v2w->add(eml_load('t/mda-mime.eml')); $v2w->done; my $git0 = PublicInbox::Git->new("$tmpdir/git/0.git"); my $im = PublicInbox::Import->new($git0, undef, undef, $ibx); $im->{path_type} = 'v2'; $im->{lock_path} = undef; # bypass duplicate filters (->header_set is optional) my $eml = eml_load('t/plack-qp.eml'); $eml->header_set('X-This-Is-Not-Checked-By-ContentHash', 'blah'); ok($im->add($eml), 'add seen message directly'); ok($im->add(eml_load('t/mda-mime.eml')), 'add another seen message directly'); ok($im->add(eml_load('t/iso-2202-jp.eml')), 'add another new message'); $im->done; # mimic a fresh clone by dropping indices my @sqlite = (glob("$tmpdir/*sqlite3*"), glob("$tmpdir/xap*/*sqlite3*")); is(unlink(@sqlite), scalar(@sqlite), 'unlinked SQLite indices'); my @shards = glob("$tmpdir/xap*/?"); is(scalar(@shards), 0, 'no Xapian shards to drop'); my $rdr = { 2 => \(my $err = '') }; ok(run_script([qw(-index -Lbasic), $tmpdir], undef, $rdr), '-indexed'); my @n = $ibx->over->dbh->selectrow_array('SELECT COUNT(*) FROM over'); is_deeply(\@n, [ 3 ], 'identical message not re-indexed'); my $mm = $ibx->mm->{dbh}->selectall_arrayref(<<''); SELECT num,mid FROM msgmap ORDER BY num ASC is_deeply($mm, [ [ 1, 'qp@example.com' ], [ 2, 'multipart-html-sucks@11' ], [ 3, '199707281508.AAA24167@hoyogw.example' ] ], 'msgmap omits redundant message'); done_testing; public-inbox-1.6.1/t/v2mda.t000066400000000000000000000060461377346120300155670ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use Fcntl qw(SEEK_SET); use Cwd; use PublicInbox::TestCommon; use PublicInbox::Eml; require_git(2.6); my $V = 2; require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::V2Writable'; my ($tmpdir, $for_destroy) = tmpdir(); my $ibx = { inboxdir => "$tmpdir/inbox", name => 'test-v2writable', address => [ 'test@example.com' ], }; my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: test@example.com Subject: this is a subject Date: Fri, 02 Oct 1993 00:00:00 +0000 Message-ID: List-ID: hello world EOF my $main_bin = getcwd()."/t/main-bin"; my $fail_bin = getcwd()."/t/fail-bin"; local $ENV{PI_DIR} = "$tmpdir/foo"; my $fail_path = "$fail_bin:blib/script:$ENV{PATH}"; local $ENV{PATH} = "$main_bin:blib/script:$ENV{PATH}"; my $faildir = "$tmpdir/fail"; local $ENV{PI_EMERGENCY} = $faildir; ok(mkdir $faildir); my @cmd = (qw(-init), "-V$V", $ibx->{name}, $ibx->{inboxdir}, 'http://localhost/test', $ibx->{address}->[0]); ok(run_script(\@cmd), 'initialized v2 inbox'); my $rdr = { 0 => \($mime->as_string) }; local $ENV{ORIGINAL_RECIPIENT} = 'test@example.com'; ok(run_script(['-mda'], undef, $rdr), 'mda delivered a message'); $ibx = PublicInbox::Inbox->new($ibx); if ($V == 1) { ok(run_script([ '-index', "$tmpdir/inbox" ]), 'v1 indexed'); } my $msgs = $ibx->over->recent; is(scalar(@$msgs), 1, 'only got one message'); my $eml = $ibx->smsg_eml($msgs->[0]); is($eml->as_string, $mime->as_string, 'injected message'); { my @new = glob("$faildir/new/*"); is_deeply(\@new, [], 'nothing in faildir'); local $ENV{PATH} = $fail_path; $mime->header_set('Message-ID', ''); $rdr->{0} = \($mime->as_string); ok(run_script(['-mda'], undef, $rdr), 'mda did not die on "spam"'); @new = glob("$faildir/new/*"); is(scalar(@new), 1, 'got a message in faildir'); $msgs = $ibx->over->recent; is(scalar(@$msgs), 1, 'no new message'); my $config = "$ENV{PI_DIR}/config"; ok(-f $config, 'config exists'); my $k = 'publicinboxmda.spamcheck'; is(xsys('git', 'config', "--file=$config", $k, 'none'), 0, 'disabled spamcheck for mda'); ok(run_script(['-mda'], undef, $rdr), 'mda did not die'); my @again = glob("$faildir/new/*"); is_deeply(\@again, \@new, 'no new message in faildir'); $msgs = $ibx->over->recent; is(scalar(@$msgs), 2, 'new message added OK'); } { my $patch = 't/data/0001.patch'; open my $fh, '<', $patch or die "failed to open $patch: $!\n"; $rdr->{0} = \(do { local $/; <$fh> }); ok(run_script(['-mda'], undef, $rdr), 'mda delivered a patch'); my $post = $ibx->search->reopen->mset('dfpost:6e006fd7'); is($post->size, 1, 'got one result for dfpost'); my $pre = $ibx->search->mset('dfpre:090d998'); is($pre->size, 1, 'got one result for dfpre'); $pre = $ibx->search->mset_to_smsg($ibx, $pre); $post = $ibx->search->mset_to_smsg($ibx, $post); is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message in both cases'); } done_testing(); public-inbox-1.6.1/t/v2mirror.t000066400000000000000000000171641377346120300163430ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; use File::Path qw(remove_tree); use Cwd qw(abs_path); require_git(2.6); local $ENV{HOME} = abs_path('t'); # Integration tests for HTTP cloning + mirroring require_mods(qw(Plack::Util Plack::Builder HTTP::Date HTTP::Status Search::Xapian DBD::SQLite)); use IO::Socket; use POSIX qw(dup2); use_ok 'PublicInbox::V2Writable'; use PublicInbox::InboxWritable; use PublicInbox::Eml; use PublicInbox::Config; # FIXME: too much setup my ($tmpdir, $for_destroy) = tmpdir(); my $pi_config = "$tmpdir/config"; { open my $fh, '>', $pi_config or die "open($pi_config): $!"; print $fh <<"" or die "print $pi_config: $!"; [publicinbox "v2"] inboxdir = $tmpdir/in address = test\@example.com close $fh or die "close($pi_config): $!"; } local $ENV{PI_CONFIG} = $pi_config; my $cfg = PublicInbox::Config->new($pi_config); my $ibx = $cfg->lookup('test@example.com'); ok($ibx, 'inbox found'); $ibx->{version} = 2; my $v2w = PublicInbox::V2Writable->new($ibx, 1); ok $v2w, 'v2w loaded'; $v2w->{parallel} = 0; my $mime = PublicInbox::Eml->new(<<''); From: Me To: You Subject: a Date: Thu, 01 Jan 1970 00:00:00 +0000 my $old_rotate_bytes = $v2w->{rotate_bytes}; $v2w->{rotate_bytes} = 500; # force rotating for my $i (1..9) { $mime->header_set('Message-ID', "<$i\@example.com>"); $mime->header_set('Subject', "subject = $i"); ok($v2w->add($mime), "add msg $i OK"); } my $epoch_max = $v2w->{epoch_max}; ok($epoch_max > 0, "multiple epochs"); $v2w->done; { my $smsg = $ibx->over->get_art(1); like($smsg->{lines}, qr/\A[0-9]+\z/, 'lines is a digit'); like($smsg->{bytes}, qr/\A[0-9]+\z/, 'bytes is a digit'); } $ibx->cleanup; my $sock = tcp_server(); ok($sock, 'sock created'); my $cmd = [ '-httpd', '-W0', "--stdout=$tmpdir/out", "--stderr=$tmpdir/err" ]; my $td = start_script($cmd, undef, { 3 => $sock }); my ($host, $port) = ($sock->sockhost, $sock->sockport); $sock = undef; my @cmd; foreach my $i (0..$epoch_max) { my $sfx = $i == 0 ? '.git' : ''; @cmd = (qw(git clone --mirror -q), "http://$host:$port/v2/$i$sfx", "$tmpdir/m/git/$i.git"); is(xsys(@cmd), 0, "cloned $i.git"); ok(-d "$tmpdir/m/git/$i.git", "mirror $i OK"); } @cmd = ("-init", '-j1', '-V2', 'm', "$tmpdir/m", 'http://example.com/m', 'alt@example.com'); ok(run_script(\@cmd), 'initialized public-inbox -V2'); my @shards = glob("$tmpdir/m/xap*/?"); is(scalar(@shards), 1, 'got a single shard on init'); ok(run_script([qw(-index -j0), "$tmpdir/m"]), 'indexed'); my $mibx = { inboxdir => "$tmpdir/m", address => 'alt@example.com' }; $mibx = PublicInbox::Inbox->new($mibx); is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'index synched minmax'); $v2w->{rotate_bytes} = $old_rotate_bytes; for my $i (10..15) { $mime->header_set('Message-ID', "<$i\@example.com>"); $mime->header_set('Subject', "subject = $i"); ok($v2w->add($mime), "add msg $i OK"); } $v2w->done; $ibx->cleanup; my $fetch_each_epoch = sub { foreach my $i (0..$epoch_max) { my $dir = "$tmpdir/m/git/$i.git"; is(xsys('git', "--git-dir=$dir", 'fetch', '-q'), 0, 'fetch successful'); } }; $fetch_each_epoch->(); my $mset = $mibx->search->reopen->mset('m:15@example.com'); is(scalar($mset->items), 0, 'new message not found in mirror, yet'); ok(run_script([qw(-index -j0), "$tmpdir/m"]), 'index updated'); is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'index synched minmax'); $mset = $mibx->search->reopen->mset('m:15@example.com'); is(scalar($mset->items), 1, 'found message in mirror'); # purge: $mime->header_set('Message-ID', '<10@example.com>'); $mime->header_set('Subject', 'subject = 10'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; ok($v2w->purge($mime), 'purge a message'); my $warn = join('', @warn); like($warn, qr/purge rewriting/); my @subj = ($warn =~ m/^# subject .*$/mg); is_deeply(\@subj, ["# subject = 10"], "only rewrote one"); } $v2w->done; my $msgs = $mibx->over->get_thread('10@example.com'); my $to_purge = $msgs->[0]->{blob}; like($to_purge, qr/\A[a-f0-9]{40,}\z/, 'read blob to be purged'); $mset = $ibx->search->reopen->mset('m:10@example.com'); is(scalar($mset->items), 0, 'purged message gone from origin'); $fetch_each_epoch->(); { $ibx->cleanup; PublicInbox::InboxWritable::cleanup($mibx); $v2w->done; my $cmd = [ qw(-index --prune -j0), "$tmpdir/m" ]; my ($out, $err) = ('', ''); my $opt = { 1 => \$out, 2 => \$err }; ok(run_script($cmd, undef, $opt), '-index --prune'); like($err, qr/discontiguous range/, 'warned about discontiguous range'); unlike($err, qr/fatal/, 'no scary fatal error shown'); } $mset = $mibx->search->reopen->mset('m:10@example.com'); is(scalar($mset->items), 0, 'purged message not found in mirror'); is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'minmax still synced'); for my $i ((1..9),(11..15)) { $mset = $mibx->search->mset("m:$i\@example.com"); is(scalar($mset->items), 1, "$i\@example.com remains visible"); } is($mibx->git->check($to_purge), undef, 'unindex+prune successful in mirror'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; $v2w->index_sync; is_deeply(\@warn, [], 'no warnings from index_sync after purge'); } # deletes happen in a different fetch window { $mset = $mibx->search->reopen->mset('m:1@example.com'); is(scalar($mset->items), 1, '1@example.com visible in mirror'); $mime->header_set('Message-ID', '<1@example.com>'); $mime->header_set('Subject', 'subject = 1'); ok($v2w->remove($mime), 'removed <1@example.com> from source'); $v2w->done; $ibx->cleanup; $fetch_each_epoch->(); PublicInbox::InboxWritable::cleanup($mibx); my $cmd = [ qw(-index -j0), "$tmpdir/m" ]; my ($out, $err) = ('', ''); my $opt = { 1 => \$out, 2 => \$err }; ok(run_script($cmd, undef, $opt), 'index ran'); is($err, '', 'no errors reported by index'); $mset = $mibx->search->reopen->mset('m:1@example.com'); is(scalar($mset->items), 0, '1@example.com no longer visible in mirror'); } if ('sequential-shard') { $mset = $mibx->search->mset('m:15@example.com'); is(scalar($mset->items), 1, 'large message not indexed'); remove_tree(glob("$tmpdir/m/xap*"), glob("$tmpdir/m/msgmap.*")); my $cmd = [ qw(-index -j9 --sequential-shard), "$tmpdir/m" ]; ok(run_script($cmd), '--sequential-shard works'); my @shards = glob("$tmpdir/m/xap*/?"); is(scalar(@shards), 8, 'got expected shard count'); PublicInbox::InboxWritable::cleanup($mibx); $mset = $mibx->search->mset('m:15@example.com'); is(scalar($mset->items), 1, 'search works after --sequential-shard'); } if ('max size') { $mime->header_set('Message-ID', '<2big@a>'); my $max = '2k'; $mime->body_str_set("z\n" x 1024); ok($v2w->add($mime), "add big message"); $v2w->done; $ibx->cleanup; $fetch_each_epoch->(); PublicInbox::InboxWritable::cleanup($mibx); my $cmd = [qw(-index -j0), "$tmpdir/m", "--max-size=$max" ]; my $opt = { 2 => \(my $err) }; ok(run_script($cmd, undef, $opt), 'indexed with --max-size'); like($err, qr/skipping [a-f0-9]{40,}/, 'warned about skipping message'); $mset = $mibx->search->reopen->mset('m:2big@a'); is(scalar($mset->items), 0, 'large message not indexed'); { open my $fh, '>>', $pi_config or die; print $fh <search->reopen->mset('m:2big@a'); is(scalar($mset->items), 0, 'large message not re-indexed'); } ok($td->kill, 'killed httpd'); $td->join; done_testing(); 1; public-inbox-1.6.1/t/v2reindex.t000066400000000000000000000420701377346120300164610ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::ContentHash qw(content_digest); use File::Path qw(remove_tree); use PublicInbox::TestCommon; require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::V2Writable'; use_ok 'PublicInbox::OverIdx'; my ($inboxdir, $for_destroy) = tmpdir(); my $ibx_config = { inboxdir => $inboxdir, name => 'test-v2writable', version => 2, -primary_address => 'test@example.com', indexlevel => 'full', }; my $agpl = do { open my $fh, '<', 'COPYING' or die "can't open COPYING: $!"; local $/; <$fh>; }; my $phrase = q("defending all users' freedom"); my $mime = PublicInbox::Eml->new(<<'EOF'.$agpl); From: a@example.com To: test@example.com Subject: this is a subject Date: Fri, 02 Oct 1993 00:00:00 +0000 EOF my $minmax; my $msgmap; my ($mark1, $mark2, $mark3, $mark4); { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx, {nproc => 1}); my $im0 = $im->importer(0); foreach my $i (1..10) { $mime->header_set('Message-Id', "<$i\@example.com>"); ok($im->add($mime), "message $i added"); if ($i == 4) { $mark1 = $im0->get_mark($im0->{tip}); $im->remove($mime); $mark2 = $im0->get_mark($im0->{tip}); } } if ('test remove later') { $mark3 = $im0->get_mark($im0->{tip}); $mime->header_set('Message-Id', "<5\@example.com>"); $im->remove($mime); $mark4 = $im0->get_mark($im0->{tip}); } $im->done; $minmax = [ $ibx->mm->minmax ]; ok(defined $minmax->[0] && defined $minmax->[1], 'minmax defined'); is_deeply($minmax, [ 1, 10 ], 'minmax as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = @$minmax; $msgmap = $ibx->mm->msg_range(\$min, $max); is_deeply($msgmap, [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected'); } { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx, 1); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing'); $im->done; delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } my $xap = "$inboxdir/xap".PublicInbox::Search::SCHEMA_VERSION(); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed'); { my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx, 1); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx, 1); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is(scalar(@warn), 0, 'no warnings from reindexing'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } my %sizes; ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx, 1); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); my $mset = $ibx->search->mset($phrase); isnt($mset->size, 0, "phrase search succeeds on indexlevel=full"); for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; $config{indexlevel} = 'medium'; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); if (0) { # not sure why, but Xapian seems to fallback to terms and # phrase searches still work delete $ibx->{search}; my $mset = $ibx->search->mset($phrase); is($mset->size, 0, 'phrase search does not work on medium'); } my $words = $phrase; $words =~ tr/"'//d; my $mset = $ibx->search->mset($words); isnt($mset->size, 0, "normal search works on indexlevel=medium"); for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } ok($sizes{full} > $sizes{medium}, 'medium is smaller than full'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; $config{indexlevel} = 'basic'; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync({reindex => 1}) }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; ok(-d $xap, 'Xapian directories recreated'); delete $ibx->{mm}; is_deeply([ $ibx->mm->minmax ], $minmax, 'minmax unchanged'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); isnt($ibx->search, 'no search for basic'); for (glob("$xap/*/*")) { $sizes{$ibx->{indexlevel}} += -s _ if -f $_ } ok($sizes{medium} > $sizes{basic}, 'basic is smaller than medium'); my ($min, $max) = $ibx->mm->minmax; is_deeply($ibx->mm->msg_range(\$min, $max), $msgmap, 'msgmap unchanged'); } # An incremental indexing test ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); # mark1 4 simple additions in the same index_sync $ibx->{ref_head} = $mark1; my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync() }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 4, 'max as expected'); is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [4, '4@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); # mark2 A delete separated from an add in the same index_sync $ibx->{ref_head} = $mark2; my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync() }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 3, 'max as expected'); is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); # mark3 adds following the delete at mark2 $ibx->{ref_head} = $mark3; my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync() }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [5, '5@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); # mark4 A delete of an older message $ibx->{ref_head} = $mark4; my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync() }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } # Another incremental indexing test ok(unlink "$inboxdir/msgmap.sqlite3", 'remove msgmap'); remove_tree($xap); ok(!-d $xap, 'Xapian directories removed again'); { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); # mark2 an add and it's delete in the same index_sync $ibx->{ref_head} = $mark2; my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync() }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 3, 'max as expected'); is($ibx->mm->num_highwater, 4, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); # mark3 adds following the delete at mark2 $ibx->{ref_head} = $mark3; my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync() }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [5, '5@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); # mark4 A delete of an older message $ibx->{ref_head} = $mark4; my $im = PublicInbox::V2Writable->new($ibx); eval { $im->index_sync() }; is($@, '', 'no error from reindexing without msgmap'); is_deeply(\@warn, [], 'no warnings'); $im->done; my ($min, $max) = $ibx->mm->minmax; is($min, 1, 'min as expected'); is($max, 10, 'max as expected'); is($ibx->mm->num_highwater, 10, 'num_highwater as expected'); is_deeply($ibx->mm->msg_range(\$min, $max), [ [1, '1@example.com' ], [2, '2@example.com' ], [3, '3@example.com' ], [6, '6@example.com' ], [7, '7@example.com' ], [8, '8@example.com' ], [9, '9@example.com' ], [10, '10@example.com' ], ], 'msgmap as expected' ); } my $check_rethread = sub { my ($desc) = @_; my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; my $ibx = PublicInbox::Inbox->new(\%config); my $f = $ibx->over->{dbh}->sqlite_db_filename; my $over = PublicInbox::OverIdx->new($f); my $dbh = $over->dbh; my $non_ghost_tids = sub { $dbh->selectall_arrayref(<<''); SELECT tid FROM over WHERE num > 0 ORDER BY tid ASC }; my $before = $non_ghost_tids->(); # mess up threading: my $tid = PublicInbox::OverIdx::get_counter($dbh, 'thread'); my $nr = $dbh->do('UPDATE over SET tid = ?', undef, $tid); diag "messing up all threads with tid=$tid"; my $v2w = PublicInbox::V2Writable->new($ibx); my @pr; my $pr = sub { push @pr, @_ }; $v2w->index_sync({reindex => 1, rethread => 1, -progress => $pr}); # diag "@pr"; # nobody cares is_deeply(\@warn, [], 'no warnings on reindex + rethread'); my @n = $dbh->selectrow_array(<(); ok($after->[0]->[0] > $before->[-1]->[0], 'all tids greater than before'); is(scalar @$after, scalar @$before, 'thread count unchanged'); }; $check_rethread->('no-monster'); # A real example from linux-renesas-soc on lore where a 3-headed monster # of a message has 3 sets of common headers. Another normal message # previously existed with a single Message-ID that conflicts with one # of the Message-IDs in the 3-headed monster. { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my %config = %$ibx_config; $config{indexlevel} = 'medium'; my $ibx = PublicInbox::Inbox->new(\%config); my $im = PublicInbox::V2Writable->new($ibx); my $m3 = PublicInbox::Eml->new(<<'EOF'); Date: Tue, 24 May 2016 14:34:22 -0700 (PDT) Message-Id: <20160524.143422.552507610109476444.d@example.com> To: t@example.com Cc: c@example.com Subject: Re: [PATCH v2 2/2] uno From: In-Reply-To: <1463825855-7363-2-git-send-email-y@example.com> References: <1463825855-7363-1-git-send-email-y@example.com> <1463825855-7363-2-git-send-email-y@example.com> Date: Wed, 25 May 2016 10:01:51 +0900 From: h@example.com To: g@example.com Cc: m@example.com Subject: Re: [PATCH] dos Message-ID: <20160525010150.GD7292@example.com> References: <1463498133-23918-1-git-send-email-g+r@example.com> In-Reply-To: <1463498133-23918-1-git-send-email-g+r@example.com> From: s@example.com To: h@example.com Cc: m@example.com Subject: [PATCH 12/13] tres Date: Wed, 01 Jun 2016 01:32:35 +0300 Message-ID: <1923946.Jvi0TDUXFC@wasted.example.com> In-Reply-To: <13205049.n7pM8utpHF@wasted.example.com> References: <13205049.n7pM8utpHF@wasted.example.com> Somehow we got a message with 3 sets of headers into one message, could've been something broken on the archiver side. EOF my $m1 = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: t@example.com Subject: [PATCH 12/13] Date: Wed, 01 Jun 2016 01:32:35 +0300 Message-ID: <1923946.Jvi0TDUXFC@wasted.example.com> In-Reply-To: <13205049.n7pM8utpHF@wasted.example.com> References: <13205049.n7pM8utpHF@wasted.example.com> This is probably one of the original messages EOF $im->add($m1); $im->add($m3); $im->done; remove_tree($xap); eval { $im->index_sync() }; is($@, '', 'no error from initial indexing'); is_deeply(\@warn, [], 'no warnings from initial index'); eval { $im->index_sync({reindex=>1}) }; is($@, '', 'no error from reindexing after reused Message-ID (x3)'); is_deeply(\@warn, [], 'no warnings on reindex'); my %uniq; for my $s (qw(uno dos tres)) { my $mset = $ibx->search->mset("s:$s"); my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); is(scalar(@$msgs), 1, "only one result for `$s'"); $uniq{$msgs->[0]->{num}}++; } is_deeply([values %uniq], [3], 'search on different subjects'); } # XXX: not deterministic when dealing with ambiguous messages, oh well $check_rethread->('3-headed-monster once'); $check_rethread->('3-headed-monster twice'); done_testing(); public-inbox-1.6.1/t/v2writable.t000066400000000000000000000254631377346120300166430ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::Eml; use PublicInbox::ContentHash qw(content_digest content_hash); use PublicInbox::TestCommon; use Cwd qw(abs_path); require_git(2.6); require_mods(qw(DBD::SQLite Search::Xapian)); local $ENV{HOME} = abs_path('t'); use_ok 'PublicInbox::V2Writable'; umask 007; my ($inboxdir, $for_destroy) = tmpdir(); my $ibx = { inboxdir => $inboxdir, name => 'test-v2writable', version => 2, -primary_address => 'test@example.com', }; $ibx = PublicInbox::Inbox->new($ibx); my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: test@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 hello world EOF my $im = PublicInbox::V2Writable->new($ibx, {nproc => 1}); is($im->{shards}, 1, 'one shard when forced'); ok($im->add($mime), 'ordinary message added'); foreach my $f ("$inboxdir/msgmap.sqlite3", glob("$inboxdir/xap*/*"), glob("$inboxdir/xap*/*/*")) { my @st = stat($f); my ($bn) = (split(m!/!, $f))[-1]; is($st[2] & 07777, -f _ ? 0660 : 0770, "default sharedRepository respected for $bn"); } my $git0; if ('ensure git configs are correct') { my @cmd = (qw(git config), "--file=$inboxdir/all.git/config", qw(core.sharedRepository 0644)); is(xsys(@cmd), 0, "set sharedRepository in all.git"); $git0 = PublicInbox::Git->new("$inboxdir/git/0.git"); chomp(my $v = $git0->qx(qw(config core.sharedRepository))); is($v, '0644', 'child repo inherited core.sharedRepository'); chomp($v = $git0->qx(qw(config --bool repack.writeBitmaps))); is($v, 'true', 'child repo inherited repack.writeBitmaps'); } { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; is($im->add($mime), undef, 'obvious duplicate rejected'); is(scalar(@warn), 0, 'no warning about resent message'); @warn = (); $mime->header_set('Message-Id', '', ''); is($im->add($mime), undef, 'secondary MID ignored if first matches'); my $sec = PublicInbox::Eml->new($mime->as_string); $sec->header_set('Date'); $sec->header_set('Message-Id', '', ''); ok($im->add($sec), 'secondary MID used if data is different'); like(join(' ', @warn), qr/mismatched/, 'warned about mismatch'); like(join(' ', @warn), qr/alternative/, 'warned about alternative'); is_deeply([ '', '' ], [ $sec->header_obj->header_raw('Message-Id') ], 'no new Message-Id added'); my $sane_mid = qr/\A<[\w\-\.]+\@\w+>\z/; @warn = (); $mime->header_set('Message-Id', ''); $mime->body_set('different'); ok($im->add($mime), 'reused mid ok'); like(join(' ', @warn), qr/reused/, 'warned about reused MID'); my @mids = $mime->header_obj->header_raw('Message-Id'); is($mids[0], '', 'original mid not changed'); like($mids[1], $sane_mid, 'new MID added'); is(scalar(@mids), 2, 'only one new MID added'); @warn = (); $mime->header_set('Message-Id', ''); $mime->body_set('this one needs a random mid'); my $hdr = $mime->header_obj; my $gen = PublicInbox::Import::digest2mid(content_digest($mime), $hdr); unlike($gen, qr![\+/=]!, 'no URL-unfriendly chars in Message-Id'); my $fake = PublicInbox::Eml->new($mime->as_string); $fake->header_set('Message-Id', "<$gen>"); ok($im->add($fake), 'fake added easily'); is_deeply(\@warn, [], 'no warnings from a faker'); ok($im->add($mime), 'random MID made'); like(join(' ', @warn), qr/using random/, 'warned about using random'); @mids = $mime->header_obj->header_raw('Message-Id'); is($mids[0], '', 'original mid not changed'); like($mids[1], $sane_mid, 'new MID added'); is(scalar(@mids), 2, 'only one new MID added'); @warn = (); $mime->header_set('Message-Id'); ok($im->add($mime), 'random MID made for MID free message'); @mids = $mime->header_obj->header_raw('Message-Id'); like($mids[0], $sane_mid, 'mid was generated'); is(scalar(@mids), 1, 'new generated'); @warn = (); $mime->header_set('Message-Id', ''); ok($im->add($mime), 'message added with space in Message-Id'); is_deeply([], \@warn); } { $mime->header_set('Message-Id', '', ''); $mime->header_set('X-Alt-Message-Id', ''); $mime->header_set('References', ''); ok($im->add($mime), 'message with multiple Message-ID'); $im->done; my $total = $ibx->over->dbh->selectrow_array(<<''); SELECT COUNT(*) FROM over WHERE num > 0 is($ibx->mm->num_highwater, $total, 'got expected highwater value'); my $mset1 = $ibx->search->reopen->mset('m:abcde@1'); is($mset1->size, 1, 'message found by first MID'); my $mset2 = $ibx->search->mset('m:abcde@2'); is($mset2->size, 1, 'message found by second MID'); is((($mset1->items)[0])->get_docid, (($mset2->items)[0])->get_docid, 'same document') if ($mset1->size); my $alt = $ibx->search->mset('m:alt-id-for-nntp'); is($alt->size, 1, 'message found by alt MID (NNTP)'); is((($alt->items)[0])->get_docid, (($mset1->items)[0])->get_docid, 'same document') if ($mset1->size); $mime->header_set('X-Alt-Message-Id'); my %uniq; for my $mid (qw(abcde@1 abcde@2 alt-id-for-nntp)) { my $msgs = $ibx->over->get_thread($mid); my $key = join(' ', sort(map { $_->{num} } @$msgs)); $uniq{$key}++; } is(scalar(keys(%uniq)), 1, 'all alt Message-ID queries give same smsg'); is_deeply([values(%uniq)], [3], '3 queries, 3 results'); } { use Net::NNTP; my $err = "$inboxdir/stderr.log"; my $out = "$inboxdir/stdout.log"; my $group = 'inbox.comp.test.v2writable'; my $pi_config = "$inboxdir/pi_config"; open my $fh, '>', $pi_config or die "open: $!\n"; print $fh < $pi_config }; my $td = start_script($cmd, $env, { 3 => $sock }); my $host_port = $sock->sockhost . ':' . $sock->sockport; my $n = Net::NNTP->new($host_port); $n->group($group); my $x = $n->xover('1-'); my %uniq; foreach my $num (sort { $a <=> $b } keys %$x) { my $mid = $x->{$num}->[3]; is($uniq{$mid}++, 0, "MID for $num is unique in XOVER"); is_deeply($n->xhdr('Message-ID', $num), { $num => $mid }, "XHDR lookup OK on num $num"); # FIXME PublicInbox::NNTP (server) doesn't handle spaces in # Message-ID, but neither does Net::NNTP (client) next if $mid =~ / /; is_deeply($n->xhdr('Message-ID', $mid), { $mid => $mid }, "XHDR lookup OK on MID $mid ($num)"); } my %nn; foreach my $mid (@{$n->newnews(0, $group)}) { is($nn{$mid}++, 0, "MID is unique in NEWNEWS"); } is_deeply([sort keys %nn], [sort keys %uniq]); my %lg; foreach my $num (@{$n->listgroup($group)}) { is($lg{$num}++, 0, "num is unique in LISTGROUP"); } is_deeply([sort keys %lg], [sort keys %$x], 'XOVER and LISTGROUPS return the same article numbers'); my $xref = $n->xhdr('Xref', '1-'); is_deeply([sort keys %lg], [sort keys %$xref], 'Xref range OK'); my $mids = $n->xhdr('Message-ID', '1-'); is_deeply([sort keys %lg], [sort keys %$xref], 'Message-ID range OK'); my $rover = $n->xrover('1-'); is_deeply([sort keys %lg], [sort keys %$rover], 'XROVER range OK'); }; { my @log = qw(log --no-decorate --no-abbrev --no-notes --no-color); my @before = $git0->qx(@log, qw(--pretty=oneline)); my $before = $git0->qx(@log, qw(--pretty=raw --raw -r)); $im = PublicInbox::V2Writable->new($ibx, {nproc => 2}); is($im->{shards}, 1, 'detected single shard from previous'); my ($mark, $rm_mime, $smsg) = $im->remove($mime, 'test removal'); is(content_hash($rm_mime), content_hash($mime), 'removed object returned matches'); ok(defined($mark), 'mark set'); $im->done; my @after = $git0->qx(@log, qw(--pretty=oneline)); my $tip = shift @after; like($tip, qr/\A[a-f0-9]+ test removal\n\z/s, 'commit message propagated to git'); is_deeply(\@after, \@before, 'only one commit written to git'); my $mid = $smsg->{mid}; is($ibx->mm->num_for($mid), undef, 'no longer in Msgmap by mid'); my $num = $smsg->{num}; like($num, qr/\A\d+\z/, 'numeric number in return message'); is($ibx->mm->mid_for($num), undef, 'no longer in Msgmap by num'); my $mset = $ibx->search->reopen->mset('m:'.$mid); is($mset->size, 0, 'no longer found in Xapian'); my @log1 = (@log, qw(-1 --pretty=raw --raw -r --no-renames)); is($ibx->over->get_art($num), undef, 'removal propagated to Over DB'); my $after = $git0->qx(@log1); if ($after =~ m!( [a-f0-9]+ )A\td$!m) { my $oid = $1; ok(index($before, $oid) > 0, 'no new blob introduced'); } else { fail('failed to extract blob from log output'); } is($im->remove($mime, 'test removal'), undef, 'remove is idempotent'); $im->done; is($git0->qx(@log1), $after, 'no git history made with idempotent remove'); eval { $im->done }; ok(!$@, '->done is idempotent'); } { ok($im->add($mime), 'add message to be purged'); local $SIG{__WARN__} = sub {}; ok(my $cmt = $im->purge($mime), 'purged message'); like($cmt->[0], qr/\A[a-f0-9]{40,}\z/, 'purge returned current commit'); $im->done; # again is($im->purge($mime), undef, 'no-op returns undef'); } { my $x = 'x'x250; my $y = 'y'x250; local $SIG{__WARN__} = sub {}; $mime->header_set('Subject', 'long mid'); $mime->header_set('Message-ID', "<$x>"); ok($im->add($mime), 'add excessively long Message-ID'); $mime->header_set('Message-ID', "<$y>"); $mime->header_set('References', "<$x>"); ok($im->add($mime), 'add excessively long References'); $im->barrier; my $msgs = $ibx->over->get_thread('x'x244); is(2, scalar(@$msgs), 'got both messages'); is($msgs->[0]->{mid}, 'x'x244, 'stored truncated mid'); is($msgs->[1]->{references}, '<'.('x'x244).'>', 'stored truncated ref'); is($msgs->[1]->{mid}, 'y'x244, 'stored truncated mid(2)'); $im->done; } my $tmp = { inboxdir => "$inboxdir/non-existent/subdir", name => 'nope', version => 2, -primary_address => 'test@example.com', }; eval { my $nope = PublicInbox::V2Writable->new($tmp); $nope->add($mime); }; ok($@, 'V2Writable fails on non-existent dir'); { my $v2w = PublicInbox::V2Writable->new($tmp, 1); ok($v2w, 'creat flag works'); $v2w->{parallel} = 0; $v2w->init_inbox(0); my $alt = "$tmp->{inboxdir}/all.git/objects/info/alternates"; open my $fh, '>>', $alt or die $!; print $fh "$inboxdir/all.git/objects\n" or die $!; chmod(0664, $fh) or die "fchmod: $!"; close $fh or die $!; open $fh, '<', $alt or die $!; my $before = do { local $/; <$fh> }; ok($v2w->git_init(3), 'init a new epoch'); open $fh, '<', $alt or die $!; my $after = do { local $/; <$fh> }; ok(index($after, $before) > 0, 'old contents preserved after adding epoch'); like($after, qr!\A[^\n]+?/3\.git/objects\n!s, 'first line is newest epoch'); my $mode = (stat($alt))[2] & 07777; is($mode, 0664, sprintf('0%03o', $mode).' is 0664'); } done_testing(); public-inbox-1.6.1/t/view.t000066400000000000000000000020261377346120300155220ustar00rootroot00000000000000# Copyright (C) 2013-2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::TestCommon; require_mods('Plack::Util'); use_ok 'PublicInbox::View'; # this only tests View.pm internals which are subject to change, # see t/plack.t for tests against the PSGI interface. my $cols = PublicInbox::View::COLS(); my @addr; until (length(join(', ', @addr)) > ($cols * 2)) { push @addr, '"l, f" '; my $n = int(rand(20)) + 1; push @addr, ('x'x$n).'@x'; } my $orig = join(', ', @addr); my $res = PublicInbox::View::fold_addresses($orig.''); isnt($res, $orig, 'folded result'); unlike($res, qr/l,\n\tf/s, '"last, first" no broken'); my @nospc = ($res, $orig); s/\s+//g for @nospc; is($nospc[0], $nospc[1], 'no addresses lost in translation'); my $tws = PublicInbox::View::fold_addresses($orig.' '); # (Email::Simple drops leading whitespace, but not trailing) $tws =~ s/ \z//; is($tws, $res, 'not thrown off by trailing whitespace'); done_testing(); public-inbox-1.6.1/t/watch_filter_rubylang.t000066400000000000000000000054261377346120300211350ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use PublicInbox::TestCommon; use Test::More; use PublicInbox::Eml; use PublicInbox::Config; require_mods(qw(DBD::SQLite Search::Xapian)); use_ok 'PublicInbox::Watch'; use_ok 'PublicInbox::Emergency'; my ($tmpdir, $for_destroy) = tmpdir(); local $ENV{PI_CONFIG} = "$tmpdir/pi_config"; my @v = qw(V1); SKIP: { if (require_git(2.6, 1)) { use_ok 'PublicInbox::V2Writable'; push @v, 'V2'; } else { skip 'git 2.6+ needed for V2', 40; } } for my $v (@v) { my @warn; local $SIG{__WARN__} = sub { push @warn, @_ }; my $cfgpfx = "publicinbox.$v"; my $inboxdir = "$tmpdir/$v"; my $maildir = "$tmpdir/md-$v"; my $spamdir = "$tmpdir/spam-$v"; my $addr = "test-$v\@example.com"; my @cmd = ('-init', "-$v", $v, $inboxdir, "http://example.com/$v", $addr); ok(run_script(\@cmd), 'public-inbox init OK'); if ($v eq 'V1') { ok(run_script(['-index', $inboxdir]), 'v1 indexed'); } PublicInbox::Emergency->new($spamdir); for my $i (1..15) { my $msg = < Date: Sat, 05 Jan 2019 04:19:17 +0000 something EOF PublicInbox::Emergency->new($maildir)->prepare(\$msg); } my $spam = < Date: Sat, 05 Jan 2019 04:19:17 +0000 spam EOF PublicInbox::Emergency->new($maildir)->prepare(\"$spam"); my $orig = <new(\$orig); my $ibx = $config->lookup_name($v); ok($ibx, 'found inbox by name'); my $w = PublicInbox::Watch->new($config); for my $i (1..2) { $w->scan('full'); } # make sure all serials are searchable: for my $i (1..15) { my $mset = $ibx->search->mset("alerts:$i"); is($mset->size, 1, "got one result for alerts:$i"); my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i"); } is($ibx->search->mset('b:spam')->size, 1, 'got spam message'); my $nr = unlink <$maildir/new/*>; is(16, $nr); { PublicInbox::Emergency->new($spamdir)->prepare(\$spam); my @new = glob("$spamdir/new/*"); my @p = split(m!/+!, $new[0]); ok(link($new[0], "$spamdir/cur/".$p[-1].":2,S")); is(unlink($new[0]), 1); } $w->scan('full'); $config = PublicInbox::Config->new(\$orig); $ibx = $config->lookup_name($v); is($ibx->search->reopen->mset('b:spam')->size, 0, 'spam removed'); is_deeply([], \@warn, 'no warnings'); } done_testing(); public-inbox-1.6.1/t/watch_imap.t000066400000000000000000000013321377346120300166630ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::Config; # see t/imapd*.t for tests against a live IMAP server use_ok 'PublicInbox::Watch'; my $cfg = PublicInbox::Config->new(\<new($cfg); is($watch->{imap}->{'imap://example.com/INBOX.a'}->[0]->{name}, 'i', 'watched an inbox'); is($watch->{imap}->{'imap://example.com/INBOX.spam'}, 'watchspam', 'watched spam folder'); done_testing; public-inbox-1.6.1/t/watch_maildir.t000066400000000000000000000157621377346120300173720ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::Eml; use Cwd; use PublicInbox::Config; use PublicInbox::TestCommon; use PublicInbox::Import; my ($tmpdir, $for_destroy) = tmpdir(); my $git_dir = "$tmpdir/test.git"; my $maildir = "$tmpdir/md"; my $spamdir = "$tmpdir/spam"; use_ok 'PublicInbox::Watch'; use_ok 'PublicInbox::Emergency'; my $cfgpfx = "publicinbox.test"; my $addr = 'test-public@example.com'; PublicInbox::Import::init_bare($git_dir); my $msg = < Date: Sat, 18 Jun 2016 00:00:00 +0000 something EOF PublicInbox::Emergency->new($maildir)->prepare(\$msg); ok(POSIX::mkfifo("$maildir/cur/fifo", 0777), 'create FIFO to ensure we do not get stuck on it :P'); my $sem = PublicInbox::Emergency->new($spamdir); # create dirs { my @w; local $SIG{__WARN__} = sub { push @w, @_ }; my $config = PublicInbox::Config->new(\<new($config); is(scalar grep(/is a spam folder/, @w), 1, 'got warning about spam'); is_deeply($wm->{mdmap}, { "$spamdir/cur" => 'watchspam' }, 'only got the spam folder to watch'); } my $cfg_path = "$tmpdir/config"; { open my $fh, '>', $cfg_path or BAIL_OUT $!; print $fh <new($cfg_path); PublicInbox::Watch->new($config)->scan('full'); my $git = PublicInbox::Git->new($git_dir); my @list = $git->qx(qw(rev-list refs/heads/master)); is(scalar @list, 1, 'one revision in rev-list'); my $write_spam = sub { is(scalar glob("$spamdir/new/*"), undef, 'no spam existing'); $sem->prepare(\$msg); $sem->commit; my @new = glob("$spamdir/new/*"); is(scalar @new, 1); my @p = split(m!/+!, $new[0]); ok(link($new[0], "$spamdir/cur/".$p[-1].":2,S")); is(unlink($new[0]), 1); }; $write_spam->(); is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam'); PublicInbox::Watch->new($config)->scan('full'); @list = $git->qx(qw(rev-list refs/heads/master)); is(scalar @list, 2, 'two revisions in rev-list'); @list = $git->qx(qw(ls-tree -r --name-only refs/heads/master)); is(scalar @list, 0, 'tree is empty'); is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); # check with scrubbing { $msg .= qq(-- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo\@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html\n); PublicInbox::Emergency->new($maildir)->prepare(\$msg); PublicInbox::Watch->new($config)->scan('full'); @list = $git->qx(qw(ls-tree -r --name-only refs/heads/master)); is(scalar @list, 1, 'tree has one file'); my $mref = $git->cat_file('HEAD:'.$list[0]); like($$mref, qr/something\n\z/s, 'message scrubbed on import'); is(unlink(glob("$maildir/new/*")), 1, 'unlinked spam'); $write_spam->(); PublicInbox::Watch->new($config)->scan('full'); @list = $git->qx(qw(ls-tree -r --name-only refs/heads/master)); is(scalar @list, 0, 'tree is empty'); @list = $git->qx(qw(rev-list refs/heads/master)); is(scalar @list, 4, 'four revisions in rev-list'); is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); } { my $fail_bin = getcwd()."/t/fail-bin"; ok(-x "$fail_bin/spamc", "mock spamc exists"); my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc ham mock local $ENV{PATH} = $fail_path; PublicInbox::Emergency->new($maildir)->prepare(\$msg); $config->{'publicinboxwatch.spamcheck'} = 'spamc'; { local $SIG{__WARN__} = sub {}; # quiet spam check warning PublicInbox::Watch->new($config)->scan('full'); } @list = $git->qx(qw(ls-tree -r --name-only refs/heads/master)); is(scalar @list, 0, 'tree has no files spamc checked'); is(unlink(glob("$maildir/new/*")), 1); } { my $main_bin = getcwd()."/t/main-bin"; ok(-x "$main_bin/spamc", "mock spamc exists"); my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock local $ENV{PATH} = $main_path; PublicInbox::Emergency->new($maildir)->prepare(\$msg); $config->{'publicinboxwatch.spamcheck'} = 'spamc'; @list = $git->qx(qw(ls-tree -r --name-only refs/heads/master)); PublicInbox::Watch->new($config)->scan('full'); @list = $git->qx(qw(ls-tree -r --name-only refs/heads/master)); is(scalar @list, 1, 'tree has one file after spamc checked'); # XXX: workaround some weird caching/memoization in cat-file, # shouldn't be an issue in real-world use, though... $git = PublicInbox::Git->new($git_dir); my $mref = $git->cat_file('refs/heads/master:'.$list[0]); like($$mref, qr/something\n\z/s, 'message scrubbed on import'); } # end-to-end test which actually uses inotify/kevent { my $env = { PI_CONFIG => $cfg_path }; $git->cleanup; # n.b. --no-scan is only intended for testing atm my $wm = start_script([qw(-watch --no-scan)], $env); my $eml = eml_load('t/data/0001.patch'); $eml->header_set('Cc', $addr); my $em = PublicInbox::Emergency->new($maildir); $em->prepare(\($eml->as_string)); use_ok 'PublicInbox::InboxIdle'; use_ok 'PublicInbox::DS'; my $delivered = 0; my $cb = sub { my ($ibx) = @_; diag "message delivered to `$ibx->{name}'"; $delivered++; }; PublicInbox::DS->Reset; my $ii = PublicInbox::InboxIdle->new($config); my $obj = bless \$cb, 'PublicInbox::TestCommon::InboxWakeup'; $config->each_inbox(sub { $_[0]->subscribe_unlock('ident', $obj) }); PublicInbox::DS->SetPostLoopCallback(sub { $delivered == 0 }); # wait for -watch to setup inotify watches my $sleep = 1; if (eval { require Linux::Inotify2 } && -d "/proc/$wm->{pid}/fd") { my $end = time + 2; my (@ino, @ino_info); do { @ino = grep { (readlink($_)//'') =~ /\binotify\b/ } glob("/proc/$wm->{pid}/fd/*"); } until (@ino || time > $end || !tick); if (scalar(@ino) == 1) { my $ino_fd = (split('/', $ino[0]))[-1]; my $ino_fdinfo = "/proc/$wm->{pid}/fdinfo/$ino_fd"; while (time < $end && open(my $fh, '<', $ino_fdinfo)) { @ino_info = grep(/^inotify wd:/, <$fh>); last if @ino_info >= 3; tick; } $sleep = undef if @ino_info >= 3; } } if ($sleep) { diag "waiting ${sleep}s for -watch to start up"; sleep $sleep; } $em->commit; # wake -watch up diag 'waiting for -watch to import new message'; PublicInbox::DS->EventLoop; $wm->kill; $wm->join; $ii->close; PublicInbox::DS->Reset; my $head = $git->qx(qw(cat-file commit HEAD)); my $subj = $eml->header('Subject'); like($head, qr/^\Q$subj\E/sm, 'new commit made'); } sub is_maildir { my ($dir) = @_; PublicInbox::Watch::is_maildir($dir); } is(is_maildir('maildir:/hello//world'), '/hello/world', 'extra slash gone'); is(is_maildir('maildir:/hello/world/'), '/hello/world', 'trailing slash gone'); is(is_maildir('faildir:/hello/world/'), undef, 'non-maildir rejected'); done_testing; public-inbox-1.6.1/t/watch_maildir_v2.t000066400000000000000000000154151377346120300177740ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::Eml; use Cwd; use PublicInbox::Config; use PublicInbox::TestCommon; use PublicInbox::Import; require_git(2.6); require_mods(qw(Search::Xapian DBD::SQLite)); require PublicInbox::V2Writable; my ($tmpdir, $for_destroy) = tmpdir(); my $inboxdir = "$tmpdir/v2"; my $maildir = "$tmpdir/md"; my $spamdir = "$tmpdir/spam"; use_ok 'PublicInbox::Watch'; use_ok 'PublicInbox::Emergency'; my $cfgpfx = "publicinbox.test"; my $addr = 'test-public@example.com'; my @cmd = ('-init', '-V2', 'test', $inboxdir, 'http://example.com/v2list', $addr); local $ENV{PI_CONFIG} = "$tmpdir/pi_config"; ok(run_script(\@cmd), 'public-inbox init OK'); my $msg = < Date: Sat, 18 Jun 2016 00:00:00 +0000 something EOF PublicInbox::Emergency->new($maildir)->prepare(\$msg); ok(POSIX::mkfifo("$maildir/cur/fifo", 0777), 'create FIFO to ensure we do not get stuck on it :P'); my $sem = PublicInbox::Emergency->new($spamdir); # create dirs my $orig = <new(\$orig); my $ibx = $config->lookup_name('test'); ok($ibx, 'found inbox by name'); PublicInbox::Watch->new($config)->scan('full'); my $total = scalar @{$ibx->over->recent}; is($total, 1, 'got one revision'); # my $git = PublicInbox::Git->new("$inboxdir/git/0.git"); # my @list = $git->qx(qw(rev-list refs/heads/master)); # is(scalar @list, 1, 'one revision in rev-list'); my $write_spam = sub { is(scalar glob("$spamdir/new/*"), undef, 'no spam existing'); $sem->prepare(\$msg); $sem->commit; my @new = glob("$spamdir/new/*"); is(scalar @new, 1); my @p = split(m!/+!, $new[0]); ok(link($new[0], "$spamdir/cur/".$p[-1].":2,S")); is(unlink($new[0]), 1); }; $write_spam->(); is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam'); PublicInbox::Watch->new($config)->scan('full'); is_deeply($ibx->over->recent, [], 'deleted file'); is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); # check with scrubbing { $msg .= qq(-- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo\@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html\n); PublicInbox::Emergency->new($maildir)->prepare(\$msg); PublicInbox::Watch->new($config)->scan('full'); my $msgs = $ibx->over->recent; is(scalar(@$msgs), 1, 'got one file back'); my $mref = $ibx->msg_by_smsg($msgs->[0]); like($$mref, qr/something\n\z/s, 'message scrubbed on import'); is(unlink(glob("$maildir/new/*")), 1, 'unlinked spam'); $write_spam->(); PublicInbox::Watch->new($config)->scan('full'); $msgs = $ibx->over->recent; is(scalar(@$msgs), 0, 'inbox is empty again'); is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); } { my $fail_bin = getcwd()."/t/fail-bin"; ok(-x "$fail_bin/spamc", "mock spamc exists"); my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc ham mock local $ENV{PATH} = $fail_path; PublicInbox::Emergency->new($maildir)->prepare(\$msg); $config->{'publicinboxwatch.spamcheck'} = 'spamc'; { local $SIG{__WARN__} = sub {}; # quiet spam check warning PublicInbox::Watch->new($config)->scan('full'); } my $msgs = $ibx->over->recent; is(scalar(@$msgs), 0, 'inbox is still empty'); is(unlink(glob("$maildir/new/*")), 1); } { my $main_bin = getcwd()."/t/main-bin"; ok(-x "$main_bin/spamc", "mock spamc exists"); my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock local $ENV{PATH} = $main_path; PublicInbox::Emergency->new($maildir)->prepare(\$msg); $config->{'publicinboxwatch.spamcheck'} = 'spamc'; PublicInbox::Watch->new($config)->scan('full'); my $msgs = $ibx->over->recent; is(scalar(@$msgs), 1, 'inbox has one mail after spamc OK-ed a message'); my $mref = $ibx->msg_by_smsg($msgs->[0]); like($$mref, qr/something\n\z/s, 'message scrubbed on import'); delete $config->{'publicinboxwatch.spamcheck'}; } { my $patch = 't/data/0001.patch'; open my $fh, '<', $patch or die "failed to open $patch: $!\n"; $msg = do { local $/; <$fh> }; PublicInbox::Emergency->new($maildir)->prepare(\$msg); PublicInbox::Watch->new($config)->scan('full'); my $post = $ibx->search->reopen->mset('dfpost:6e006fd7'); is($post->size, 1, 'diff postimage found'); my $pre = $ibx->search->mset('dfpre:090d998b6c2c'); is($pre->size, 1, 'diff preimage found'); $pre = $ibx->search->mset_to_smsg($ibx, $pre); $post = $ibx->search->mset_to_smsg($ibx, $post); is(scalar(@$pre), 1, 'diff preimage found'); is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message'); } # multiple inboxes in the same maildir { my $v1repo = "$tmpdir/v1"; my $v1pfx = "publicinbox.v1"; my $v1addr = 'v1-public@example.com'; PublicInbox::Import::init_bare($v1repo); my $cfg2 = <new(\$cfg2); my $both = < Date: Sat, 18 Jun 2016 00:00:00 +0000 both EOF PublicInbox::Emergency->new($maildir)->prepare(\$both); PublicInbox::Watch->new($config)->scan('full'); my $mset = $ibx->search->reopen->mset('m:both@b.com'); my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); my $v1 = $config->lookup_name('v1'); my $msg = $v1->git->cat_file($msgs->[0]->{blob}); is($both, $$msg, 'got original message back from v1'); $msg = $ibx->git->cat_file($msgs->[0]->{blob}); is($both, $$msg, 'got original message back from v2'); } { my $want = <<'EOF'; From: List-Id: Message-ID: EOF my $do_not_want = <<'EOF'; From: List-Id: X-Mailing-List: no@example.com Message-ID: EOF my $cfg = $orig."$cfgpfx.listid=i.want.you.to.want.me\n"; PublicInbox::Emergency->new($maildir)->prepare(\$want); PublicInbox::Emergency->new($maildir)->prepare(\$do_not_want); my $config = PublicInbox::Config->new(\$cfg); PublicInbox::Watch->new($config)->scan('full'); $ibx = $config->lookup_name('test'); my $num = $ibx->mm->num_for('do.want@example.com'); ok(defined $num, 'List-ID matched for watch'); $num = $ibx->mm->num_for('do.not.want@example.com'); is($num, undef, 'unaccepted List-ID matched for watch'); $cfg = $orig."$cfgpfx.watchheader=X-Mailing-List:no\@example.com\n"; $config = PublicInbox::Config->new(\$cfg); PublicInbox::Watch->new($config)->scan('full'); $ibx = $config->lookup_name('test'); $num = $ibx->mm->num_for('do.not.want@example.com'); ok(defined $num, 'X-Mailing-List matched'); } done_testing; public-inbox-1.6.1/t/watch_multiple_headers.t000066400000000000000000000036741377346120300212760ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::Config; use PublicInbox::TestCommon; require_git(2.6); require_mods(qw(Search::Xapian DBD::SQLite)); my ($tmpdir, $for_destroy) = tmpdir(); my $inboxdir = "$tmpdir/v2"; my $maildir = "$tmpdir/md"; use_ok 'PublicInbox::Watch'; use_ok 'PublicInbox::Emergency'; my $cfgpfx = "publicinbox.test"; my $addr = 'test-public@example.com'; my @cmd = ('-init', '-V2', 'test', $inboxdir, 'http://example.com/list', $addr); local $ENV{PI_CONFIG} = "$tmpdir/pi_config"; ok(run_script(\@cmd), 'public-inbox init OK'); my $msg_to = < Date: Sat, 18 Apr 2020 00:00:00 +0000 content1 EOF my $msg_cc = < Date: Sat, 18 Apr 2020 00:01:00 +0000 content2 EOF my $msg_none = < Date: Sat, 18 Apr 2020 00:02:00 +0000 content3 EOF PublicInbox::Emergency->new($maildir)->prepare(\$msg_to); PublicInbox::Emergency->new($maildir)->prepare(\$msg_cc); PublicInbox::Emergency->new($maildir)->prepare(\$msg_none); my $cfg = <new(\$cfg); PublicInbox::Watch->new($config)->scan('full'); my $ibx = $config->lookup_name('test'); ok($ibx, 'found inbox by name'); my $num = $ibx->mm->num_for('to@a.com'); ok(defined $num, 'Matched for address in To:'); $num = $ibx->mm->num_for('cc@a.com'); ok(defined $num, 'Matched for address in Cc:'); $num = $ibx->mm->num_for('none@a.com'); is($num, undef, 'No match without address in To: or Cc:'); done_testing; public-inbox-1.6.1/t/watch_nntp.t000066400000000000000000000011651377346120300167200ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::Config; # see t/nntpd*.t for tests against a live NNTP server use_ok 'PublicInbox::Watch'; my $nntp_url = \&PublicInbox::Watch::nntp_url; is('news://example.com/inbox.foo', $nntp_url->('NEWS://examplE.com/inbox.foo'), 'lowercased'); is('nntps://example.com/inbox.foo', $nntp_url->('nntps://example.com/inbox.foo'), 'nntps:// accepted'); is('nntps://example.com/inbox.foo', $nntp_url->('SNEWS://example.com/inbox.foo'), 'snews => nntps'); done_testing; public-inbox-1.6.1/t/www_altid.t000066400000000000000000000052521377346120300165550ustar00rootroot00000000000000# Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Inbox; use PublicInbox::InboxWritable; use PublicInbox::Config; use PublicInbox::Spawn qw(which spawn); which('sqlite3') or plan skip_all => 'sqlite3 binary missing'; require_mods(qw(DBD::SQLite HTTP::Request::Common Plack::Test URI::Escape Plack::Builder IO::Uncompress::Gunzip)); use_ok($_) for qw(Plack::Test HTTP::Request::Common); require_ok 'PublicInbox::Msgmap'; require_ok 'PublicInbox::AltId'; require_ok 'PublicInbox::WWW'; my ($inboxdir, $for_destroy) = tmpdir(); my $aid = 'xyz'; my $spec = "serial:$aid:file=blah.sqlite3"; if ('setup') { my $opts = { inboxdir => $inboxdir, name => 'test', -primary_address => 'test@example.com', }; my $ibx = PublicInbox::Inbox->new($opts); $ibx = PublicInbox::InboxWritable->new($ibx, 1); my $im = $ibx->importer(0); my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com Message-Id: EOF $im->add($mime); $im->done; mkdir "$inboxdir/public-inbox" or die; my $altid = PublicInbox::AltId->new($ibx, $spec, 1); $altid->mm_alt->mid_set(1, 'a@example.com'); } my $cfgpath = "$inboxdir/cfg"; open my $fh, '>', $cfgpath or die; print $fh <new($cfgpath); my $www = PublicInbox::WWW->new($cfg); my $cmpfile = "$inboxdir/cmp.sqlite3"; my $client = sub { my ($cb) = @_; my $res = $cb->(POST("/test/$aid.sql.gz")); is($res->code, 200, 'retrieved gzipped dump'); IO::Uncompress::Gunzip::gunzip(\($res->content) => \(my $buf)); pipe(my ($r, $w)) or die; my $cmd = ['sqlite3', $cmpfile]; my $pid = spawn($cmd, undef, { 0 => $r }); print $w $buf or die; close $w or die; is(waitpid($pid, 0), $pid, 'sqlite3 exited'); is($?, 0, 'sqlite3 loaded dump'); my $mm_cmp = PublicInbox::Msgmap->new_file($cmpfile); is($mm_cmp->mid_for(1), 'a@example.com', 'sqlite3 dump valid'); $mm_cmp = undef; unlink $cmpfile or die; }; test_psgi(sub { $www->call(@_) }, $client); SKIP: { require_mods(qw(Plack::Test::ExternalServer), 4); my $env = { PI_CONFIG => $cfgpath }; my $sock = tcp_server() or die; my ($out, $err) = map { "$inboxdir/std$_.log" } qw(out err); my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, $env, { 3 => $sock }); my ($h, $p) = ($sock->sockhost, $sock->sockport); local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; Plack::Test::ExternalServer::test_psgi(client => $client); } done_testing; public-inbox-1.6.1/t/www_listing.t000066400000000000000000000134161377346120300171320ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ # manifest.js.gz generation and grok-pull integration test use strict; use warnings; use Test::More; use PublicInbox::Spawn qw(which); use PublicInbox::TestCommon; use PublicInbox::Import; require_mods(qw(URI::Escape Plack::Builder Digest::SHA IO::Compress::Gzip IO::Uncompress::Gunzip HTTP::Tiny)); require PublicInbox::WwwListing; require PublicInbox::ManifestJsGz; my $json = do { no warnings 'once'; $PublicInbox::ManifestJsGz::json; } or plan skip_all => "JSON module missing"; use_ok 'PublicInbox::Git'; my ($tmpdir, $for_destroy) = tmpdir(); my $bare = PublicInbox::Git->new("$tmpdir/bare.git"); PublicInbox::Import::init_bare($bare->{git_dir}); is(PublicInbox::ManifestJsGz::fingerprint($bare), undef, 'empty repo has no fingerprint'); { my $fi_data = './t/git.fast-import-data'; open my $fh, '<', $fi_data or die "open $fi_data: $!"; my $env = { GIT_DIR => $bare->{git_dir} }; is(xsys([qw(git fast-import --quiet)], $env, { 0 => $fh }), 0, 'fast-import'); } like(PublicInbox::ManifestJsGz::fingerprint($bare), qr/\A[a-f0-9]{40}\z/, 'got fingerprint with non-empty repo'); sub tiny_test { my ($json, $host, $port) = @_; my $tmp; my $http = HTTP::Tiny->new; my $res = $http->get("http://$host:$port/"); is($res->{status}, 200, 'got HTML listing'); like($res->{content}, qr!!si, 'listing looks like HTML'); $res = $http->get("http://$host:$port/", {'Accept-Encoding'=>'gzip'}); is($res->{status}, 200, 'got gzipped HTML listing'); IO::Uncompress::Gunzip::gunzip(\(delete $res->{content}) => \$tmp); like($tmp, qr!!si, 'unzipped listing looks like HTML'); $res = $http->get("http://$host:$port/manifest.js.gz"); is($res->{status}, 200, 'got manifest'); IO::Uncompress::Gunzip::gunzip(\(delete $res->{content}) => \$tmp); unlike($tmp, qr/"modified":\s*"/, 'modified is an integer'); my $manifest = $json->decode($tmp); ok(my $clone = $manifest->{'/alt'}, '/alt in manifest'); is($clone->{owner}, "lorelei \x{100}", 'owner set'); is($clone->{reference}, '/bare', 'reference detected'); is($clone->{description}, "we're all clones", 'description read'); ok(my $bare = $manifest->{'/bare'}, '/bare in manifest'); is($bare->{description}, 'Unnamed repository', 'missing $GIT_DIR/description fallback'); like($bare->{fingerprint}, qr/\A[a-f0-9]{40}\z/, 'fingerprint'); is($clone->{fingerprint}, $bare->{fingerprint}, 'fingerprint matches'); is(HTTP::Date::time2str($bare->{modified}), $res->{headers}->{'last-modified'}, 'modified field and Last-Modified header match'); ok(my $v2epoch0 = $manifest->{'/v2/git/0.git'}, 'v2 epoch 0 appeared'); like($v2epoch0->{description}, qr/ \[epoch 0\]\z/, 'epoch 0 in description'); ok(my $v2epoch1 = $manifest->{'/v2/git/1.git'}, 'v2 epoch 1 appeared'); like($v2epoch1->{description}, qr/ \[epoch 1\]\z/, 'epoch 1 in description'); } my $td; SKIP: { my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $alt = "$tmpdir/alt.git"; my $cfgfile = "$tmpdir/config"; my $v2 = "$tmpdir/v2"; my $sock = tcp_server(); ok($sock, 'sock created'); my ($host, $port) = ($sock->sockhost, $sock->sockport); my @clone = qw(git clone -q -s --bare); is(xsys(@clone, $bare->{git_dir}, $alt), 0, 'clone shared repo'); PublicInbox::Import::init_bare("$v2/all.git"); for my $i (0..2) { is(xsys(@clone, $alt, "$v2/git/$i.git"), 0, "clone epoch $i") } ok(open(my $fh, '>', "$v2/inbox.lock"), 'mock a v2 inbox'); open $fh, '>', "$alt/description" or die; print $fh "we're all clones\n" or die; close $fh or die; is(xsys('git', "--git-dir=$alt", qw(config gitweb.owner), "lorelei \xc4\x80"), 0, 'set gitweb user'); ok(unlink("$bare->{git_dir}/description"), 'removed bare/description'); open $fh, '>', $cfgfile or die; print $fh <<"" or die; [publicinbox] wwwlisting = all [publicinbox "bare"] inboxdir = $bare->{git_dir} url = http://$host/bare address = bare\@example.com [publicinbox "alt"] inboxdir = $alt url = http://$host/alt address = alt\@example.com [publicinbox "v2"] inboxdir = $v2 url = http://$host/v2 address = v2\@example.com close $fh or die; my $env = { PI_CONFIG => $cfgfile }; my $cmd = [ '-httpd', '-W0', "--stdout=$out", "--stderr=$err" ]; $td = start_script($cmd, $env, { 3 => $sock }); $sock = undef; tiny_test($json, $host, $port); my $grok_pull = which('grok-pull') or skip('skipping grok-pull integration test', 2); ok(mkdir("$tmpdir/mirror"), 'prepare grok mirror dest'); open $fh, '>', "$tmpdir/repos.conf" or die; print $fh <<"" or die; # You can pull from multiple grok mirrors, just create # a separate section for each mirror. The name can be anything. [test] site = http://$host:$port manifest = http://$host:$port/manifest.js.gz toplevel = $tmpdir/mirror mymanifest = $tmpdir/local-manifest.js.gz close $fh or die; xsys($grok_pull, '-c', "$tmpdir/repos.conf"); is($? >> 8, 127, 'grok-pull exit code as expected'); for (qw(alt bare v2/git/0.git v2/git/1.git v2/git/2.git)) { ok(-d "$tmpdir/mirror/$_", "grok-pull created $_"); } # support per-inbox manifests, handy for v2: # /$INBOX/v2/manifest.js.gz open $fh, '>', "$tmpdir/per-inbox.conf" or die; print $fh <<"" or die; # You can pull from multiple grok mirrors, just create # a separate section for each mirror. The name can be anything. [v2] site = http://$host:$port manifest = http://$host:$port/v2/manifest.js.gz toplevel = $tmpdir/per-inbox mymanifest = $tmpdir/per-inbox-manifest.js.gz close $fh or die; ok(mkdir("$tmpdir/per-inbox"), 'prepare single-v2-inbox mirror'); xsys($grok_pull, '-c', "$tmpdir/per-inbox.conf"); is($? >> 8, 127, 'grok-pull exit code as expected'); for (qw(v2/git/0.git v2/git/1.git v2/git/2.git)) { ok(-d "$tmpdir/per-inbox/$_", "grok-pull created $_"); } } done_testing(); public-inbox-1.6.1/t/www_static.t000066400000000000000000000065051377346120300167510ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; my ($tmpdir, $for_destroy) = tmpdir(); my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape); require_mods(@mods, 'IO::Uncompress::Gunzip'); use_ok $_ foreach @mods; use_ok 'PublicInbox::WwwStatic'; my $app = sub { my $ws = PublicInbox::WwwStatic->new(docroot => $tmpdir, @_); sub { $ws->call(shift) }; }; test_psgi($app->(), sub { my $cb = shift; my $res = $cb->(GET('/')); is($res->code, 404, '404 on "/" by default'); open my $fh, '>', "$tmpdir/index.html" or die; print $fh 'hi' or die; close $fh or die; $res = $cb->(GET('/')); is($res->code, 200, '200 with index.html'); is($res->content, 'hi', 'default index.html returned'); $res = $cb->(HEAD('/')); is($res->code, 200, '200 on HEAD /'); is($res->content, '', 'no content'); is($res->header('Content-Length'), '2', 'content-length set'); like($res->header('Content-Type'), qr!^text/html\b!, 'content-type is html'); }); test_psgi($app->(autoindex => 1, index => []), sub { my $cb = shift; my $res = $cb->(GET('/')); my $updir = 'href="../">../'; is($res->code, 200, '200 with autoindex default'); my $ls = $res->content; like($ls, qr/index\.html/, 'got listing with index.html'); ok(index($ls, $updir) < 0, 'no updir at /'); mkdir("$tmpdir/dir") or die; rename("$tmpdir/index.html", "$tmpdir/dir/index.html") or die; $res = $cb->(GET('/dir/')); is($res->code, 200, '200 with autoindex for dir/'); $ls = $res->content; ok(index($ls, $updir) > 0, 'updir at /dir/'); for my $up (qw(/../ .. /dir/.. /dir/../)) { is($cb->(GET($up))->code, 403, "`$up' traversal rejected"); } $res = $cb->(GET('/dir')); is($res->code, 302, '302 w/o slash'); like($res->header('Location'), qr!://[^/]+/dir/\z!, 'redirected w/ slash'); rename("$tmpdir/dir/index.html", "$tmpdir/dir/foo") or die; link("$tmpdir/dir/foo", "$tmpdir/dir/foo.gz") or die; $res = $cb->(GET('/dir/')); unlike($res->content, qr/>foo\.gzcontent, qr/>foo(GET('/dir/foo/bar')); is($res->code, 404, 'using file as dir fails'); unlink("$tmpdir/dir/foo") or die; $res = $cb->(GET('/dir/')); like($res->content, qr/>foo\.gz', "$tmpdir/dir/foo" or die; print $fh "uncompressed\n" or die; close $fh or die; utime(0, 0, "$tmpdir/dir/foo") or die; $res = $cb->(GET('/dir/')); my $html = $res->content; like($html, qr/>foofoo\.gz(GET('/dir/foo')); is($res->content, "uncompressed\n", 'got uncompressed on mtime mismatch'); utime(0, 0, "$tmpdir/dir/foo.gz") or die; my $get = GET('/dir/foo'); $get->header('Accept-Encoding' => 'gzip'); $res = $cb->($get); is($res->content, "hi", 'got compressed on mtime match'); $get = GET('/dir/'); $get->header('Accept-Encoding' => 'gzip'); $res = $cb->($get); my $in = $res->content; my $out = ''; IO::Uncompress::Gunzip::gunzip(\$in => \$out); like($out, qr/\A/, 'got HTML start after gunzip'); like($out, qr{$}, 'got HTML end after gunzip'); }); done_testing(); public-inbox-1.6.1/t/x-unknown-alpine.eml000066400000000000000000000012731377346120300202770ustar00rootroot00000000000000Date: Sat, 13 Aug 2016 12:14:15 +0200 (CEST) From: Alpine User To: Subject: charset=X-UNKNOWN test Message-ID: User-Agent: Alpine 2.20 (DEB 67 2015-01-07) MIME-Version: 1.0 Content-Type: multipart/mixed; BOUNDARY="8323329-703494712-1471083256=:4924" This message is in MIME format. The first part should be readable text, while the remaining parts are likely unreadable without MIME-aware tools. --8323329-703494712-1471083256=:4924 Content-Type: text/plain; charset=X-UNKNOWN Content-Transfer-Encoding: QUOTED-PRINTABLE =09https://example.com/ =E2=80=A2 bullet point --8323329-703494712-1471083256=:4924-- public-inbox-1.6.1/t/xcpdb-reshard.t000066400000000000000000000050041377346120300172750ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use PublicInbox::TestCommon; require_mods(qw(DBD::SQLite Search::Xapian)); require_git('2.6'); use PublicInbox::Eml; use PublicInbox::InboxWritable; require PublicInbox::Search; my $mime = PublicInbox::Eml->new(<<'EOF'); From: a@example.com To: test@example.com Subject: this is a subject Date: Fri, 02 Oct 1993 00:00:00 +0000 EOF my ($this) = (split('/', $0))[-1]; my ($tmpdir, $for_destroy) = tmpdir(); local $ENV{PI_CONFIG} = "$tmpdir/config"; my $ibx = PublicInbox::Inbox->new({ inboxdir => "$tmpdir/testbox", name => $this, version => 2, -primary_address => 'test@example.com', indexlevel => 'medium', }); my @xcpdb = qw(-xcpdb -q); my $nproc = 8; my $ndoc = 13; my $im = PublicInbox::InboxWritable->new($ibx, {nproc => $nproc})->importer; for my $i (1..$ndoc) { $mime->header_set('Message-ID', ""); ok($im->add($mime), "message $i added"); } $im->done; my @shards = grep(m!/\d+\z!, glob("$ibx->{inboxdir}/xap*/*")); is(scalar(@shards), $nproc - 1, 'got expected shards'); my $orig = $ibx->over->query_xover(1, $ndoc); my %nums = map {; "$_->{num}" => 1 } @$orig; # ensure we can go up or down in shards, or stay the same: for my $R (qw(2 4 1 3 3)) { delete $ibx->{search}; # release old handles my $cmd = [@xcpdb, "-R$R", $ibx->{inboxdir}]; push @$cmd, '--compact' if $R == 1 && have_xapian_compact; ok(run_script($cmd), "xcpdb -R$R"); my @new_shards = grep(m!/\d+\z!, glob("$ibx->{inboxdir}/xap*/*")); is(scalar(@new_shards), $R, 'resharded to two shards'); my $mset = $ibx->search->mset('s:this'); my $msgs = $ibx->search->mset_to_smsg($ibx, $mset); is(scalar(@$msgs), $ndoc, 'got expected docs after resharding'); my %by_mid = map {; "$_->{mid}" => $_ } @$msgs; ok($by_mid{"m$_\@example.com"}, "$_ exists") for (1..$ndoc); delete $ibx->{search}; # release old handles # ensure docids in Xapian match NNTP article numbers my $tot = 0; my %tmp = %nums; my $XapianDatabase = do { no warnings 'once'; $PublicInbox::Search::X{Database}; }; foreach my $d (@new_shards) { my $xdb = $XapianDatabase->new($d); $tot += $xdb->get_doccount; my $it = $xdb->postlist_begin(''); my $end = $xdb->postlist_end(''); for (; $it != $end; $it++) { my $docid = $it->get_docid; if ($xdb->get_document($docid)) { ok(delete($tmp{$docid}), "saw #$docid"); } } } is(scalar keys %tmp, 0, 'all docids seen'); } done_testing(); 1; public-inbox-1.6.1/xt/000077500000000000000000000000001377346120300145535ustar00rootroot00000000000000public-inbox-1.6.1/xt/cmp-msgstr.t000066400000000000000000000062631377346120300170430ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use Benchmark qw(:all); use PublicInbox::Inbox; use PublicInbox::View; use PublicInbox::TestCommon; use PublicInbox::Eml; use Digest::MD5; use PublicInbox::MsgIter; require_mods(qw(Data::Dumper Email::MIME)); Data::Dumper->import('Dumper'); require PublicInbox::MIME; require_git(2.19); my ($tmpdir, $for_destroy) = tmpdir(); my $inboxdir = $ENV{GIANT_INBOX_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered); my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'cmp' }); my $git = $ibx->git; my $fh = $git->popen(@cat); vec(my $vec = '', fileno($fh), 1) = 1; select($vec, undef, undef, 60) or die "timed out waiting for --batch-check"; my $n = 0; my $m = 0; my $dig_cls = 'Digest::MD5'; sub h ($) { s/\s+\z//s; # E::M leaves trailing white space s/\s+/ /sg; "$_[0]: $_"; } my $cmp = sub { my ($p, $cmp_arg) = @_; my $part = shift @$p; push @$cmp_arg, '---'.join(', ', @$p).'---'; my $ct = $part->content_type // 'text/plain'; $ct =~ s/[ \t]+.*\z//s; my ($s, $err); eval { push @$cmp_arg, map { h 'f' } $part->header('From'); push @$cmp_arg, map { h 't' } $part->header('To'); push @$cmp_arg, map { h 'cc' } $part->header('Cc'); push @$cmp_arg, map { h 'mid' } $part->header('Message-ID'); push @$cmp_arg, map { h 'refs' } $part->header('References'); push @$cmp_arg, map { h 'irt' } $part->header('In-Reply-To'); push @$cmp_arg, map { h 's' } $part->header('Subject'); push @$cmp_arg, map { h 'cd' } $part->header('Content-Description'); ($s, $err) = msg_part_text($part, $ct); if (defined $s) { $s =~ s/\s+\z//s; push @$cmp_arg, "S: ".$s; } else { $part = $part->body; push @$cmp_arg, "T: $ct"; if ($part =~ /[^\p{XPosixPrint}\s]/s) { # binary my $dig = $dig_cls->new; $dig->add($part); push @$cmp_arg, "M: ".$dig->hexdigest; push @$cmp_arg, "B: ".bytes::length($part); } else { $part =~ s/\s+\z//s; push @$cmp_arg, "X: ".$part; } } }; if ($@) { $err //= ''; push @$cmp_arg, "E: $@ ($err)"; } }; my $ndiff = 0; my $git_cb = sub { my ($bref, $oid) = @_; local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ }; ++$m; PublicInbox::MIME->new($$bref)->each_part($cmp, my $m_ctx = [], 1); PublicInbox::Eml->new($$bref)->each_part($cmp, my $e_ctx = [], 1); if (join("\0", @$e_ctx) ne join("\0", @$m_ctx)) { ++$ndiff; open my $fh, '>', "$tmpdir/mime" or die $!; print $fh Dumper($m_ctx) or die $!; close $fh or die $!; open $fh, '>', "$tmpdir/eml" or die $!; print $fh Dumper($e_ctx) or die $!; close $fh or die $!; diag "$inboxdir $oid differ"; # using `git diff', diff(1) may not be installed diag xqx([qw(git diff), "$tmpdir/mime", "$tmpdir/eml"]); } }; my $t = timeit(1, sub { while (<$fh>) { my ($oid, $type) = split / /; next if $type ne 'blob'; ++$n; $git->cat_async($oid, $git_cb); } $git->cat_async_wait; }); is($m, $n, "$inboxdir rendered all $m <=> $n messages"); is($ndiff, 0, "$inboxdir $ndiff differences"); done_testing(); public-inbox-1.6.1/xt/cmp-msgview.t000066400000000000000000000060111377346120300171740ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use Benchmark qw(:all); use PublicInbox::Inbox; use PublicInbox::View; use PublicInbox::TestCommon; use PublicInbox::Eml; use Digest::MD5; require_git(2.19); require_mods qw(Data::Dumper Email::MIME Plack::Util); Data::Dumper->import('Dumper'); require PublicInbox::MIME; my ($tmpdir, $for_destroy) = tmpdir(); my $inboxdir = $ENV{GIANT_INBOX_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered); my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'perf' }); my $git = $ibx->git; my $fh = $git->popen(@cat); vec(my $vec = '', fileno($fh), 1) = 1; select($vec, undef, undef, 60) or die "timed out waiting for --batch-check"; my $mime_ctx = { env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' }, -inbox => $ibx, www => Plack::Util::inline_object(style => sub {''}), obuf => \(my $mime_buf = ''), mhref => '../', }; my $eml_ctx = { %$mime_ctx, obuf => \(my $eml_buf = '') }; my $n = 0; my $m = 0; my $ndiff_html = 0; my $dig_cls = 'Digest::MD5'; my $digest_attach = sub { # ensure ->body (not ->body_raw) matches my ($p, $cmp_arg) = @_; my $part = shift @$p; my $dig = $cmp_arg->[0] //= $dig_cls->new; $dig->add($part->body_raw); push @$cmp_arg, join(', ', @$p); }; my $git_cb = sub { my ($bref, $oid) = @_; local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ }; ++$m; my $mime = PublicInbox::MIME->new($$bref); PublicInbox::View::multipart_text_as_html($mime, $mime_ctx); my $eml = PublicInbox::Eml->new($$bref); PublicInbox::View::multipart_text_as_html($eml, $eml_ctx); if ($eml_buf ne $mime_buf) { ++$ndiff_html; open my $fh, '>', "$tmpdir/mime" or die $!; print $fh $mime_buf or die $!; close $fh or die $!; open $fh, '>', "$tmpdir/eml" or die $!; print $fh $eml_buf or die $!; close $fh or die $!; # using `git diff', diff(1) may not be installed diag "$inboxdir $oid differs"; diag xqx([qw(git diff), "$tmpdir/mime", "$tmpdir/eml"]); } $eml_buf = $mime_buf = ''; # don't tolerate differences in attachment downloads $mime = PublicInbox::MIME->new($$bref); $mime->each_part($digest_attach, my $mime_cmp = [], 1); $eml = PublicInbox::Eml->new($$bref); $eml->each_part($digest_attach, my $eml_cmp = [], 1); $mime_cmp->[0] = $mime_cmp->[0]->hexdigest; $eml_cmp->[0] = $eml_cmp->[0]->hexdigest; # don't have millions of "ok" lines if (join("\0", @$eml_cmp) ne join("\0", @$mime_cmp)) { diag Dumper([ $oid, eml => $eml_cmp, mime =>$mime_cmp ]); is_deeply($eml_cmp, $mime_cmp, "$inboxdir $oid match"); } }; my $t = timeit(1, sub { while (<$fh>) { my ($oid, $type) = split / /; next if $type ne 'blob'; ++$n; $git->cat_async($oid, $git_cb); } $git->cat_async_wait; }); is($m, $n, 'rendered all messages'); # we'll tolerate minor differences in HTML rendering diag "$ndiff_html HTML differences"; done_testing(); public-inbox-1.6.1/xt/eml_check_limits.t000066400000000000000000000050241377346120300202340ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use v5.10.1; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Eml; use PublicInbox::Inbox; use List::Util qw(max); use Benchmark qw(:all :hireswallclock); use PublicInbox::Spawn qw(popen_rd); use Carp (); require_git(2.19); # for --unordered require_mods(qw(BSD::Resource)); BSD::Resource->import(qw(getrusage)); my $cls = $ENV{TEST_CLASS}; if ($cls) { diag "TEST_CLASS=$cls"; require_mods($cls); } $cls //= 'PublicInbox::Eml'; my $inboxdir = $ENV{GIANT_INBOX_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; local $PublicInbox::Eml::mime_nesting_limit = 0x7fffffff; local $PublicInbox::Eml::mime_parts_limit = 0x7fffffff; local $PublicInbox::Eml::header_size_limit = 0x7fffffff; my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'x' }); my $git = $ibx->git; my @cat = qw(cat-file --buffer --batch-check --batch-all-objects --unordered); my $fh = $git->popen(@cat); my ($m, $n); my $max_nest = [ 0, '' ]; # [ bytes, blob oid ] my $max_idx = [ 0, '' ]; my $max_parts = [ 0, '' ]; my $max_size = [ 0, '' ]; my $max_hdr = [ 0, '' ]; my $info = [ 0, '' ]; my $each_part_cb = sub { my ($p) = @_; my ($part, $depth, $idx) = @$p; $max_nest = [ $depth, $info->[1] ] if $depth > $max_nest->[0]; my $max = max(split(/\./, $idx)); $max_idx = [ $max, $info->[1] ] if $max > $max_idx->[0]; ++$info->[0]; }; my ($bref, $oid, $size); local $SIG{__WARN__} = sub { diag "$inboxdir $oid ", @_ }; my $cat_cb = sub { ($bref, $oid, undef, $size) = @_; ++$m; $info = [ 0, $oid ]; my $eml = $cls->new($bref); my $hdr_len = length($eml->header_obj->as_string); $max_hdr = [ $hdr_len, $oid ] if $hdr_len > $max_hdr->[0]; $eml->each_part($each_part_cb, $info, 1); $max_parts = $info if $info->[0] > $max_parts->[0]; $max_size = [ $size, $oid ] if $size > $max_size->[0]; }; my $t = timeit(1, sub { my ($blob, $type); while (<$fh>) { ($blob, $type) = split / /; next if $type ne 'blob'; ++$n; $git->cat_async($blob, $cat_cb); } $git->cat_async_wait; }); is($m, $n, 'scanned all messages'); diag "$$ $inboxdir took ".timestr($t)." for $n <=> $m messages"; diag "$$ max_nest $max_nest->[0] @ $max_nest->[1]"; diag "$$ max_idx $max_idx->[0] @ $max_idx->[1]"; diag "$$ max_parts $max_parts->[0] @ $max_parts->[1]"; diag "$$ max_size $max_size->[0] @ $max_size->[1]"; diag "$$ max_hdr $max_hdr->[0] @ $max_hdr->[1]"; diag "$$ RSS ".getrusage()->maxrss. ' k'; done_testing; public-inbox-1.6.1/xt/git-http-backend.t000066400000000000000000000100721377346120300200650ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ # # Ensure buffering behavior in -httpd doesn't cause runaway memory use # or data corruption use strict; use warnings; use Test::More; use POSIX qw(setsid); use PublicInbox::TestCommon; use PublicInbox::Spawn qw(which); my $git_dir = $ENV{GIANT_GIT_DIR}; plan 'skip_all' => 'GIANT_GIT_DIR not defined' unless $git_dir; require_mods(qw(BSD::Resource Plack::Util Plack::Builder HTTP::Date HTTP::Status Net::HTTP)); my $psgi = "./t/git-http-backend.psgi"; my ($tmpdir, $for_destroy) = tmpdir(); my $err = "$tmpdir/stderr.log"; my $out = "$tmpdir/stdout.log"; my $sock = tcp_server(); my $host = $sock->sockhost; my $port = $sock->sockport; my $td; my $get_maxrss = sub { my $http = Net::HTTP->new(Host => "$host:$port"); ok($http, 'Net::HTTP object created for maxrss'); $http->write_request(GET => '/'); my ($code, $mess, %h) = $http->read_response_headers; is($code, 200, 'success reading maxrss'); my $n = $http->read_entity_body(my $buf, 256); ok(defined $n, 'read response body'); like($buf, qr/\A\d+\n\z/, 'got memory response'); ok(int($buf) > 0, 'got non-zero memory response'); int($buf); }; { ok($sock, 'sock created'); my $cmd = [ '-httpd', '-W0', "--stdout=$out", "--stderr=$err", $psgi ]; $td = start_script($cmd, undef, { 3 => $sock }); } my $mem_a = $get_maxrss->(); SKIP: { my $max = 0; my $pack; my $glob = "$git_dir/objects/pack/pack-*.pack"; foreach my $f (glob($glob)) { my $n = -s $f; if ($n > $max) { $max = $n; $pack = $f; } } skip "no packs found in $git_dir" unless defined $pack; if ($pack !~ m!(/objects/pack/pack-[a-f0-9]{40}.pack)\z!) { skip "bad pack name: $pack"; } my $url = $1; my $http = Net::HTTP->new(Host => "$host:$port"); ok($http, 'Net::HTTP object created'); $http->write_request(GET => $url); my ($code, $mess, %h) = $http->read_response_headers; is(200, $code, 'got 200 success for pack'); is($max, $h{'Content-Length'}, 'got expected Content-Length for pack'); # no $http->read_entity_body, here, since we want to force buffering foreach my $i (1..3) { sleep 1; my $diff = $get_maxrss->() - $mem_a; note "${diff}K memory increase after $i seconds"; ok($diff < 1024, 'no bloating caused by slow dumb client'); } } SKIP: { # make sure Last-Modified + If-Modified-Since works with curl my $nr = 6; skip 'no description', $nr unless -f "$git_dir/description"; my $mtime = (stat(_))[9]; my $curl = which('curl'); skip 'curl(1) not found', $nr unless $curl; my $url = "http://$host:$port/description"; my $dst = "$tmpdir/desc"; is(xsys($curl, qw(-RsSf), '-o', $dst, $url), 0, 'curl -R'); is((stat($dst))[9], $mtime, 'curl used remote mtime'); is(xsys($curl, qw(-sSf), '-z', $dst, '-o', "$dst.2", $url), 0, 'curl -z noop'); ok(!-e "$dst.2", 'no modification, nothing retrieved'); utime(0, 0, $dst) or die "utime failed: $!"; is(xsys($curl, qw(-sSfR), '-z', $dst, '-o', "$dst.2", $url), 0, 'curl -z updates'); ok(-e "$dst.2", 'faked modification, got new file retrieved'); } { my $c = fork; if ($c == 0) { setsid(); exec qw(git clone -q --mirror), "http://$host:$port/", "$tmpdir/mirror.git"; die "Failed start git clone: $!\n"; } select(undef, undef, undef, 0.1); foreach my $i (1..10) { is(1, kill('STOP', -$c), 'signaled clone STOP'); sleep 1; ok(kill('CONT', -$c), 'continued clone'); my $diff = $get_maxrss->() - $mem_a; note "${diff}K memory increase after $i seconds"; ok($diff < 2048, 'no bloating caused by slow smart client'); } ok(kill('CONT', -$c), 'continued clone'); is($c, waitpid($c, 0), 'reaped wayward slow clone'); is($?, 0, 'clone did not error out'); note 'clone done, fsck-ing clone result...'; is(0, system("git", "--git-dir=$tmpdir/mirror.git", qw(fsck --no-progress)), 'fsck did not report corruption'); my $diff = $get_maxrss->() - $mem_a; note "${diff}K memory increase after smart clone"; ok($diff < 2048, 'no bloating caused by slow smart client'); } { ok($td->kill, 'killed httpd'); $td->join; } done_testing(); public-inbox-1.6.1/xt/git_async_cmp.t000066400000000000000000000030761377346120300175650ustar00rootroot00000000000000#!perl -w # Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use Benchmark qw(:all); use Digest::SHA; use PublicInbox::TestCommon; my $git_dir = $ENV{GIANT_GIT_DIR}; plan 'skip_all' => "GIANT_GIT_DIR not defined for $0" unless defined($git_dir); use_ok 'PublicInbox::Git'; my $git = PublicInbox::Git->new($git_dir); my @cat = qw(cat-file --buffer --batch-check --batch-all-objects); if (require_git(2.19, 1)) { push @cat, '--unordered'; } else { warn "git <2.19, cat-file lacks --unordered, locality suffers\n"; } my @dig; my $nr = $ENV{NR} || 1; diag "NR=$nr"; my $async = timeit($nr, sub { my $dig = Digest::SHA->new(1); my $cb = sub { my ($bref) = @_; $dig->add($$bref); }; my $cat = $git->popen(@cat); while (<$cat>) { my ($oid, undef, undef) = split(/ /); $git->cat_async($oid, $cb); } close $cat or die "cat: $?"; $git->cat_async_wait; push @dig, ['async', $dig->hexdigest ]; }); my $sync = timeit($nr, sub { my $dig = Digest::SHA->new(1); my $cat = $git->popen(@cat); while (<$cat>) { my ($oid, undef, undef) = split(/ /); my $bref = $git->cat_file($oid); $dig->add($$bref); } close $cat or die "cat: $?"; push @dig, ['sync', $dig->hexdigest ]; }); ok(scalar(@dig) >= 2, 'got some digests'); my $ref = shift @dig; my $exp = $ref->[1]; isnt($exp, Digest::SHA->new(1)->hexdigest, 'not empty'); foreach (@dig) { is($_->[1], $exp, "digest matches $_->[0] <=> $ref->[0]"); } diag "sync=".timestr($sync); diag "async=".timestr($async); done_testing; 1; public-inbox-1.6.1/xt/httpd-async-stream.t000066400000000000000000000061201377346120300204660ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ # Expensive test to validate compression and TLS. use strict; use Test::More; use PublicInbox::TestCommon; use PublicInbox::DS qw(now); use PublicInbox::Spawn qw(which popen_rd); use Digest::MD5; use POSIX qw(_exit); my $inboxdir = $ENV{GIANT_INBOX_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; my $curl = which('curl') or plan skip_all => "curl(1) missing for $0"; my ($tmpdir, $for_destroy) = tmpdir(); require_mods(qw(DBD::SQLite)); my $JOBS = $ENV{TEST_JOBS} // 4; my $endpoint = $ENV{TEST_ENDPOINT} // 'all.mbox.gz'; my $curl_opt = $ENV{TEST_CURL_OPT} // ''; diag "TEST_JOBS=$JOBS TEST_ENDPOINT=$endpoint TEST_CURL_OPT=$curl_opt"; # we set Host: to ensure stable results across test runs my @CURL_OPT = (qw(-HHost:example.com -sSf), split(' ', $curl_opt)); my $make_local_server = sub { my $pi_config = "$tmpdir/config"; open my $fh, '>', $pi_config or die "open($pi_config): $!"; print $fh <<"" or die "print $pi_config: $!"; [publicinbox "test"] inboxdir = $inboxdir address = test\@example.com close $fh or die "close($pi_config): $!"; my ($out, $err) = ("$tmpdir/out", "$tmpdir/err"); for ($out, $err) { open my $fh, '>', $_ or die "truncate: $!"; } my $http = tcp_server(); my $rdr = { 3 => $http }; # not using multiple workers, here, since we want to increase # the chance of tripping concurrency bugs within PublicInbox/HTTP*.pm my $cmd = [ '-httpd', "--stdout=$out", "--stderr=$err", '-W0' ]; my $host_port = $http->sockhost.':'.$http->sockport; push @$cmd, "-lhttp://$host_port"; my $url = "$host_port/test/$endpoint"; print STDERR "# CMD ". join(' ', @$cmd). "\n"; my $env = { PI_CONFIG => $pi_config }; (start_script($cmd, $env, $rdr), $url); }; my ($td, $url) = $make_local_server->(); my $do_get_all = sub { my ($job) = @_; local $SIG{__DIE__} = sub { print STDERR $job, ': ', @_; _exit(1) }; my $dig = Digest::MD5->new; my ($buf, $nr); my $bytes = 0; my $t0 = now(); my ($rd, $pid) = popen_rd([$curl, @CURL_OPT, $url]); while (1) { $nr = sysread($rd, $buf, 65536); last if !$nr; $dig->add($buf); $bytes += $nr; } my $res = $dig->hexdigest; my $elapsed = sprintf('%0.3f', now() - $t0); close $rd or die "close curl failed: $!\n"; waitpid($pid, 0) == $pid or die "waitpid failed: $!\n"; $? == 0 or die "curl failed: $?\n"; print STDERR "# $job $$ ($?) $res (${elapsed}s) $bytes bytes\n"; $res; }; my (%pids, %res); for my $job (1..$JOBS) { pipe(my ($r, $w)) or die; my $pid = fork; if ($pid == 0) { close $r or die; my $res = $do_get_all->($job); print $w $res or die; close $w or die; _exit(0); } close $w or die; $pids{$pid} = [ $job, $r ]; } while (scalar keys %pids) { my $pid = waitpid(-1, 0) or next; my $child = delete $pids{$pid} or next; my ($job, $rpipe) = @$child; is($?, 0, "$job done"); my $sum = do { local $/; <$rpipe> }; push @{$res{$sum}}, $job; } is(scalar keys %res, 1, 'all got the same result'); $td->kill; $td->join; is($?, 0, 'no error on -httpd exit'); done_testing; public-inbox-1.6.1/xt/imapd-mbsync-oimap.t000066400000000000000000000065411377346120300204340ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ # ensure mbsync and offlineimap compatibility use strict; use Test::More; use File::Path qw(mkpath); use PublicInbox::TestCommon; use PublicInbox::Spawn qw(which spawn); require_mods(qw(DBD::SQLite Email::Address::XS||Mail::Address)); my $inboxdir = $ENV{GIANT_INBOX_DIR}; (defined($inboxdir) && -d $inboxdir) or plan skip_all => "GIANT_INBOX_DIR not defined for $0"; plan skip_all => "bad characters in $inboxdir" if $inboxdir =~ m![^\w\.\-/]!; my ($tmpdir, $for_destroy) = tmpdir(); my $cfg = "$tmpdir/cfg"; my $newsgroup = 'inbox.test'; my $mailbox = "$newsgroup.0"; { open my $fh, '>', $cfg or BAIL_OUT "open: $!"; print $fh < $cfg }; my $td = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT "-imapd: $?"; { my $c = tcp_connect($sock); like(readline($c), qr/CAPABILITY /, 'got greeting'); } my ($host, $port) = ($sock->sockhost, $sock->sockport); my %pids; SKIP: { mkpath([map { "$tmpdir/oimapdir/$_" } qw(cur new tmp)]); my $oimap = which('offlineimap') or skip 'no offlineimap(1)', 1; open my $fh, '>', "$tmpdir/.offlineimaprc" or BAIL_OUT "open: $!"; print $fh < usecompression = no EOF close $fh or BAIL_OUT "close: $!"; my $cmd = [ $oimap, qw(-o -q -u quiet) ]; my $pid = spawn($cmd, { HOME => $tmpdir }, { 1 => 2 }); $pids{$pid} = $cmd; } SKIP: { mkpath([map { "$tmpdir/mbsyncdir/test/$_" } qw(cur new tmp)]); my $mbsync = which('mbsync') or skip 'no mbsync(1)', 1; open my $fh, '>', "$tmpdir/.mbsyncrc" or BAIL_OUT "open: $!"; print $fh < $tmpdir }, { 1 => 2 }); $pids{$pid} = $cmd; } while (scalar keys %pids) { my $pid = waitpid(-1, 0) or next; my $cmd = delete $pids{$pid} or next; is($?, 0, join(' ', @$cmd, 'done')); } my $sec = $ENV{TEST_PERSIST} // 0; diag "TEST_PERSIST=$sec"; if ($sec) { diag "sleeping ${sec}s, imap://$host:$port/$mailbox available"; diag "tmpdir=$tmpdir (Maildirs available)"; diag "stdout=$out"; diag "stderr=$err"; diag "pid=$td->{pid}"; sleep $sec; } $td->kill; $td->join; is($?, 0, 'no error on -imapd exit'); done_testing; public-inbox-1.6.1/xt/imapd-validate.t000066400000000000000000000124401377346120300176220ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ # Expensive test to validate compression and TLS. use strict; use Test::More; use Symbol qw(gensym); use PublicInbox::DS qw(now); use POSIX qw(_exit); use PublicInbox::TestCommon; my $inbox_dir = $ENV{GIANT_INBOX_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inbox_dir; # how many emails to read into memory at once per-process my $BATCH = $ENV{TEST_BATCH} // 100; my $REPEAT = $ENV{TEST_REPEAT} // 1; diag "TEST_BATCH=$BATCH TEST_REPEAT=$REPEAT"; require_mods(qw(Mail::IMAPClient Email::Address::XS||Mail::Address)); my $imap_client = 'Mail::IMAPClient'; my $can_compress = $imap_client->can('compress'); if ($can_compress) { # hope this gets fixed upstream, soon require PublicInbox::IMAPClient; $imap_client = 'PublicInbox::IMAPClient'; } my $test_tls = $ENV{TEST_SKIP_TLS} ? 0 : eval { require IO::Socket::SSL }; my ($cert, $key) = qw(certs/server-cert.pem certs/server-key.pem); if ($test_tls && !-r $key || !-r $cert) { plan skip_all => "certs/ missing for $0, run $^X ./certs/create-certs.perl"; } my ($tmpdir, $for_destroy) = tmpdir(); my %OPT = qw(User u Password p); my (%STARTTLS_OPT, %IMAPS_OPT, $td, $newsgroup, $mailbox, $make_local_server); if (($ENV{IMAP_TEST_URL} // '') =~ m!\Aimap://([^/]+)/(.+)\z!) { ($OPT{Server}, $mailbox) = ($1, $2); $OPT{Server} =~ s/:([0-9]+)\z// and $OPT{Port} = $1 + 0; %STARTTLS_OPT = %OPT; %IMAPS_OPT = (%OPT, Port => 993) if $OPT{Port} == 143; } else { require_mods(qw(DBD::SQLite)); $make_local_server->(); $mailbox = "$newsgroup.0"; } my %opts = (imap => \%OPT, 'imap+compress' => { %OPT, Compress => 1 }); my $uid_max = do { my $mic = $imap_client->new(%OPT) or BAIL_OUT "new $!"; $mic->examine($mailbox) or BAIL_OUT "examine: $!"; my $next = $mic->uidnext($mailbox) or BAIL_OUT "uidnext: $!"; $next - 1; }; if (scalar keys %STARTTLS_OPT) { $opts{starttls} = \%STARTTLS_OPT; $opts{'starttls+compress'} = { %STARTTLS_OPT, Compress => 1 }; } if (scalar keys %IMAPS_OPT) { $opts{imaps} = \%IMAPS_OPT; $opts{'imaps+compress'} = { %IMAPS_OPT, Compress => 1 }; } my $do_get_all = sub { my ($desc, $opt) = @_; local $SIG{__DIE__} = sub { print STDERR $desc, ': ', @_; _exit(1) }; my $t0 = now(); my $dig = Digest::SHA->new(1); my $mic = $imap_client->new(%$opt); $mic->examine($mailbox) or die "examine: $!"; my $uid_base = 1; my $bytes = 0; my $nr = 0; until ($uid_base > $uid_max) { my $end = $uid_base + $BATCH; my $ret = $mic->fetch_hash("$uid_base:$end", 'BODY[]') or last; for my $uid ($uid_base..$end) { $dig->add($uid); my $h = delete $ret->{$uid} or next; my $body = delete $h->{'BODY[]'} or die "no BODY[] for UID=$uid"; $dig->add($body); $bytes += length($body); ++$nr; } $uid_base = $end + 1; } $mic->logout or die "logout failed: $!"; my $elapsed = sprintf('%0.3f', now() - $t0); my $res = $dig->hexdigest; print STDERR "# $desc $res (${elapsed}s) $bytes bytes, NR=$nr\n"; $res; }; my (%pids, %res); for (1..$REPEAT) { while (my ($desc, $opt) = each %opts) { pipe(my ($r, $w)) or die; my $pid = fork; if ($pid == 0) { close $r or die; my $res = $do_get_all->($desc, $opt); print $w $res or die; close $w or die; _exit(0); } close $w or die; $pids{$pid} = [ $desc, $r ]; } } while (scalar keys %pids) { my $pid = waitpid(-1, 0) or next; my $child = delete $pids{$pid} or next; my ($desc, $rpipe) = @$child; is($?, 0, "$desc done"); my $sum = do { local $/; <$rpipe> }; push @{$res{$sum}}, $desc; } is(scalar keys %res, 1, 'all got the same result'); $td->kill; $td->join; is($?, 0, 'no error on -imapd exit'); done_testing; BEGIN { $make_local_server = sub { require PublicInbox::Inbox; $newsgroup = 'inbox.test'; my $ibx = { inboxdir => $inbox_dir, newsgroup => $newsgroup }; $ibx = PublicInbox::Inbox->new($ibx); my $pi_config = "$tmpdir/config"; { open my $fh, '>', $pi_config or die "open($pi_config): $!"; print $fh <<"" or die "print $pi_config: $!"; [publicinbox "test"] newsgroup = $newsgroup inboxdir = $inbox_dir address = test\@example.com close $fh or die "close($pi_config): $!"; } my ($out, $err) = ("$tmpdir/out", "$tmpdir/err"); for ($out, $err) { open my $fh, '>', $_ or die "truncate: $!"; } my $imap = tcp_server(); my $rdr = { 3 => $imap }; $OPT{Server} = $imap->sockhost; $OPT{Port} = $imap->sockport; # not using multiple workers, here, since we want to increase # the chance of tripping concurrency bugs within PublicInbox/IMAP*.pm my $cmd = [ '-imapd', "--stdout=$out", "--stderr=$err", '-W0' ]; push @$cmd, '-limap://'.$imap->sockhost.':'.$imap->sockport; if ($test_tls) { my $imaps = tcp_server(); $rdr->{4} = $imaps; push @$cmd, '-limaps://'.$imaps->sockhost.':'.$imaps->sockport; push @$cmd, "--cert=$cert", "--key=$key"; my $tls_opt = [ SSL_hostname => 'server.local', SSL_verifycn_name => 'server.local', SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_PEER(), SSL_ca_file => 'certs/test-ca.pem', ]; %STARTTLS_OPT = (%OPT, Starttls => $tls_opt); %IMAPS_OPT = (%OPT, Ssl => $tls_opt, Server => $imaps->sockhost, Port => $imaps->sockport ); } print STDERR "# CMD ". join(' ', @$cmd). "\n"; my $env = { PI_CONFIG => $pi_config }; $td = start_script($cmd, $env, $rdr); }; } # BEGIN public-inbox-1.6.1/xt/mem-imapd-tls.t000066400000000000000000000155371377346120300174210ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ # Idle client memory usage test, particularly after EXAMINE when # Message Sequence Numbers are loaded use strict; use Test::More; use Socket qw(SOCK_STREAM IPPROTO_TCP SOL_SOCKET); use PublicInbox::TestCommon; use PublicInbox::Syscall qw(:epoll); use PublicInbox::DS; require_mods(qw(DBD::SQLite Email::Address::XS||Mail::Address Parse::RecDescent)); my $inboxdir = $ENV{GIANT_INBOX_DIR}; my $TEST_TLS; SKIP: { require_mods('IO::Socket::SSL', 1); $TEST_TLS = $ENV{TEST_TLS} // 1; }; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; diag 'TEST_COMPRESS='.($ENV{TEST_COMPRESS} // 1) . " TEST_TLS=$TEST_TLS"; my ($cert, $key) = qw(certs/server-cert.pem certs/server-key.pem); if ($TEST_TLS) { if (!-r $key || !-r $cert) { plan skip_all => "certs/ missing for $0, run ./certs/create-certs.perl"; } use_ok 'PublicInbox::TLS'; } my ($tmpdir, $for_destroy) = tmpdir(); my ($out, $err) = ("$tmpdir/stdout.log", "$tmpdir/stderr.log"); my $pi_config = "$tmpdir/pi_config"; my $group = 'inbox.test'; local $SIG{PIPE} = 'IGNORE'; # for IMAPC (below) my $imaps = tcp_server(); { open my $fh, '>', $pi_config or die "open: $!\n"; print $fh <sockhost . ':' . $imaps->sockport; my $env = { PI_CONFIG => $pi_config }; my $arg = $TEST_TLS ? [ "-limaps://$imaps_addr/?cert=$cert,key=$key" ] : []; my $cmd = [ '-imapd', '-W0', @$arg, "--stdout=$out", "--stderr=$err" ]; # run_mode=0 ensures Test::More FDs don't get shared my $td = start_script($cmd, $env, { 3 => $imaps, run_mode => 0 }); my %ssl_opt; if ($TEST_TLS) { %ssl_opt = ( SSL_hostname => 'server.local', SSL_verifycn_name => 'server.local', SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_PEER(), SSL_ca_file => 'certs/test-ca.pem', ); my $ctx = IO::Socket::SSL::SSL_Context->new(%ssl_opt); # cf. https://rt.cpan.org/Ticket/Display.html?id=129463 my $mode = eval { Net::SSLeay::MODE_RELEASE_BUFFERS() }; if ($mode && $ctx->{context}) { eval { Net::SSLeay::CTX_set_mode($ctx->{context}, $mode) }; warn "W: $@ (setting SSL_MODE_RELEASE_BUFFERS)\n" if $@; } $ssl_opt{SSL_reuse_ctx} = $ctx; $ssl_opt{SSL_startHandshake} = 0; } chomp(my $nfd = `/bin/sh -c 'ulimit -n'`); $nfd -= 10; ok($nfd > 0, 'positive FD count'); my $MAX_FD = 10000; $nfd = $MAX_FD if $nfd >= $MAX_FD; our $DONE = 0; sub once { 0 }; # stops event loop # setup the event loop so that it exits at every step # while we're still doing connect(2) PublicInbox::DS->SetLoopTimeout(0); PublicInbox::DS->SetPostLoopCallback(\&once); my $pid = $td->{pid}; if ($^O eq 'linux' && open(my $f, '<', "/proc/$pid/status")) { diag(grep(/RssAnon/, <$f>)); } foreach my $n (1..$nfd) { my $io = tcp_connect($imaps, Blocking => 0); $io = IO::Socket::SSL->start_SSL($io, %ssl_opt) if $TEST_TLS; IMAPC->new($io); # one step through the event loop # do a little work as we connect: PublicInbox::DS->EventLoop; # try not to overflow the listen() backlog: if (!($n % 128) && $DONE != $n) { diag("nr: ($n) $DONE/$nfd"); PublicInbox::DS->SetLoopTimeout(-1); PublicInbox::DS->SetPostLoopCallback(sub { $DONE != $n }); # clear the backlog: PublicInbox::DS->EventLoop; # resume looping PublicInbox::DS->SetLoopTimeout(0); PublicInbox::DS->SetPostLoopCallback(\&once); } } # run the event loop normally, now: diag "done?: @".time." $DONE/$nfd"; if ($DONE != $nfd) { PublicInbox::DS->SetLoopTimeout(-1); PublicInbox::DS->SetPostLoopCallback(sub { $DONE != $nfd }); PublicInbox::DS->EventLoop; } is($nfd, $DONE, "$nfd/$DONE done"); if ($^O eq 'linux' && open(my $f, '<', "/proc/$pid/status")) { diag(grep(/RssAnon/, <$f>)); diag " SELF lsof | wc -l ".`lsof -p $$ |wc -l`; diag "SERVER lsof | wc -l ".`lsof -p $pid |wc -l`; } PublicInbox::DS->Reset; $td->kill; $td->join; is($?, 0, 'no error in exited process'); done_testing; package IMAPC; use strict; use parent qw(PublicInbox::DS); # fields: step: state machine, zin: Zlib inflate context use PublicInbox::Syscall qw(EPOLLIN EPOLLOUT EPOLLONESHOT); use Errno qw(EAGAIN); # determines where we start event_step use constant FIRST_STEP => ($ENV{TEST_COMPRESS} // 1) ? -2 : 0; # return true if complete, false if incomplete (or failure) sub connect_tls_step { my ($self) = @_; my $sock = $self->{sock} or return; return 1 if $sock->connect_SSL; return $self->drop("$!") if $! != EAGAIN; if (my $ev = PublicInbox::TLS::epollbit()) { unshift @{$self->{wbuf}}, \&connect_tls_step; PublicInbox::DS::epwait($sock, $ev | EPOLLONESHOT); 0; } else { $self->drop('BUG? EAGAIN but '.PublicInbox::TLS::err()); } } sub event_step { my ($self) = @_; # TLS negotiation happens in flush_write via {wbuf} return unless $self->flush_write && $self->{sock}; if ($self->{step} == -2) { $self->do_read(\(my $buf = ''), 128) or return; $buf =~ /\A\* OK / or die 'no greeting'; $self->{step} = -1; $self->write(\"1 COMPRESS DEFLATE\r\n"); } if ($self->{step} == -1) { $self->do_read(\(my $buf = ''), 128) or return; $buf =~ /\A1 OK / or die "no compression $buf"; IMAPCdeflate->enable($self); $self->{step} = 1; $self->write(\"2 EXAMINE inbox.test.0\r\n"); } if ($self->{step} == 0) { $self->do_read(\(my $buf = ''), 128) or return; $buf =~ /\A\* OK / or die 'no greeting'; $self->{step} = 1; $self->write(\"2 EXAMINE inbox.test.0\r\n"); } if ($self->{step} == 1) { my $buf = ''; until ($buf =~ /^2 OK \[READ-ONLY/ms) { $self->do_read(\$buf, 4096, length($buf)) or return; } $self->{step} = 2; $self->write(\"3 UID FETCH 1 (UID FLAGS)\r\n"); } if ($self->{step} == 2) { my $buf = ''; until ($buf =~ /^3 OK /ms) { $self->do_read(\$buf, 4096, length($buf)) or return; } $self->{step} = 3; $self->write(\"4 IDLE\r\n"); } if ($self->{step} == 3) { $self->do_read(\(my $buf = ''), 128) or return; no warnings 'once'; $::DONE++; $self->{step} = 5; # all done } else { warn "$self->{step} Should never get here $self"; } } sub new { my ($class, $io) = @_; my $self = bless { step => FIRST_STEP }, $class; if ($io->can('connect_SSL')) { $self->{wbuf} = [ \&connect_tls_step ]; } # wait for connect(), and maybe SSL_connect() $self->SUPER::new($io, EPOLLOUT|EPOLLONESHOT); } 1; package IMAPCdeflate; use strict; our @ISA; use Compress::Raw::Zlib; use PublicInbox::IMAPdeflate; my %ZIN_OPT; BEGIN { @ISA = qw(IMAPC); %ZIN_OPT = ( -WindowBits => -15, -AppendOutput => 1 ); *write = \&PublicInbox::IMAPdeflate::write; *do_read = \&PublicInbox::IMAPdeflate::do_read; }; sub enable { my ($class, $self) = @_; my ($in, $err) = Compress::Raw::Zlib::Inflate->new(%ZIN_OPT); die "Inflate->new failed: $err" if $err != Z_OK; bless $self, $class; $self->{zin} = $in; } 1; public-inbox-1.6.1/xt/mem-msgview.t000066400000000000000000000047241377346120300172040ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ # Note: this may be altered as-needed to demonstrate improvements. # See history in git for this file. use strict; use IO::Handle; # ->flush use Fcntl qw(SEEK_SET); use PublicInbox::TestCommon; use PublicInbox::Tmpfile; use Test::More; my @mods = qw(DBD::SQLite BSD::Resource PublicInbox::WWW); require_mods(@mods); use_ok($_) for @mods; my $lines = $ENV{NR_LINES} // 50000; diag "NR_LINES=$lines"; my ($tmpdir, $for_destroy) = tmpdir(); my $inboxname = 'big'; my $inboxdir = "$tmpdir/big"; local $ENV{PI_CONFIG} = "$tmpdir/cfg"; my $mid = 'test@example.com'; { # setup open my $fh, '>', "$tmpdir/cfg" or die; print $fh < To: Nikki <%s> Date: Tue, 3 May 1988 00:00:00 +0000 Subject: todo Message-ID: <%s> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="FOO" Content-Disposition: inline --FOO Content-Type: text/plain; charset=utf-8 Content-Disposition: inline EOF print $fh $hdr or die; for (0..$lines) { print $fh 'x' x 72, "\n" or die } print $fh <flush or die; sysseek($fh, 0, SEEK_SET) or die; my $env = { ORIGINAL_RECIPIENT => $addr }; my $err = ''; my $opt = { 0 => $fh, 2 => \$err, run_mode => 0 }; ok(run_script([qw(-mda --no-precheck)], $env, $opt), 'message delivered'); } my $www = PublicInbox::WWW->new; my $env = { PATH_INFO => "/$inboxname/$mid/", REQUEST_URI => "/$inboxname/$mid/", SCRIPT_NAME => '', QUERY_STRING => '', REQUEST_METHOD => 'GET', HTTP_HOST => 'example.com', 'psgi.errors' => \*STDERR, 'psgi.url_scheme' => 'http', }; my $ru_before = BSD::Resource::getrusage(); my $res = $www->call($env); my $body = $res->[2]; while (defined(my $x = $body->getline)) { } $body->close; my $ru_after = BSD::Resource::getrusage(); my $diff = $ru_after->maxrss - $ru_before->maxrss; diag "before: ${\$ru_before->maxrss} => ${\$ru_after->maxrss} diff=$diff kB"; done_testing(); public-inbox-1.6.1/xt/msgtime_cmp.t000066400000000000000000000110551377346120300172460ustar00rootroot00000000000000#!perl -w # Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Eml; use PublicInbox::Inbox; use PublicInbox::Git; use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp); use POSIX qw(strftime); require_mods('Date::Parse'); my $git; my ($inboxdir, $git_dir) = @ENV{qw(GIANT_INBOX_DIR GIANT_GIT_DIR)}; if (defined $inboxdir) { my $ibx = { inboxdir => $inboxdir, name => 'name' }; $git = PublicInbox::Inbox->new($ibx)->git; } elsif (defined $git_dir) { # sometimes just an old epoch is enough, since newer filters are nicer $git = PublicInbox::Git->new($git_dir); } else { plan skip_all => "GIANT_INBOX_DIR not set for $0"; } my @cat = qw(cat-file --buffer --batch-check --batch-all-objects); if (require_git(2.19, 1)) { push @cat, '--unordered'; } else { warn "git <2.19, cat-file lacks --unordered, locality suffers\n"; } # millions of "ok" lines are noise, just show mismatches: sub quiet_is_deeply ($$$$$) { my ($cur, $old, $func, $oid, $hdr) = @_; if ((scalar(@$cur) != 2) || (scalar(@$old) == 2 && ($old->[0] != $cur->[0]) || ($old->[1] != $cur->[1]))) { for ($cur, $old) { $_->[2] = strftime('%Y-%m-%d %k:%M:%S', gmtime($_->[0])) } is_deeply($cur, $old, "$func $oid"); diag('got: ', explain($cur)); diag('exp: ', explain($old)); diag $hdr->as_string; } } sub compare { my ($bref, $oid, $type, $size) = @_; local $SIG{__WARN__} = sub { diag "$oid: ", @_ }; my $mime = PublicInbox::Eml->new($$bref); my $hdr = $mime->header_obj; my @cur = msg_datestamp($hdr); my @old = Old::msg_datestamp($hdr); quiet_is_deeply(\@cur, \@old, 'datestamp', $oid, $hdr); @cur = msg_timestamp($hdr); @old = Old::msg_timestamp($hdr); quiet_is_deeply(\@cur, \@old, 'timestamp', $oid, $hdr); } my $fh = $git->popen(@cat); while (<$fh>) { my ($oid, $type) = split / /; next if $type ne 'blob'; $git->cat_async($oid, \&compare); } $git->cat_async_wait; ok(1); done_testing; # old date/time-related functions package Old; use strict; use warnings; sub str2date_zone ($) { my ($date) = @_; my $ts = Date::Parse::str2time($date); return undef unless(defined $ts); # off is the time zone offset in seconds from GMT my ($ss,$mm,$hh,$day,$month,$year,$off) = Date::Parse::strptime($date); # new behavior which wasn't in the original old version: if ('commit d857e7dc0d816b635a7ead09c3273f8c2d2434be') { # "msgtime: assume +0000 if TZ missing when using Date::Parse" $off //= '+0000'; } return undef unless(defined $off); # Compute the time zone from offset my $sign = ($off < 0) ? '-' : '+'; my $hour = abs(int($off / 3600)); my $min = ($off / 60) % 60; # deal with weird offsets like '-0420' properly $min = 60 - $min if ($min && $off < 0); my $zone = sprintf('%s%02d%02d', $sign, $hour, $min); # "-1200" is the furthest westermost zone offset, # but git fast-import is liberal so we use "-1400" if ($zone >= 1400 || $zone <= -1400) { warn "bogus TZ offset: $zone, ignoring and assuming +0000\n"; $zone = '+0000'; } [$ts, $zone]; } sub time_response ($) { my ($ret) = @_; wantarray ? @$ret : $ret->[0]; } sub msg_received_at ($) { my ($hdr) = @_; # PublicInbox::Eml my @recvd = $hdr->header_raw('Received'); my ($ts); foreach my $r (@recvd) { $r =~ /\s*([0-9]+\s+[a-zA-Z]+\s+[0-9]{2,4}\s+ [0-9]+[^0-9][0-9]+(?:[^0-9][0-9]+) \s+([\+\-][0-9]+))/sx or next; $ts = eval { str2date_zone($1) } and return $ts; my $mid = $hdr->header_raw('Message-ID'); warn "no date in $mid Received: $r\n"; } undef; } sub msg_date_only ($) { my ($hdr) = @_; # PublicInbox::Eml my @date = $hdr->header_raw('Date'); my ($ts); foreach my $d (@date) { # Y2K problems: 3-digit years $d =~ s!([A-Za-z]{3}) ([0-9]{3}) ([0-9]{2}:[0-9]{2}:[0-9]{2})! my $yyyy = $2 + 1900; "$1 $yyyy $3"!e; $ts = eval { str2date_zone($d) } and return $ts; if ($@) { my $mid = $hdr->header_raw('Message-ID'); warn "bad Date: $d in $mid: $@\n"; } } undef; } # Favors Received header for sorting globally sub msg_timestamp ($) { my ($hdr) = @_; # PublicInbox::Eml my $ret; $ret = msg_received_at($hdr) and return time_response($ret); $ret = msg_date_only($hdr) and return time_response($ret); wantarray ? (time, '+0000') : time; } # Favors the Date: header for display and sorting within a thread sub msg_datestamp ($) { my ($hdr) = @_; # PublicInbox::Eml my $ret; $ret = msg_date_only($hdr) and return time_response($ret); $ret = msg_received_at($hdr) and return time_response($ret); wantarray ? (time, '+0000') : time; } 1; public-inbox-1.6.1/xt/nntpd-validate.t000066400000000000000000000137771377346120300176710ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ # Integration test to validate compression. use strict; use warnings; use Test::More; use Symbol qw(gensym); use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC); use POSIX qw(_exit); use PublicInbox::TestCommon; my $inbox_dir = $ENV{GIANT_INBOX_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inbox_dir; my $mid = $ENV{TEST_MID}; # Net::NNTP is part of the standard library, but distros may split it off... require_mods(qw(DBD::SQLite Net::NNTP Compress::Raw::Zlib)); my $test_compress = Net::NNTP->can('compress'); if (!$test_compress) { diag 'Your Net::NNTP does not yet support compression'; diag 'See: https://rt.cpan.org/Ticket/Display.html?id=129967'; } my $test_tls = $ENV{TEST_SKIP_TLS} ? 0 : eval { require IO::Socket::SSL }; my $cert = 'certs/server-cert.pem'; my $key = 'certs/server-key.pem'; if ($test_tls && !-r $key || !-r $cert) { plan skip_all => "certs/ missing for $0, run $^X ./certs/create-certs.perl"; } my ($tmpdir, $ftd) = tmpdir(); $File::Temp::KEEP_ALL = !!$ENV{TEST_KEEP_TMP}; my (%OPT, $td, $host_port, $group); my $batch = 1000; if (($ENV{NNTP_TEST_URL} // '') =~ m!\Anntp://([^/]+)/([^/]+)\z!) { ($host_port, $group) = ($1, $2); $host_port .= ":119" unless index($host_port, ':') > 0; } else { make_local_server(); } my $test_article = $ENV{TEST_ARTICLE} // 0; my $test_xover = $ENV{TEST_XOVER} // 1; if ($test_tls) { my $nntp = Net::NNTP->new($host_port, %OPT); ok($nntp->starttls, 'STARTTLS works'); ok($nntp->compress, 'COMPRESS works') if $test_compress; ok($nntp->quit, 'QUIT after starttls OK'); } if ($test_compress) { my $nntp = Net::NNTP->new($host_port, %OPT); ok($nntp->compress, 'COMPRESS works'); ok($nntp->quit, 'QUIT after compress OK'); } sub do_get_all { my ($methods) = @_; my $desc = join(',', @$methods); my $t0 = clock_gettime(CLOCK_MONOTONIC); my $dig = Digest::SHA->new(1); my $digfh = gensym; my $tmpfh; if ($File::Temp::KEEP_ALL) { open $tmpfh, '>', "$tmpdir/$desc.raw" or die $!; } my $tmp = { dig => $dig, tmpfh => $tmpfh }; tie *$digfh, 'DigestPipe', $tmp; my $nntp = Net::NNTP->new($host_port, %OPT); $nntp->article("<$mid>", $digfh) if $mid; foreach my $m (@$methods) { my $res = $nntp->$m; print STDERR "# $m got $res ($desc)\n" if !$res; } $nntp->article("<$mid>", $digfh) if $mid; my ($num, $first, $last) = $nntp->group($group); unless (defined $num && defined $first && defined $last) { warn "Invalid group\n"; return undef; } my $i; for ($i = $first; $i < $last; $i += $batch) { my $j = $i + $batch - 1; $j = $last if $j > $last; if ($test_xover) { my $xover = $nntp->xover("$i-$j"); for my $n (sort { $a <=> $b } keys %$xover) { my $line = join("\t", @{$xover->{$n}}); $line =~ tr/\r//d; $dig->add("$n\t".$line); } } if ($test_article) { for my $n ($i..$j) { $nntp->article($n, $digfh) and next; next if $nntp->code == 423; my $res = $nntp->code.' '. $nntp->message; $res =~ tr/\r\n//d; print STDERR "# Article $n ($desc): $res\n"; } } } # hacky bytes_read thing added to Net::NNTP for testing: my $bytes_read = ''; if ($nntp->can('bytes_read')) { $bytes_read .= ' '.$nntp->bytes_read.'b'; } my $q = $nntp->quit; print STDERR "# quit failed: ".$nntp->code."\n" if !$q; my $elapsed = sprintf('%0.3f', clock_gettime(CLOCK_MONOTONIC) - $t0); my $res = $dig->hexdigest; print STDERR "# $desc - $res (${elapsed}s)$bytes_read\n"; $res; } my @tests = ([]); push @tests, [ 'compress' ] if $test_compress; push @tests, [ 'starttls' ] if $test_tls; push @tests, [ 'starttls', 'compress' ] if $test_tls && $test_compress; my (@keys, %thr, %res); for my $m (@tests) { my $key = join(',', @$m); push @keys, $key; pipe(my ($r, $w)) or die; my $pid = fork; if ($pid == 0) { close $r or die; my $res = do_get_all($m); print $w $res or die; $w->flush; _exit(0); } close $w or die; $thr{$key} = [ $pid, $r ]; } for my $key (@keys) { my ($pid, $r) = @{delete $thr{$key}}; local $/; $res{$key} = <$r>; defined $res{$key} or die "nothing for $key"; my $w = waitpid($pid, 0); defined($w) or die; $w == $pid or die "waitpid($pid) != $w)"; is($?, 0, "`$key' exited successfully") } my $plain = $res{''}; ok($plain, "plain got $plain"); is($res{$_}, $plain, "$_ matches '' result") for @keys; done_testing(); sub make_local_server { require PublicInbox::Inbox; $group = 'inbox.test.perf.nntpd'; my $ibx = { inboxdir => $inbox_dir, newsgroup => $group }; $ibx = PublicInbox::Inbox->new($ibx); my $pi_config = "$tmpdir/config"; { open my $fh, '>', $pi_config or die "open($pi_config): $!"; print $fh <<"" or die "print $pi_config: $!"; [publicinbox "test"] newsgroup = $group inboxdir = $inbox_dir address = test\@example.com close $fh or die "close($pi_config): $!"; } my ($out, $err) = ("$tmpdir/out", "$tmpdir/err"); for ($out, $err) { open my $fh, '>', $_ or die "truncate: $!"; } my $sock = tcp_server(); ok($sock, 'sock created'); $host_port = $sock->sockhost . ':' . $sock->sockport; # not using multiple workers, here, since we want to increase # the chance of tripping concurrency bugs within PublicInbox/NNTP*.pm my $cmd = [ '-nntpd', "--stdout=$out", "--stderr=$err", '-W0' ]; push @$cmd, "-lnntp://$host_port"; if ($test_tls) { push @$cmd, "--cert=$cert", "--key=$key"; %OPT = ( SSL_hostname => 'server.local', SSL_verifycn_name => 'server.local', SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_PEER(), SSL_ca_file => 'certs/test-ca.pem', ); } print STDERR "# CMD ". join(' ', @$cmd). "\n"; my $env = { PI_CONFIG => $pi_config }; $td = start_script($cmd, $env, { 3 => $sock }); } package DigestPipe; use strict; use warnings; sub TIEHANDLE { my ($class, $self) = @_; bless $self, $class; } sub PRINT { my $self = shift; my $data = join('', @_); # Net::NNTP emit different line-endings depending on TLS or not...: $data =~ tr/\r//d; $self->{dig}->add($data); if (my $tmpfh = $self->{tmpfh}) { print $tmpfh $data; } 1; } 1; public-inbox-1.6.1/xt/perf-msgview.t000066400000000000000000000031141377346120300173520ustar00rootroot00000000000000# Copyright (C) 2019-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use Benchmark qw(:all); use PublicInbox::Inbox; use PublicInbox::View; use PublicInbox::TestCommon; my $inboxdir = $ENV{GIANT_INBOX_DIR} // $ENV{GIANT_PI_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; my @cat = qw(cat-file --buffer --batch-check --batch-all-objects); if (require_git(2.19, 1)) { push @cat, '--unordered'; } else { warn "git <2.19, cat-file lacks --unordered, locality suffers\n"; } require_mods qw(Plack::Util); use_ok 'Plack::Util'; my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'name' }); my $git = $ibx->git; my $fh = $git->popen(@cat); my $vec = ''; vec($vec, fileno($fh), 1) = 1; select($vec, undef, undef, 60) or die "timed out waiting for --batch-check"; my $ctx = { env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' }, -inbox => $ibx, www => Plack::Util::inline_object(style => sub {''}), }; my ($mime, $res, $oid, $type); my $n = 0; my $obuf = ''; my $m = 0; my $cb = sub { $mime = PublicInbox::Eml->new(shift); PublicInbox::View::multipart_text_as_html($mime, $ctx); ++$m; $obuf = ''; }; my $t = timeit(1, sub { $ctx->{obuf} = \$obuf; $ctx->{mhref} = '../'; while (<$fh>) { ($oid, $type) = split / /; next if $type ne 'blob'; ++$n; $git->cat_async($oid, $cb); } $git->cat_async_wait; }); diag 'multipart_text_as_html took '.timestr($t)." for $n <=> $m messages"; is($m, $n, 'rendered all messages'); done_testing(); public-inbox-1.6.1/xt/perf-nntpd.t000066400000000000000000000052571377346120300170260ustar00rootroot00000000000000# Copyright (C) 2018-2020 all contributors # License: AGPL-3.0+ use strict; use warnings; use Test::More; use Benchmark qw(:all :hireswallclock); use PublicInbox::Inbox; use Net::NNTP; my $inboxdir = $ENV{GIANT_INBOX_DIR} // $ENV{GIANT_PI_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless defined($inboxdir); my ($host_port, $group, %opts, $s, $td, $tmp_obj); use PublicInbox::TestCommon; if (($ENV{NNTP_TEST_URL} || '') =~ m!\Anntp://([^/]+)/([^/]+)\z!) { ($host_port, $group) = ($1, $2); $host_port .= ":119" unless index($host_port, ':') > 0; } else { $group = 'inbox.test.perf.nntpd'; my $ibx = { inboxdir => $inboxdir, newsgroup => $group }; $ibx = PublicInbox::Inbox->new($ibx); my $tmpdir; ($tmpdir, $tmp_obj) = tmpdir(); my $pi_config = "$tmpdir/config"; { open my $fh, '>', $pi_config or die "open($pi_config): $!"; print $fh <<"" or die "print $pi_config: $!"; [publicinbox "test"] newsgroup = $group inboxdir = $inboxdir address = test\@example.com close $fh or die "close($pi_config): $!"; } my $sock = tcp_server(); ok($sock, 'sock created'); my $cmd = [ '-nntpd', '-W0' ]; $td = start_script($cmd, { PI_CONFIG => $pi_config }, { 3 => $sock }); $host_port = $sock->sockhost . ':' . $sock->sockport; } %opts = ( PeerAddr => $host_port, Proto => 'tcp', Timeout => 1, ); $s = IO::Socket::INET->new(%opts); $s->autoflush(1); my $buf = $s->getline; like($buf, qr/\A201 .* ready - post via email\r\n/s, 'got greeting'); my $t = timeit(10, sub { ok($s->print("GROUP $group\r\n"), 'changed group'); $buf = $s->getline; }); diag 'GROUP took: ' . timestr($t); my ($tot, $min, $max) = ($buf =~ /\A211 (\d+) (\d+) (\d+) /); ok($tot && $min && $max, 'got GROUP response'); my $nr = $max - $min; my $nmax = 50000; my $nmin = $max - $nmax; $nmin = $min if $nmin < $min; my $res; my $spec = "$nmin-$max"; my $n; sub read_until_dot ($) { my $n = 0; do { $buf = $s->getline; ++$n } until $buf eq ".\r\n"; $n; } $t = timeit(1, sub { $s->print("XOVER $spec\r\n"); $n = read_until_dot($s); }); diag 'xover took: ' . timestr($t) . " for $n"; $t = timeit(1, sub { $s->print("HDR From $spec\r\n"); $n = read_until_dot($s); }); diag "XHDR From ". timestr($t) . " for $n"; my $date = $ENV{NEWNEWS_DATE}; unless ($date) { my (undef, undef, undef, $d, $m, $y) = gmtime(time - 30 * 86400); $date = sprintf('%04u%02u%02u', $y + 1900, $m + 1, $d); diag "NEWNEWS_DATE undefined, using $date"; } $t = timeit(1, sub { $s->print("NEWNEWS * $date 000000 GMT\r\n"); $n = read_until_dot($s); }); diag 'newnews took: ' . timestr($t) . " for $n"; if ($s) { $s->print("QUIT\r\n"); $s->getline; } done_testing(); 1; public-inbox-1.6.1/xt/perf-threading.t000066400000000000000000000017111377346120300176370ustar00rootroot00000000000000# Copyright (C) 2016-2020 all contributors # License: AGPL-3.0+ # # real-world testing of search threading use strict; use warnings; use Test::More; use Benchmark qw(:all); use PublicInbox::Inbox; my $inboxdir = $ENV{GIANT_INBOX_DIR} // $ENV{GIANT_PI_DIR}; plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir }); eval { require PublicInbox::Search }; my $srch = $ibx->search; plan skip_all => "$inboxdir not configured for search $0 $@" unless $srch; require PublicInbox::View; my $msgs; my $elapsed = timeit(1, sub { $msgs = $ibx->over->recent({limit => 200000}); }); my $n = scalar(@$msgs); ok($n, 'got some messages'); diag "enquire: ".timestr($elapsed)." for $n"; $elapsed = timeit(1, sub { PublicInbox::View::thread_results({-inbox => $ibx}, $msgs); }); diag "thread_results ".timestr($elapsed); done_testing(); public-inbox-1.6.1/xt/solver.t000066400000000000000000000041561377346120300162600ustar00rootroot00000000000000#!perl -w # Copyright (C) 2020 all contributors # License: AGPL-3.0+ use strict; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Config; # this relies on PI_CONFIG // ~/.public-inbox/config my @psgi = qw(HTTP::Request::Common Plack::Test URI::Escape Plack::Builder); require_mods(qw(DBD::SQLite Search::Xapian), @psgi); use_ok($_) for @psgi; use_ok 'PublicInbox::WWW'; my $cfg = PublicInbox::Config->new; my $www = PublicInbox::WWW->new($cfg); my $app = sub { my $env = shift; $env->{'psgi.errors'} = \*STDERR; $www->call($env); }; # TODO: convert these to self-contained test cases my $todo = { 'git' => [ '9e9048b02bd04d287461543d85db0bb715b89f8c' .'/s/?b=t%2Ft3420%2Fremove-ids.sed', 'eebf7a8/s/?b=t%2Ftest-lib.sh', 'eb580ca513/s/?b=remote-odb.c', '776fa90f7f/s/?b=contrib/git-jump/git-jump', '5cd8845/s/?b=submodule.c', '81c1164ae5/s/?b=builtin/log.c', '6aa8857a11/s/?b=protocol.c', '96f1c7f/s/', # TODO: b=contrib/completion/git-completion.bash 'b76f2c0/s/?b=po/zh_CN.po', ], }; my ($ibx_name, $urls, @gone); my $client = sub { my ($cb) = @_; for (@$urls) { my $url = "/$ibx_name/$_"; my $res = $cb->(GET($url)); is($res->code, 200, $url); next if $res->code == 200; # diag $res->content; diag "$url failed"; } }; my $nr = 0; while (($ibx_name, $urls) = each %$todo) { SKIP: { if (!$cfg->lookup_name($ibx_name)) { push @gone, $ibx_name; skip("$ibx_name not configured", scalar(@$urls)); } test_psgi($app, $client); $nr++; } } SKIP: { require_mods(qw(Plack::Test::ExternalServer), $nr); delete @$todo{@gone}; my $sock = tcp_server() or BAIL_OUT $!; my ($tmpdir, $for_destroy) = tmpdir(); my ($out, $err) = map { "$tmpdir/std$_.log" } qw(out err); my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, undef, { 3 => $sock }); my ($h, $p) = ($sock->sockhost, $sock->sockport); local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; while (($ibx_name, $urls) = each %$todo) { Plack::Test::ExternalServer::test_psgi(client => $client); } } done_testing();