pax_global_header00006660000000000000000000000064142117505720014516gustar00rootroot0000000000000052 comment=e5e9a61fccfd7eff1475bd209e5db11824d88369 lrzip-0.651/000077500000000000000000000000001421175057200126715ustar00rootroot00000000000000lrzip-0.651/.github/000077500000000000000000000000001421175057200142315ustar00rootroot00000000000000lrzip-0.651/.github/workflows/000077500000000000000000000000001421175057200162665ustar00rootroot00000000000000lrzip-0.651/.github/workflows/c-cpp.yml000066400000000000000000000010661421175057200200160ustar00rootroot00000000000000name: check_build on: push: branches: [ master ] pull_request: branches: [ master ] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: prepare repo run: git fetch --prune --unshallow - name: autogen run: ./autogen.sh - name: install liblzo2-dev run: sudo apt install -y liblzo2-dev - name: install liblz4-dev run: sudo apt install -y liblz4-dev - name: configure run: ./configure - name: make run: make - name: make check run: make check lrzip-0.651/.gitignore000066400000000000000000000006351421175057200146650ustar00rootroot00000000000000*~ *.o *.lo config.* Makefile Makefile.in .deps .libs *.la aclocal.m4 autom4te.cache/ configure depcomp install-sh libtool lrzip lrzip*.tar.bz2 lrzip*.tar.gz lrzip*.tar.lrz ltmain.sh missing stamp-h1 libtool.m4 ltoptions.m4 ltsugar.m4 ltversion.m4 lt~obsolete.m4 compile man/lrunzip.1 man/lrzcat.1 man/lrztar.1 man/lrzuntar.1 man/lrz.1 libzpaq/.dirstamp lrzip.pc regressiontest.out decompress_demo liblrzip_demo lrzip-0.651/AUTHORS000066400000000000000000000016011421175057200137370ustar00rootroot00000000000000Original concept and programming by Con Kolivas Beginning with version 0.19, Peter Hyman submitted bug fixes, patches, multi-threading support, assembler integration, SDK updating, and autoconf improvements. 
Thanks to: Andrew Tridgell for rzip Markus Oberhumer for lzo Igor Pavlov for lzma and CRC Assembler code Jean-loup Gailly and Mark Adler for the zlib compression library Christian Leber for lzma compat layer Lasse Collin for debugging the compat layer Michael J Cohen for Darwin support Jukka Laurila for newer Darwin support George Makrydakis for lrztar, lrzuntar Jari Aalto for documentation and typos and git help Jon Tibble for nasm tests & Solaris support Michael Blumenkrantz for updated autotools and liblrzip! Serge Belyshev for encryption help and code Ulrich Drepper for MD5 implementation PolarSSL authors for sha512 + aes128 implementation Fernando Auil for lrzip completion lrzip-0.651/BUGS000066400000000000000000000003331421175057200133530ustar00rootroot00000000000000BUGME Issues can be reported/tracked here: https://github.com/ckolivas/lrzip/issues Known issues: Mac may not be able to work with STDIN/STDOUT on very large files. MD5 is disabled on Mac due to not working properly. lrzip-0.651/COPYING000066400000000000000000000432541421175057200137340ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. 
When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. 
GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. 
You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. 
Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. 
However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. 
If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. 
If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. <signature of Ty Coon>, 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. 
lrzip-0.651/ChangeLog000066400000000000000000000001121421175057200144350ustar00rootroot00000000000000Check git for changelog: https://github.com/ckolivas/lrzip/commits/master lrzip-0.651/INSTALL000066400000000000000000000363321421175057200137310ustar00rootroot00000000000000Installation Instructions ************************* Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without warranty of any kind. Basic Installation ================== Briefly, the shell commands `./configure; make; make install' should configure, build, and install this package. The following more-detailed instructions are generic; see the `README' file for instructions specific to this package. Some packages provide this `INSTALL' file but do not implement all of the features documented below. The lack of an optional feature in a given package is not necessarily a bug. More recommendations for GNU packages can be found in *note Makefile Conventions: (standards)Makefile Conventions. The `configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses those values to create a `Makefile' in each directory of the package. It may also create one or more `.h' files containing system-dependent definitions. Finally, it creates a shell script `config.status' that you can run in the future to recreate the current configuration, and a file `config.log' containing compiler output (useful mainly for debugging `configure'). It can also use an optional file (typically called `config.cache' and enabled with `--cache-file=config.cache' or simply `-C') that saves the results of its tests to speed up reconfiguring. 
Caching is disabled by default to prevent problems with accidental use of stale cache files. If you need to do unusual things to compile the package, please try to figure out how `configure' could check whether to do them, and mail diffs or instructions to the address given in the `README' so they can be considered for the next release. If you are using the cache, and at some point `config.cache' contains results you don't want to keep, you may remove or edit it. The file `configure.ac' (or `configure.in') is used to create `configure' by a program called `autoconf'. You need `configure.ac' if you want to change it or regenerate `configure' using a newer version of `autoconf'. The simplest way to compile this package is: 1. `cd' to the directory containing the package's source code and type `./configure' to configure the package for your system. Running `configure' might take a while. While running, it prints some messages telling which features it is checking for. 2. Type `make' to compile the package. 3. Optionally, type `make check' to run any self-tests that come with the package, generally using the just-built uninstalled binaries. 4. Type `make install' to install the programs and any data files and documentation. When installing into a prefix owned by root, it is recommended that the package be configured and built as a regular user, and only the `make install' phase executed with root privileges. 5. Optionally, type `make installcheck' to repeat any self-tests, but this time using the binaries in their final installed location. This target does not install anything. Running this target as a regular user, particularly if the prior `make install' required root privileges, verifies that the installation completed correctly. 6. You can remove the program binaries and object files from the source code directory by typing `make clean'. 
To also remove the files that `configure' created (so you can compile the package for a different kind of computer), type `make distclean'. There is also a `make maintainer-clean' target, but that is intended mainly for the package's developers. If you use it, you may have to get all sorts of other programs in order to regenerate files that came with the distribution. 7. Often, you can also type `make uninstall' to remove the installed files again. In practice, not all packages have tested that uninstallation works correctly, even though it is required by the GNU Coding Standards. 8. Some packages, particularly those that use Automake, provide `make distcheck', which can be used by developers to test that all other targets like `make install' and `make uninstall' work correctly. This target is generally not run by end users. Compilers and Options ===================== Some systems require unusual options for compilation or linking that the `configure' script does not know about. Run `./configure --help' for details on some of the pertinent environment variables. You can give `configure' initial values for configuration parameters by setting variables in the command line or in the environment. Here is an example: ./configure CC=c99 CFLAGS=-g LIBS=-lposix *Note Defining Variables::, for more details. Compiling For Multiple Architectures ==================================== You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their own directory. To do this, you can use GNU `make'. `cd' to the directory where you want the object files and executables to go and run the `configure' script. `configure' automatically checks for the source code in the directory that `configure' is in and in `..'. This is known as a "VPATH" build. With a non-GNU `make', it is safer to compile the package for one architecture at a time in the source code directory. 
After you have installed the package for one architecture, use `make distclean' before reconfiguring for another architecture. On MacOS X 10.5 and later systems, you can create libraries and executables that work on multiple system types--known as "fat" or "universal" binaries--by specifying multiple `-arch' options to the compiler but only a single `-arch' option to the preprocessor. Like this: ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ CPP="gcc -E" CXXCPP="g++ -E" This is not guaranteed to produce working output in all cases, you may have to build one architecture at a time and combine the results using the `lipo' tool if you have problems. Installation Names ================== By default, `make install' installs the package's commands under `/usr/local/bin', include files under `/usr/local/include', etc. You can specify an installation prefix other than `/usr/local' by giving `configure' the option `--prefix=PREFIX', where PREFIX must be an absolute file name. You can specify separate installation prefixes for architecture-specific files and architecture-independent files. If you pass the option `--exec-prefix=PREFIX' to `configure', the package uses PREFIX as the prefix for installing programs and libraries. Documentation and other data files still use the regular prefix. In addition, if you use an unusual directory layout you can give options like `--bindir=DIR' to specify different values for particular kinds of files. Run `configure --help' for a list of the directories you can set and what kinds of files go in them. In general, the default for these options is expressed in terms of `${prefix}', so that specifying just `--prefix' will affect all of the other directory specifications that were not explicitly provided. 
The most portable way to affect installation locations is to pass the correct locations to `configure'; however, many packages provide one or both of the following shortcuts of passing variable assignments to the `make install' command line to change installation locations without having to reconfigure or recompile. The first method involves providing an override variable for each affected directory. For example, `make install prefix=/alternate/directory' will choose an alternate location for all directory configuration variables that were expressed in terms of `${prefix}'. Any directories that were specified during `configure', but not in terms of `${prefix}', must each be overridden at install time for the entire installation to be relocated. The approach of makefile variable overrides for each directory variable is required by the GNU Coding Standards, and ideally causes no recompilation. However, some platforms have known limitations with the semantics of shared libraries that end up requiring recompilation when using this method, particularly noticeable in packages that use GNU Libtool. The second method involves providing the `DESTDIR' variable. For example, `make install DESTDIR=/alternate/directory' will prepend `/alternate/directory' before all installation names. The approach of `DESTDIR' overrides is not required by the GNU Coding Standards, and does not work on platforms that have drive letters. On the other hand, it does better at avoiding recompilation issues, and works well even when some directory options were not specified in terms of `${prefix}' at `configure' time. Optional Features ================= If the package supports it, you can cause programs to be installed with an extra prefix or suffix on their names by giving `configure' the option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. Some packages pay attention to `--enable-FEATURE' options to `configure', where FEATURE indicates an optional part of the package. 
They may also pay attention to `--with-PACKAGE' options, where PACKAGE is something like `gnu-as' or `x' (for the X Window System). The `README' should mention any `--enable-' and `--with-' options that the package recognizes. For packages that use the X Window System, `configure' can usually find the X include and library files automatically, but if it doesn't, you can use the `configure' options `--x-includes=DIR' and `--x-libraries=DIR' to specify their locations. Some packages offer the ability to configure how verbose the execution of `make' will be. For these packages, running `./configure --enable-silent-rules' sets the default to minimal output, which can be overridden with `make V=1'; while running `./configure --disable-silent-rules' sets the default to verbose, which can be overridden with `make V=0'. Particular systems ================== On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC is not installed, it is recommended to use the following options in order to use an ANSI C compiler: ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" and if that doesn't work, install pre-built binaries of GCC for HP-UX. On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot parse its `<wchar.h>' header file. The option `-nodtk' can be used as a workaround. If GNU CC is not installed, it is therefore recommended to try ./configure CC="cc" and if that doesn't work, try ./configure CC="cc -nodtk" On Solaris, don't put `/usr/ucb' early in your `PATH'. This directory contains several dysfunctional programs; working variants of these programs are available in `/usr/bin'. So, if you need `/usr/ucb' in your `PATH', put it _after_ `/usr/bin'. On Haiku, software installed for all users goes in `/boot/common', not `/usr/local'. 
It is recommended to use the following options: ./configure --prefix=/boot/common Specifying the System Type ========================== There may be some features `configure' cannot figure out automatically, but needs to determine by the type of machine the package will run on. Usually, assuming the package is built to be run on the _same_ architectures, `configure' can figure that out, but if it prints a message saying it cannot guess the machine type, give it the `--build=TYPE' option. TYPE can either be a short name for the system type, such as `sun4', or a canonical name which has the form: CPU-COMPANY-SYSTEM where SYSTEM can have one of these forms: OS KERNEL-OS See the file `config.sub' for the possible values of each field. If `config.sub' isn't included in this package, then this package doesn't need to know the machine type. If you are _building_ compiler tools for cross-compiling, you should use the option `--target=TYPE' to select the type of system they will produce code for. If you want to _use_ a cross compiler, that generates code for a platform different from the build platform, you should specify the "host" platform (i.e., that on which the generated programs will eventually be run) with `--host=TYPE'. Sharing Defaults ================ If you want to set default values for `configure' scripts to share, you can create a site shell script called `config.site' that gives default values for variables like `CC', `cache_file', and `prefix'. `configure' looks for `PREFIX/share/config.site' if it exists, then `PREFIX/etc/config.site' if it exists. Or, you can set the `CONFIG_SITE' environment variable to the location of the site script. A warning: not all `configure' scripts look for a site script. Defining Variables ================== Variables not defined in a site shell script can be set in the environment passed to `configure'. However, some packages may run configure again during the build, and the customized values of these variables may be lost. 
In order to avoid this problem, you should set them in the `configure' command line, using `VAR=value'. For example: ./configure CC=/usr/local2/bin/gcc causes the specified `gcc' to be used as the C compiler (unless it is overridden in the site shell script). Unfortunately, this technique does not work for `CONFIG_SHELL' due to an Autoconf bug. Until the bug is fixed you can use this workaround: CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash `configure' Invocation ====================== `configure' recognizes the following options to control how it operates. `--help' `-h' Print a summary of all of the options to `configure', and exit. `--help=short' `--help=recursive' Print a summary of the options unique to this package's `configure', and exit. The `short' variant lists options used only in the top level, while the `recursive' variant lists options also present in any nested packages. `--version' `-V' Print the version of Autoconf used to generate the `configure' script, and exit. `--cache-file=FILE' Enable the cache: use and save the results of the tests in FILE, traditionally `config.cache'. FILE defaults to `/dev/null' to disable caching. `--config-cache' `-C' Alias for `--cache-file=config.cache'. `--quiet' `--silent' `-q' Do not print messages saying which checks are being made. To suppress all normal output, redirect it to `/dev/null' (any error messages will still be shown). `--srcdir=DIR' Look for the package's source code in directory DIR. Usually `configure' can determine that directory automatically. `--prefix=DIR' Use DIR as the installation prefix. *note Installation Names:: for more details, including other options available for fine-tuning the installation locations. `--no-create' `-n' Run the configure checks, but stop before creating any output files. `configure' also accepts some other, not widely useful, options. Run `configure --help' for more details. 
lrzip-0.651/Lrzip.h000066400000000000000000000761431421175057200141550ustar00rootroot00000000000000/* Copyright (C) 2006-2011 Con Kolivas Copyright (C) 2011 Peter Hyman Copyright (C) 1998-2003 Andrew Tridgell This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef LIBLRZIP_H #define LIBLRZIP_H #include #include #include #ifdef _WIN32 # include #else # include #endif #ifdef __cplusplus extern "C" { #endif /** @brief LRZIP library @mainpage lrzip @version 1.0 @date 2011 @section intro What is LRZIP? LRZIP is a compression program optimised for large files. The larger the file and the more memory you have, the better the compression advantage this will provide, especially once the files are larger than 100MB. The advantage can be chosen to be either size (much smaller than bzip2) or speed (much faster than bzip2). * @link Lrzip.h LRZIP API @endlink */ /** @file Lrzip.h */ /** * @typedef Lrzip * @brief The overall struct for managing all operations */ typedef struct Lrzip Lrzip; /** * @typedef Lrzip_Log_Level * @brief The amount of information to display using logging functions * This enum is used when setting or getting the log level of an #Lrzip * struct. It determines how much information is shown about the current operation, * either in stdout/stderr or using logging callbacks. 
* @see lrzip_log_level_set()
 * @see lrzip_log_level_get()
 */
typedef enum
{
	LRZIP_LOG_LEVEL_ERROR = 0,    /**< Errors only */
	LRZIP_LOG_LEVEL_INFO = 1,     /**< Errors and informational messages */
	LRZIP_LOG_LEVEL_PROGRESS = 2, /**< Errors, information, and progress updates */
	LRZIP_LOG_LEVEL_VERBOSE = 3,  /**< Errors, information, and verbose progress updates */
	LRZIP_LOG_LEVEL_DEBUG = 4     /**< Everything that can be reported */
} Lrzip_Log_Level;

/**
 * @typedef Lrzip_Mode
 * @brief Selects the operation an #Lrzip struct performs
 * The mode is what gets stored on, or read back from, an #Lrzip struct
 * through the mode accessors; it decides what lrzip_run() does when called.
 * @see lrzip_mode_set()
 * @see lrzip_mode_get()
 */
typedef enum
{
	LRZIP_MODE_NONE = 0,           /**< No operation has been chosen */
	LRZIP_MODE_INFO = 1,           /**< Report information about an archive */
	LRZIP_MODE_TEST = 2,           /**< Check the integrity of an archive */
	LRZIP_MODE_DECOMPRESS = 3,     /**< Decompress an archive */
	LRZIP_MODE_COMPRESS_NONE = 4,  /**< RZIP preprocessing only */
	LRZIP_MODE_COMPRESS_LZO = 5,   /**< Compress with LZO */
	LRZIP_MODE_COMPRESS_ZLIB = 6,  /**< Compress with ZLIB (GZIP) */
	LRZIP_MODE_COMPRESS_BZIP2 = 7, /**< Compress with BZIP2 */
	LRZIP_MODE_COMPRESS_LZMA = 8,  /**< Compress with LZMA */
	LRZIP_MODE_COMPRESS_ZPAQ = 9   /**< Compress with ZPAQ */
} Lrzip_Mode;

/**
 * @typedef Lrzip_Flag
 * @brief The extra params for an #Lrzip struct's operations
 * This enum is used when setting or getting the flags of an #Lrzip
 * struct. It determines some of the miscellaneous extra abilities of LRZIP.
* @see lrzip_flags_set() * @see lrzip_flags_get() */ typedef enum { LRZIP_FLAG_REMOVE_SOURCE = (1 << 0), /**< Remove the input file after the operation completes */ LRZIP_FLAG_REMOVE_DESTINATION = (1 << 1), /**< Remove matching destination file if it exists */ LRZIP_FLAG_KEEP_BROKEN = (1 << 2), /**< Do not remove broken files */ LRZIP_FLAG_VERIFY = (1 << 3), /**< Only verify the archive, do not perform any compression/decompression */ LRZIP_FLAG_DISABLE_LZO_CHECK = (1 << 4), /**< Disable test to determine if LZO compression will be useful */ LRZIP_FLAG_UNLIMITED_RAM = (1 << 5), /**< Use unlimited ram window size for compression */ LRZIP_FLAG_ENCRYPT = (1 << 6) /**< Encrypt archive during compression; @see lrzip_pass_cb_set() */ } Lrzip_Flag; /** * @typedef Lrzip_Info_Cb * @brief The callback to call when an operation's progress changes * @param data The data param passed in lrzip_info_cb_set() * @param pct The overall operation progress as a percent * @param chunk_pct The current chunk's operation progress as a percent */ typedef void (*Lrzip_Info_Cb)(void *data, int pct, int chunk_pct); /** * @typedef Lrzip_Log_Cb * @brief The callback to call when a log message is to be shown * @param data The data param passed in lrzip_log_cb_set() * @param level The Lrzip_Log_Level of the message * @param line The line in LRZIP code where the message originated * @param file The file in LRZIP code where the message originated * @param func The function in LRZIP code where the message originated * @param format The printf-style format of the message * @param args The matching va_list for @p format */ typedef void (*Lrzip_Log_Cb)(void *data, unsigned int level, unsigned int line, const char *file, const char *func, const char *format, va_list args); /** * @typedef Lrzip_Password_Cb * @brief The callback to call for operations requiring a password * @param data The data param passed in lrzip_pass_cb_set() * @param buffer The pre-allocated buffer to write the password into * @param 
buf_size The size, in bytes, of @p buffer */ typedef void (*Lrzip_Password_Cb)(void *data, char *buffer, size_t buf_size); /** * @brief Initialize liblrzip * This function must be called prior to running any other liblrzip * functions to initialize compression algorithms. It does not allocate. * @return true on success, false on failure */ bool lrzip_init(void); /** * @brief Create a new #Lrzip struct * Use this function to allocate a new struct for immediate or later use, * optionally setting flags and changing modes at a later time. * @param mode The optional Lrzip_Mode to set, or LRZIP_MODE_NONE to allow * setting a mode later. * @return The new #Lrzip struct, or NULL on failure * @see lrzip_mode_set() */ Lrzip *lrzip_new(Lrzip_Mode mode); /** * @brief Free an #Lrzip struct * Use this function to free all memory associated with an existing struct. * @param lr The struct to free */ void lrzip_free(Lrzip *lr); /** * @brief Set up an #Lrzip struct using environment settings * Use this function to acquire and utilize settings already existing in * either environment variables or configuration files for LRZIP. For more detailed * information, see the LRZIP manual. * @param lr The struct to configure * @note This function cannot fail. 
*/ void lrzip_config_env(Lrzip *lr); /** * @brief Retrieve the operation mode of an #Lrzip struct * @param lr The struct to query * @return The Lrzip_Mode of @p lr, or LRZIP_MODE_NONE on failure */ Lrzip_Mode lrzip_mode_get(Lrzip *lr); /** * @brief Set the operation mode of an #Lrzip struct * @param lr The struct to change the mode for * @param mode The Lrzip_Mode to set for @p lr * @return true on success, false on failure */ bool lrzip_mode_set(Lrzip *lr, Lrzip_Mode mode); /** * @brief Set the compression level of an #Lrzip struct * @param lr The struct to change the compression level for * @param level The value, 1-9, to use as the compression level for operations with @p lr * @return true on success, false on failure * @note This function is only valid for compression operations */ bool lrzip_compression_level_set(Lrzip *lr, unsigned int level); /** * @brief Get the compression level of an #Lrzip struct * @param lr The struct to get the compression level of * @return The value, 1-9, used as the compression level for operations with @p lr, * or 0 on failure * @note This function is only valid for compression operations */ unsigned int lrzip_compression_level_get(Lrzip *lr); /** * @brief Set the operation specific parameters * @param lr The struct to set parameters for * @param flags A bitwise ORed set of Lrzip_Flags * @note This function does not perform any error checking. Any errors in flags * will be determined when lrzip_run() is called. 
*/ void lrzip_flags_set(Lrzip *lr, unsigned int flags); /** * @brief Get the operation specific parameters * @param lr The struct to get parameters of * @return A bitwise ORed set of Lrzip_Flags */ unsigned int lrzip_flags_get(Lrzip *lr); /** * @brief Set the nice level for operations in a struct * @param lr The struct to set the nice level for * @param nice The value to use when nicing during operations */ void lrzip_nice_set(Lrzip *lr, int nice); /** * @brief Get the nice level for operations in a struct * @param lr The struct to get the nice level of * @return The value to use when nicing during operations */ int lrzip_nice_get(Lrzip *lr); /** * @brief Explicitly set the number of threads to use during operations * @param lr The struct to set the threads for * @param threads The number of threads to use for operations * @note LRZIP will automatically determine the optimal number of threads to use, * so this function should only be used to specify FEWER than optimal threads. */ void lrzip_threads_set(Lrzip *lr, unsigned int threads); /** * @brief Get the number of threads used during operations * @param lr The struct to query * @return The number of threads to use for operations */ unsigned int lrzip_threads_get(Lrzip *lr); /** * @brief Set the maximum compression window for operations * @param lr The struct to set the maximum compression window for * @param size The size (in hundreds of MB) to use for the maximum size of compression * chunks. * @note LRZIP will automatically determine the optimal maximum compression window to use, * so this function should only be used to specify a LOWER value. */ void lrzip_compression_window_max_set(Lrzip *lr, int64_t size); /** * @brief Get the maximum compression window for operations * @param lr The struct to query * @return The size (in hundreds of MB) to use for the maximum size of compression * chunks. 
*/ int64_t lrzip_compression_window_max_get(Lrzip *lr); /** * @brief Return the size of the stream queue in a struct * This function returns the current count of streams added for processing * using lrzip_file_add. It always returns instantly. * @param lr The struct to query * @return The current number of streams in the queue */ unsigned int lrzip_files_count(Lrzip *lr); /** * @brief Return the size of the file queue in a struct * This function returns the current count of files added for processing * using lrzip_filename_add. It always returns instantly. * @param lr The struct to query * @return The current number of files in the queue */ unsigned int lrzip_filenames_count(Lrzip *lr); /** * @brief Return the array of the stream queue in a struct * This function returns the current queue of streams added for processing * using lrzip_file_add. It always returns instantly. * @param lr The struct to query * @return The current stream queue */ FILE **lrzip_files_get(Lrzip *lr); /** * @brief Return the array of the filename queue in a struct * This function returns the current queue of files added for processing * using lrzip_filename_add. It always returns instantly. * @param lr The struct to query * @return The current filename queue */ char **lrzip_filenames_get(Lrzip *lr); /** * @brief Add a stream (FILE) to the operation queue * This function adds a stream to the input queue. Each time lrzip_run() * is called, it will run the current operation (specified by the Lrzip_Mode) * on either a stream or file in the queue. * @param lr The struct * @param file The stream descriptor to queue * @return true on success, false on failure * @note The file queue will be fully processed prior to beginning processing * the stream queue. * @warning Any streams added to this queue MUST NOT be closed until they have * either been processed or removed from the queue! 
*/ bool lrzip_file_add(Lrzip *lr, FILE *file); /** * @brief Remove a stream from the operation queue * This function removes a previously added stream from the operation queue by * iterating through the queue and removing the stream if found. * @param lr The struct * @param file The stream to remove * @return true only on successful removal, else false */ bool lrzip_file_del(Lrzip *lr, FILE *file); /** * @brief Pop the current head of the stream queue * This function is used to remove the current head of the stream queue. It can be called * immediately following any lrzip_run() stream operation to remove the just-processed stream. This * function modifies the stream queue array, reordering and updating the index count. * @param lr The struct to pop the stream queue of * @return The stream removed from the queue, or NULL on failure */ FILE *lrzip_file_pop(Lrzip *lr); /** * @brief Clear the stream queue * This function is used to free and reset the stream queue. The streams * themselves are untouched. * @param lr The struct */ void lrzip_files_clear(Lrzip *lr); /** * @brief Add a file to the operation queue * This function adds a file to the input queue. Each time lrzip_run() * is called, it will run the current operation (specified by the Lrzip_Mode) * on either a stream or file in the queue. * @param lr The struct * @param file The file (by absolute path) to queue * @return true on success, false on failure * @note The file queue will be fully processed prior to beginning processing * the stream queue. */ bool lrzip_filename_add(Lrzip *lr, const char *file); /** * @brief Remove a filename from the operation queue * This function removes a previously added filename from the operation queue by * iterating through the queue and removing the filename if found. 
* @param lr The struct * @param file The file to remove * @return true only on successful removal, else false */ bool lrzip_filename_del(Lrzip *lr, const char *file); /** * @brief Pop the current head of the file queue * This function is used to remove the current head of the file queue. It can be called * immediately following any lrzip_run() file operation to remove the just-processed file. This * function modifies the file queue array, reordering and updating the index count. * @param lr The struct to pop the filename queue of * @return The filename removed from the queue, or NULL on failure */ const char *lrzip_filename_pop(Lrzip *lr); /** * @brief Clear the file queue * This function is used to free and reset the file queue. * @param lr The struct */ void lrzip_filenames_clear(Lrzip *lr); /** * @brief Set the default suffix for LRZIP compression operations * This function is used to change the default ".lrz" suffix for operations * to @p suffix. * @param lr The struct * @param suffix The suffix to use for compression operations */ void lrzip_suffix_set(Lrzip *lr, const char *suffix); /** * @brief Get the default suffix for LRZIP compression operations * @param lr The struct * @return The suffix to use for compression operations, or NULL on failure */ const char *lrzip_suffix_get(Lrzip *lr); /** * @brief Set the output directory for operations * This function can be used to set the output directory for operations. * Files will be stored according to their basename and lrzip suffix where * applicable. * @param lr The struct * @param dir The absolute path of the output directory */ void lrzip_outdir_set(Lrzip *lr, const char *dir); /** * @brief Get the output directory for operations * @param lr The struct * @return The previously set output directory */ const char *lrzip_outdir_get(Lrzip *lr); /** * @brief Set the output stream for operations * This function can be used to set the output stream for operations. 
* Raw data will be written to this stream for the duration of lrzip_run(). * @param lr The struct * @param file The stream to write to * @warning @p file is NOT created by this library and must be opened by the user! */ void lrzip_outfile_set(Lrzip *lr, FILE *file); /** * @brief Get the output stream for operations * @param lr The struct * @return The previously set output stream */ FILE *lrzip_outfile_get(Lrzip *lr); /** * @brief Set the output file for operations * This function can be used to set the output file for operations. * Raw data will be written to the file with this name for the duration of lrzip_run(). * @param lr The struct * @param file The name of the file to write to */ void lrzip_outfilename_set(Lrzip *lr, const char *file); /** * @brief Get the output filename for operations * @param lr The struct * @return The previously set output filename */ const char *lrzip_outfilename_get(Lrzip *lr); /** * @brief Retrieve the MD5 digest of an LRZIP file * Use this function after calling lrzip_run() to retrieve the digest of * the processed archive. * @param lr The struct having run an operation * @return The MD5 digest of the operation's associated archive * @note The return value of this function will change after each operation */ const unsigned char *lrzip_md5digest_get(Lrzip *lr); /** * @brief Run the current operation * This function is called when all necessary parameters have been set for an operation. * The calling thread will then block until the operation has fully completed, writing * output using logging and progress callbacks and calling password callbacks as required. 
* @param lr The struct to run an operation with * @return true if the operation successfully completed, else false */ bool lrzip_run(Lrzip *lr); /** * @brief Set the logging level * @param lr The struct * @param level The #Lrzip_Log_Level to use */ void lrzip_log_level_set(Lrzip *lr, int level); /** * @brief Get the logging level * @param lr The struct to query * @return The #Lrzip_Log_Level of @p lr */ int lrzip_log_level_get(Lrzip *lr); /** * @brief Set a logging callback for use with all operations * This function sets an Lrzip_Log_Cb which will be called any time logging * output is to be displayed. The callback will be called as many times as the #Lrzip_Log_Level * requires. * @param lr The struct * @param cb The callback * @param log_data The data param to use in the logging callback */ void lrzip_log_cb_set(Lrzip *lr, Lrzip_Log_Cb cb, void *log_data); /** * @brief Redirect stdout log messages to another stream * This function sends any logging messages which would normally go to stdout into another stream. * Useful for when stdout is the target set by lrzip_outfile_set(). * @param lr The struct * @param out The stream to use instead of stdout */ void lrzip_log_stdout_set(Lrzip *lr, FILE *out); /** * @brief Return the stream currently used as stdout * @param lr The struct to query * @return A stream where stdout messages will be sent, NULL on failure */ FILE *lrzip_log_stdout_get(Lrzip *lr); /** * @brief Redirect stderr log messages to another stream * This function sends any logging messages which would normally go to stderr into another stream. 
* @param lr The struct * @param err The stream to use instead of stderr */ void lrzip_log_stderr_set(Lrzip *lr, FILE *err); /** * @brief Return the stream currently used as stderr * @param lr The struct to query * @return A stream where stderr messages will be sent, NULL on failure */ FILE *lrzip_log_stderr_get(Lrzip *lr); /** * @brief Set a password callback for use with all operations * This function sets an Lrzip_Password_Cb which will be used when working with encrypted * LRZIP archives. It will be called both when compressing and decompressing archives. * @param lr The struct * @param cb The callback to set * @param data The data param to use in the password callback */ void lrzip_pass_cb_set(Lrzip *lr, Lrzip_Password_Cb cb, void *data); /** * @brief Set an info callback for use with all operations * This function sets an Lrzip_Info_Cb which will be called any time there is a * progress update in an operation. * @param lr The struct * @param cb The callback to set * @param data The data param to use in the info callback */ void lrzip_info_cb_set(Lrzip *lr, Lrzip_Info_Cb cb, void *data); /** * @brief Quick setup for performing a decompression * This function performs all the required allocations and sets necessary parameters * to decompress @p source to @p dest. No extra functions are necessary to call, and * this function will block until it completes. * @param dest A pointer to the LRZIP-allocated destination buffer * @param dest_len A pointer to the length of @p dest * @param source The allocated source buffer to read from * @param source_len The length of @p source * @return true on success, else false */ bool lrzip_decompress(void *dest, unsigned long *dest_len, const void *source, unsigned long source_len); /** * @brief Quick setup for performing a compression * This function performs all the required allocations and sets necessary parameters * to compress @p source to @p dest. 
No extra functions are necessary to call, and * this function will block until it completes. * @param dest A pointer to the LRZIP-allocated destination buffer * @param dest_len A pointer to the length of @p dest * @param source The allocated source buffer to read from * @param source_len The length of @p source * @param mode The compression mode to use * @param compress_level The value, 1-9, to use as a compression level * @return true on success, else false */ bool lrzip_compress_full(void *dest, unsigned long *dest_len, const void *source, unsigned long source_len, Lrzip_Mode mode, int compress_level); /** * @brief Quick setup for performing a compression using LZMA * This function performs all the required allocations and sets necessary parameters * to compress @p source to @p dest. No extra functions are necessary to call, and * this function will block until it completes. * @param dest A pointer to the LRZIP-allocated destination buffer * @param dest_len A pointer to the length of @p dest * @param source The allocated source buffer to read from * @param source_len The length of @p source * @return true on success, else false */ static inline bool lrzip_compress(void *dest, unsigned long *dest_len, const void *source, unsigned long source_len) { return lrzip_compress_full(dest, dest_len, source, source_len, LRZIP_MODE_COMPRESS_LZMA, 7); } /** * @brief Quick setup for performing a compression using LZO * This function performs all the required allocations and sets necessary parameters * to compress @p source to @p dest. No extra functions are necessary to call, and * this function will block until it completes. 
* @param dest A pointer to the LRZIP-allocated destination buffer * @param dest_len A pointer to the length of @p dest * @param source The allocated source buffer to read from * @param source_len The length of @p source * @return true on success, else false */ static inline bool lrzip_lcompress(void *dest, unsigned long *dest_len, const void *source, unsigned long source_len) { return lrzip_compress_full(dest, dest_len, source, source_len, LRZIP_MODE_COMPRESS_LZO, 7); } /** * @brief Quick setup for performing a compression using ZLIB (GZIP) * This function performs all the required allocations and sets necessary parameters * to compress @p source to @p dest. No extra functions are necessary to call, and * this function will block until it completes. * @param dest A pointer to the LRZIP-allocated destination buffer * @param dest_len A pointer to the length of @p dest * @param source The allocated source buffer to read from * @param source_len The length of @p source * @return true on success, else false */ static inline bool lrzip_gcompress(void *dest, unsigned long *dest_len, const void *source, unsigned long source_len) { return lrzip_compress_full(dest, dest_len, source, source_len, LRZIP_MODE_COMPRESS_ZLIB, 7); } /** * @brief Quick setup for performing a compression using ZPAQ * This function performs all the required allocations and sets necessary parameters * to compress @p source to @p dest. No extra functions are necessary to call, and * this function will block until it completes. 
* @param dest A pointer to the LRZIP-allocated destination buffer * @param dest_len A pointer to the length of @p dest * @param source The allocated source buffer to read from * @param source_len The length of @p source * @return true on success, else false */ static inline bool lrzip_zcompress(void *dest, unsigned long *dest_len, const void *source, unsigned long source_len) { return lrzip_compress_full(dest, dest_len, source, source_len, LRZIP_MODE_COMPRESS_ZPAQ, 7); } /** * @brief Quick setup for performing a compression using BZIP * This function performs all the required allocations and sets necessary parameters * to compress @p source to @p dest. No extra functions are necessary to call, and * this function will block until it completes. * @param dest A pointer to the LRZIP-allocated destination buffer * @param dest_len A pointer to the length of @p dest * @param source The allocated source buffer to read from * @param source_len The length of @p source * @return true on success, else false */ static inline bool lrzip_bcompress(void *dest, unsigned long *dest_len, const void *source, unsigned long source_len) { return lrzip_compress_full(dest, dest_len, source, source_len, LRZIP_MODE_COMPRESS_BZIP2, 7); } /** * @brief Quick setup for performing RZIP preprocessing * This function performs all the required allocations and sets necessary parameters * to preprocess @p source to @p dest. No extra functions are necessary to call, and * this function will block until it completes. 
* @param dest A pointer to the LRZIP-allocated destination buffer * @param dest_len A pointer to the length of @p dest * @param source The allocated source buffer to read from * @param source_len The length of @p source * @return true on success, else false */ static inline bool lrzip_rcompress(void *dest, unsigned long *dest_len, const void *source, unsigned long source_len) { return lrzip_compress_full(dest, dest_len, source, source_len, LRZIP_MODE_COMPRESS_NONE, 7); } /** * @brief Quick setup for performing a compression using LZMA and a user-defined compression level * This function performs all the required allocations and sets necessary parameters * to compress @p source to @p dest. No extra functions are necessary to call, and * this function will block until it completes. * @param dest A pointer to the LRZIP-allocated destination buffer * @param dest_len A pointer to the length of @p dest * @param source The allocated source buffer to read from * @param source_len The length of @p source * @param compress_level The value, 1-9, to use as a compression level * @return true on success, else false */ static inline bool lrzip_compress2(void *dest, unsigned long *dest_len, const void *source, unsigned long source_len, int compress_level) { return lrzip_compress_full(dest, dest_len, source, source_len, LRZIP_MODE_COMPRESS_LZMA, compress_level); } /** * @brief Quick setup for performing a compression using LZO and a user-defined compression level * This function performs all the required allocations and sets necessary parameters * to compress @p source to @p dest. No extra functions are necessary to call, and * this function will block until it completes. 
* @param dest A pointer to the LRZIP-allocated destination buffer * @param dest_len A pointer to the length of @p dest * @param source The allocated source buffer to read from * @param source_len The length of @p source * @param compress_level The value, 1-9, to use as a compression level * @return true on success, else false */ static inline bool lrzip_lcompress2(void *dest, unsigned long *dest_len, const void *source, unsigned long source_len, int compress_level) { return lrzip_compress_full(dest, dest_len, source, source_len, LRZIP_MODE_COMPRESS_LZO, compress_level); } /** * @brief Quick setup for performing a compression using ZLIB (GZIP) and a user-defined compression level * This function performs all the required allocations and sets necessary parameters * to compress @p source to @p dest. No extra functions are necessary to call, and * this function will block until it completes. * @param dest A pointer to the LRZIP-allocated destination buffer * @param dest_len A pointer to the length of @p dest * @param source The allocated source buffer to read from * @param source_len The length of @p source * @param compress_level The value, 1-9, to use as a compression level * @return true on success, else false */ static inline bool lrzip_gcompress2(void *dest, unsigned long *dest_len, const void *source, unsigned long source_len, int compress_level) { return lrzip_compress_full(dest, dest_len, source, source_len, LRZIP_MODE_COMPRESS_ZLIB, compress_level); } /** * @brief Quick setup for performing a compression using ZPAQ and a user-defined compression level * This function performs all the required allocations and sets necessary parameters * to compress @p source to @p dest. No extra functions are necessary to call, and * this function will block until it completes. 
* @param dest A pointer to the LRZIP-allocated destination buffer * @param dest_len A pointer to the length of @p dest * @param source The allocated source buffer to read from * @param source_len The length of @p source * @param compress_level The value, 1-9, to use as a compression level * @return true on success, else false */ static inline bool lrzip_zcompress2(void *dest, unsigned long *dest_len, const void *source, unsigned long source_len, int compress_level) { return lrzip_compress_full(dest, dest_len, source, source_len, LRZIP_MODE_COMPRESS_ZPAQ, compress_level); } /** * @brief Quick setup for performing a compression using BZIP and a user-defined compression level * This function performs all the required allocations and sets necessary parameters * to compress @p source to @p dest. No extra functions are necessary to call, and * this function will block until it completes. * @param dest A pointer to the LRZIP-allocated destination buffer * @param dest_len A pointer to the length of @p dest * @param source The allocated source buffer to read from * @param source_len The length of @p source * @param compress_level The value, 1-9, to use as a compression level * @return true on success, else false */ static inline bool lrzip_bcompress2(void *dest, unsigned long *dest_len, const void *source, unsigned long source_len, int compress_level) { return lrzip_compress_full(dest, dest_len, source, source_len, LRZIP_MODE_COMPRESS_BZIP2, compress_level); } /** * @brief Quick setup for performing RZIP preprocessing and a user-defined compression level * This function performs all the required allocations and sets necessary parameters * to preprocess @p source to @p dest. No extra functions are necessary to call, and * this function will block until it completes. 
* @param dest A pointer to the LRZIP-allocated destination buffer * @param dest_len A pointer to the length of @p dest * @param source The allocated source buffer to read from * @param source_len The length of @p source * @param compress_level The value, 1-9, to use as a compression level * @return true on success, else false */ static inline bool lrzip_rcompress2(void *dest, unsigned long *dest_len, const void *source, unsigned long source_len, int compress_level) { return lrzip_compress_full(dest, dest_len, source, source_len, LRZIP_MODE_COMPRESS_NONE, compress_level); } #ifdef __cplusplus } #endif #endif lrzip-0.651/Makefile.am000066400000000000000000000036231421175057200147310ustar00rootroot00000000000000ACLOCAL_AMFLAGS = -I m4 MAINTAINERCLEANFILES = \ Makefile.in \ aclocal.m4 \ config.guess \ config.h.in \ config.h.in~ \ config.sub \ configure \ depcomp \ install-sh \ ltconfig \ ltmain.sh \ missing \ $(PACKAGE_TARNAME)-$(PACKAGE_VERSION).tar.gz \ $(PACKAGE_TARNAME)-$(PACKAGE_VERSION).tar.bz2 \ $(PACKAGE_TARNAME)-$(PACKAGE_VERSION).tar.xz \ $(PACKAGE_TARNAME)-$(PACKAGE_VERSION).tar.lrz \ $(PACKAGE_TARNAME)-$(PACKAGE_VERSION)-doc.tar.bz2 \ m4/libtool.m4 \ m4/lt~obsolete.m4 \ m4/ltoptions.m4 \ m4/ltsugar.m4 \ m4/ltversion.m4 SUBDIRS = lzma man doc AM_CFLAGS = -I. 
-I lzma/C -DNDEBUG
AM_CXXFLAGS = $(AM_CFLAGS)

# lrztar is a script; it is installed into $(bindir) next to the lrzip binary.
lrztardir = $(bindir)
lrztar_SCRIPTS = lrztar

# Non-installed libtool convenience library holding everything except main.c.
noinst_LTLIBRARIES = libtmplrzip.la
libtmplrzip_la_SOURCES = \
	lrzip_private.h \
	lrzip.c \
	lrzip_core.h \
	rzip.h \
	rzip.c \
	runzip.c \
	runzip.h \
	stream.c \
	stream.h \
	util.c \
	util.h \
	md5.c \
	md5.h \
	aes.c \
	aes.h \
	sha4.c \
	sha4.h \
	libzpaq/libzpaq.cpp \
	libzpaq/libzpaq.h
libtmplrzip_la_LIBADD = lzma/C/liblzma.la

bin_PROGRAMS = lrzip
lrzip_SOURCES = \
	main.c
# NOTE(review): presumably the Automake "dummy C++ source" idiom, so that lrzip
# is linked with the C++ linker (libzpaq is C++) -- confirm before renaming.
nodist_EXTRA_lrzip_SOURCES = dummyy.cxx
lrzip_LDADD = libtmplrzip.la
if STATIC
lrzip_LDFLAGS = -all-static
endif

dist_doc_DATA = \
	AUTHORS \
	BUGS \
	ChangeLog \
	COPYING \
	README.md \
	README-NOT-BACKWARD-COMPATIBLE \
	TODO \
	WHATS-NEW

noinst_HEADERS = Lrzip.h
lrzipdir = $(includedir)

EXTRA_DIST = \
	lrztar \
	description-pak \
	autogen.sh \
	INSTALL \
	$(dist_doc_DATA)

# Create the alias symlinks (lrunzip, lrzcat, lrz -> lrzip; lrzuntar -> lrztar)
# inside the staged bindir.
install-exec-hook:
	$(LN_S) -f lrzip$(EXEEXT) $(DESTDIR)$(bindir)/lrunzip$(EXEEXT)
	$(LN_S) -f lrzip$(EXEEXT) $(DESTDIR)$(bindir)/lrzcat$(EXEEXT)
	$(LN_S) -f lrztar$(EXEEXT) $(DESTDIR)$(bindir)/lrzuntar$(EXEEXT)
	$(LN_S) -f lrzip$(EXEEXT) $(DESTDIR)$(bindir)/lrz$(EXEEXT)

# Remove exactly the aliases that install-exec-hook created. The paths must
# carry the same $(DESTDIR) prefix and $(EXEEXT) suffix as the install rule;
# otherwise a staged uninstall (make DESTDIR=... uninstall) would delete files
# from the live prefix and leave the staged links behind.
uninstall-local:
	rm -f $(DESTDIR)$(bindir)/lrunzip$(EXEEXT)
	rm -f $(DESTDIR)$(bindir)/lrzcat$(EXEEXT)
	rm -f $(DESTDIR)$(bindir)/lrzuntar$(EXEEXT)
	rm -f $(DESTDIR)$(bindir)/lrz$(EXEEXT)

.PHONY: doc

# Documentation
doc: all
	@echo "entering doc/"
	$(MAKE) -C doc doc
lrzip-0.651/README-NOT-BACKWARD-COMPATIBLE000066400000000000000000000070121421175057200170000ustar00rootroot00000000000000lrzip-0.60 update All files created with lrzip 0.6x are not backward compatible with versions prior to 0.60. v0.6x can read files generated with earlier versions. Con Kolivas March 2011. lrzip-0.50 update All files created with lrzip 0.5x are not backward compatible with versions prior to 0.50. v0.50 can read earlier generated files. lrzip-0.41 update Files created with lrzip 0.41 and selecting the -z option for ZPAQ compression are not backwardly compatible. lrzip-0.40 update! FILES CREATED WITH LRZIP 0.40+ are not backward compatible with versions prior to 0.40.
The file format was completely changed to 64bit addressing throughout to allow massive compression windows on massive files. v0.40+ will detect older version files and decompress them fine though, but will always generate files in the new format. Con Kolivas November 2009. lrzip-0.24 update! FILES CREATED WITH LRZIP 0.23 and earlier are NOT BACKWARD COMPATIBLE if compressed with LZMA. All other compression schemes are compatible. The lrz file header is changed. It now stores the encoded parameters LZMA uses in bytes 16-20. This is a departure from the method used in lrzip-0.23. Please preserve the binary of lrzip-0.23 or earlier if you require access to lrzip files using LZMA compression created with an earlier version. FILES CREATED WITH LRZIP-0.22 MAY NOT BE BACKWARD COMPATIBLE! lrzip-0.22 uses a slightly different and improved method of compressing and decompressing files compared to lrzip-0.19 and earlier versions. ANY FILE COMPRESSED WITH LZMA USING A COMPRESSION LEVEL > 7 cannot be decompressed with any earlier version of lrzip. ANY FILE COMPRESSED WITH LZMA USING A COMPRESSION LEVEL <=7 CAN be decompressed with earlier versions of lrzip. ANY FILE COMPRESSED WITH AN EARLIER VERSION OF LRZIP CAN be decompressed with lrzip-0.22 --------------------------------------------------------- Brief Technical discussion. Earlier versions of lrzip used a variable dictionary buffer size when compressing files with LZMA. It used a formula of Compression Level + 14 bits. LZMA Dictionary buffer size was computed as 2^(level+14). 2MB, 21 bits had been the default for compression level 7. Level 8 was 4MB and level 9, 8MB. The default decompression level was fixed at 23 bits, 8MB. This was equal to the (then) largest possible dictionary buffer size, 9+14=23, 2^23=8MB. So all data regardless of compression level could decompress. Beginning in lrzip-0.22, the default dictionary buffer size is Level + 16 bits (7+16=23 bits or 8MB). 
Files compressed with the default level or lower CAN be decompressed with an earlier lrzip version. The maximum dictionary buffer size for lrzip-0.22 is now 25 bits, or 32MB. Files compressed using level 8 or level 9 (24 or 25 bits) cannot be decompressed with earlier versions of lrzip since the fixed dictionary buffer size of 8MB used for decompression in lrzip-0.19 and earlier cannot hold the data from lrzip-0.22. Here is a table to show what can and cannot be decompressed with lrzip-0.19 and earlier:

LRZIP-0.22    LRZIP-0.19
COMPRESSION   CAN           DICTIONARY
LEVEL         DECOMPRESS?   BUFFER SIZE
-----------   -----------   -----------
<=7           YES           <=8MB (2^23)
8             NO            16MB (2^24)
9             NO            32MB (2^25)

lrzip-0.22 can decompress all earlier files. lrzip-0.22 uses three bytes in the compressed file to store the compression level used. Thus, when decompressing, lrzip will read the proper dictionary buffer size and use it when decompressing the file. See the file magic.header.txt for more information. January 2008 Peter Hyman pete@peterhyman.com lrzip-0.651/README.md000066400000000000000000000513311421175057200141530ustar00rootroot00000000000000lrzip - Long Range ZIP or LZMA RZIP =================================== A compression utility that excels at compressing large files (usually > 10-50 MB). Larger files and/or more free RAM mean that the utility will be able to more effectively compress your files (i.e. faster / smaller size), especially if the filesize(s) exceed 100 MB. You can either choose to optimise for speed (fast compression / decompression) or size, but not both. ### haneefmubarak's TL;DR for the long explanation: Just change the word `directory` to the name of the directory you wish to compress.
#### Compression: ```bash lrzdir=directory; tar cvf $lrzdir.tar $lrzdir; lrzip -Ubvvp `nproc` -S .bzip2-lrz -L 9 $lrzdir.tar; rm -fv $lrzdir.tar; unset lrzdir ``` `tar`s the directory, then maxes out all of the system's processor cores along with sliding window RAM to give the best **BZIP2** compression while being as fast as possible, enables max verbosity output, attaches the extension `.bzip2-lrz`, and finally gets rid of the temporary tarfile. Uses a tempvar `lrzdir` which is unset automatically. #### Decompression for the kind of file from above: ```bash lrzdir=directory; lrunzip -cdivvp `nproc` -o $lrzdir.tar $lrzdir.tar.bzip2-lrz; tar xvf $lrzdir.tar; rm -vf $lrzdir.tar ``` Checks integrity, then decompresses the directory using all of the processor cores for max speed, enables max verbosity output, unarchives the resulting tarfile, and finally gets rid of the temporary tarfile. Uses the same kind of tempvar. ### lrzip build/install guide: A quick guide on building and installing. #### What you will need - gcc - bash or zsh - pthreads - tar - libc - libm - libz-dev - libbz2-dev - liblzo2-dev - liblz4-dev - coreutils - Optional nasm - git if you want a repo-fresh copy - an OS with the usual *nix headers and libraries #### Obtaining the source Two different ways of doing this: Stable: Packaged tarball that is known to work: Go to and download the `tar.gz` file from the top. `cd` to the directory you downloaded, and use `tar xvzf lrzip-X.X.tar.gz` to extract the files (don't forget to replace `X.X` with the correct version). Finally, cd into the directory you just extracted. 
Latest: `git clone -v https://github.com/ckolivas/lrzip.git; cd lrzip` #### Build ```bash ./autogen.sh ./configure make -j `nproc` # maxes out all cores ``` #### Install Simple 'n Easy™: `sudo make install` ### lrzip 101: |Command|Result| |------|------| |`lrztar directory`|An archive `directory.tar.lrz` compressed with **LZMA**.| |`lrzuntar directory.tar.lrz`|A directory extracted from a `lrztar` archive.| |`lrzip filename`|An archive `filename.lrz` compressed with **LZMA**, meaning slow compression and fast decompression.| |`lrzip -z filename`|An archive `filename.lrz` compressed with **ZPAQ** that can give extreme compression, but takes a bit longer than forever to compress and decompress.| |`lrzip -l filename`|An archive lightly compressed with **LZO**, meaning really, really fast compression and decompression.| |`lrunzip filename.lrz`|Decompress `filename.lrz` to `filename`.| |`lrz filename`|As per lrzip above but with gzip compatible semantics (i.e. will be quiet and delete original file).| |`lrz -d filename.lrz`|As per lrunzip above but with gzip compatible semantics (i.e. will be quiet and delete original file).| ### lrzip internals lrzip uses an extended version of [rzip](http://rzip.samba.org/) which does a first pass long distance redundancy reduction. lrzip's modifications allow it to scale to accommodate various memory sizes.
Then, one of the following scenarios occurs: - Compressed - (default) **LZMA** gives excellent compression @ ~2x the speed of bzip2 - **ZPAQ** gives extreme compression while taking forever - **LZO** gives insanely fast compression that can actually be faster than simply copying a large file - **GZIP** gives compression almost as fast as LZO but with better compression - **BZIP2** is a defacto linux standard and hacker favorite which usually gives quite good compression (ZPAQ>LZMA>BZIP2>GZIP>LZO) while staying fairly fast (LZO>GZIP>BZIP2>LZMA>ZPAQ); in other words, a good middle-ground and a good choice overall - Uncompressed, in the words of the software's original author: > Leaving it uncompressed and rzip prepared. This form improves substantially > any compression performed on the resulting file in both size and speed (due to > the nature of rzip preparation merging similar compressible blocks of data and > creating a smaller file). By "improving" I mean it will either speed up the > very slow compressors with minor detriment to compression, or greatly increase > the compression of simple compression algorithms. > > (Con Kolivas, from the original lrzip README) The only real disadvantages: - The main program, lrzip, only works on single files, and therefore requires the use of an lrztar wrapper to fake a complete archiver. - lrzip requires quite a bit of memory along with a modern processor to get the best performance in reasonable time. This usually means that it is somewhat unusable with less than 256 MB. However, decompression usually requires less RAM and can work on less powerful machines with much less RAM. On machines with less RAM, it may be a good idea to enable swap if you want to keep your operating system happy. - Piping output to and/or from STDIN and/or STDOUT works fine with both compression and decompression, but larger files compressed this way will likely end up being compressed less efficiently. 
Decompression doesn't really have any issues with piping, though. One of the more unique features of lrzip is that it will try to use all of the available RAM as best it can at all times to provide maximum benefit. This is the default operating method, where it will create and use the single largest memory window that will still fit in available memory without freezing up the system. It does this by `mmap`ing the small portions of the file that it is working on. However, it also has a unique "sliding `mmap`" feature, which allows it to use compression windows that far exceed the size of your RAM if the file you are compressing is large. It does this by using one large `mmap` along with a smaller moving `mmap` buffer to track the part of the file that is currently being examined. From a higher level, this can be seen as simply emulating a single, large `mmap` buffer. The unfortunate thing about this feature is that it can become extremely slow. The counter-argument to being slower is that it will usually give a better compression factor. The file `doc/README.benchmarks` has some performance examples to show what kind of data lrzip is good with. ### FAQ > Q: What kind of encryption does lrzip use? > A: lrzip uses SHA2-512 repetitive hashing of the password along with a salt > to provide a key which is used by AES-128 to do block encryption. Each block > has more random salts added to the block key. The amount of initial hashing > increases as the timestamp goes forward, in direct relation to Moore's law, > which means that the amount of time required to encrypt/decrypt the file > stays the same on a contemporary computer. It is virtually > guaranteed that the same file encrypted with the same password will never > be the same twice. The weakest link in this encryption mode by far is the > password chosen by the user. 
There is currently no known attack or backdoor > for this encryption mechanism, and there is absolutely no way of retrieving > your password should you forget it. > Q: How do I make a static build? > A: `./configure --enable-static-bin` > Q: I want the absolute maximum compression I can possibly get, what do I do? > A: Try the command line options "-Uzp 1 -L 9". This uses all available ram and > ZPAQ compression, and even uses a compression window larger than the ram you have. > The -p 1 option disables multithreading which improves compression but at the > expense of speed. Expect it to take many times longer. > Q: I want the absolute fastest decent compression I can possibly get. > A: Try the command line option -l. This will use the lzo backend compression, > and level 7 compression (1 isn't much faster). > Q: How much slower is the unlimited mode? > A: It depends on two things. First, just how much larger than your ram the file is, as the bigger the difference, the slower it will be. The second is how much redundant data there is. The more there is, the slower, but ultimately the better the compression. Why isn't it on by default? If the compression window is a LOT larger than ram, with a lot of redundant information it can be drastically slower. I may revisit this possibility in the future if I can make it any faster. > Q: Can I use your tool for even more compression than lzma offers? > A: Yes, the rzip preparation of files makes them more compressible by most other compression techniques I have tried. Using the -n option will generate a .lrz file smaller than the original which should be more compressible, and since it is smaller it will compress faster than it otherwise would have. > Q: 32bit? > A: 32bit machines have a limit of 2GB sized compression windows due to userspace limitations on mmap and malloc, so even if you have much more ram you will not be able to use compression windows larger than 2GB.
Also you may be unable to decompress files compressed on 64bit machines which have used windows larger than 2GB. > Q: How about 64bit? > A: 64bit machines with their ability to address massive amounts of ram will excel with lrzip due to being able to use compression windows limited only in size by the amount of physical ram. > Q: Other operating systems? > A: The code is POSIXy with GNU extensions. Patches are welcome. Version 0.43+ should build on MacOSX 10.5+ > Q: Does it work on stdin/stdout? > A: Yes it does. Compression and decompression work well to/from STDIN/STDOUT. However because lrzip does multiple passes on the data, it has to store a large amount in ram before it dumps it to STDOUT (and vice versa), thus it is unable to work with the massive compression windows regular operation provides. Thus the compression afforded on files larger than approximately 25% RAM size will be less efficient (though still benefiting compared to traditional compression formats). > Q: I have another compression format that is even better than zpaq, can you use that? > A: You can use it yourself on rzip prepared files (see above). Alternatively if the source code is compatible with the GPL license it can be added to the lrzip source code. Libraries with functions similar to compress() and decompress() functions of zlib would make the process most painless. Please tell me if you have such a library so I can include it :) > Q: What's this "Starting lzma back end compression thread..." message? > A: While I'm a big fan of progress percentage being visible, unfortunately lzma compression can't currently be tracked when handing over 100+MB chunks over to the lzma library. Therefore you'll see progress percentage until each chunk is handed over to the lzma library. > Q: What's this "lz4 testing for incompressible data" message? > A: Other compression is much slower, and lz4 is the fastest. 
To help speed up the process, lz4 compression is performed on the data first to test that the data is at all compressible. If a small block of data is not compressible, it tests progressively larger blocks until it has tested all the data (if it fails to compress at all). If no compressible data is found, then the subsequent compression is not even attempted. This can save a lot of time during the compression phase when there is incompressible data. Theoretically it may be possible that data is compressible by the other backend (zpaq, lzma etc) and not at all by lz4, but in practice such data achieves only minuscule amounts of compression which are not worth pursuing. Most of the time it is clear one way or the other that data is compressible or not. If you wish to disable this test and force it to try compressing it anyway, use -T. > Q: I have truckloads of ram so I can compress files much better, but can my generated file be decompressed on machines with less ram? > A: Yes. Ram requirements for decompression go up only by the -L compression option with lzma and are never anywhere near as large as the compression requirements. However if you're on 64bit and you use a compression window greater than 2GB, it might not be possible to decompress it on 32bit machines. > Q: Why are you including bzip2 compression? > A: To maintain a similar compression format to the original rzip (although the other modes are more useful). > Q: What about multimedia? > A: Most multimedia is already in a heavily compressed "lossy" format which by its very nature has very little redundancy. This means that there is not much that can actually be compressed. If your video/audio/picture is in a high bitrate, there will be more redundancy than a low bitrate one making it more suitable to compression. None of the compression techniques in lrzip are optimised for this sort of data. 
However, the nature of rzip preparation means that you'll still get better compression than most normal compression algorithms give you if you have very large files. ISO images of dvds for example are best compressed directly instead of individual .VOB files. ZPAQ is the only compression format that can do any significant compression of multimedia. > Q: Is this multithreaded? > A: As of version 0.540, it is HEAVILY multithreaded with the back end compression and decompression phase, and will continue to process the rzip pre-processing phase so when using one of the more CPU intensive backend compressions like lzma or zpaq, SMP machines will show massive speed improvements. Lrzip will detect the number of CPUs to use, but it can be overridden with the -p option if the slightly better compression is desired more than speed. -p 1 will give the best compression but also be the slowest. > Q: This uses heaps of memory, can I make it use less? > A: Well you can by setting -w to the lowest value (1) but the huge use of memory is what makes the compression better than ordinary compression programs so it defeats the point. You'll still derive benefit with -w 1 but not as much. > Q: What CFLAGS should I use? > A: With a recent enough compiler (gcc>4) setting both CFLAGS and CXXFLAGS to -O2 -march=native -fomit-frame-pointer > Q: What compiler does this work with? > A: It has been tested on gcc, ekopath and the intel compiler successfully previously. Whether the commercial compilers help or not, I could not tell you. > Q: What codebase are you basing this on? > A: rzip v2.1 and lzma sdk920, but it should be possible to stay in sync with each of these in the future. > Q: Do we really need yet another compression format? > A: It's not really a new one at all; simply a reimplementation of a few very good performing ones that will scale with memory and file size. > Q: How do you use lrzip yourself? > A: Three basic uses. 
I compress large files currently on my drive with the -l option since it is so quick to get a space saving. When archiving data for permanent storage I compress it with the default options. When compressing small files for distribution I use the -z option for the smallest possible size. > Q: I found a file that compressed better with plain lzma. How can that be? > A: When the file is more than 5 times the size of the compression window you have available, the efficiency of rzip preparation drops off as a means of getting better compression. Eventually when the file is large enough, plain lzma compression will get better ratios. The lrzip compression will be a lot faster though. The only way around this is to use as large compression windows as possible with -U option. > Q: Can I use swapspace as ram for lrzip with a massive window? > A: It will indirectly do this with -U (unlimited) mode enabled. This mode will make the compression window as big as the file itself no matter how big it is, but it will slow down proportionately more the bigger the file is than your ram. > Q: Why do you nice it to +19 by default? Can I speed up the compression by changing the nice value? > A: This is a common misconception about what nice values do. They only tell the cpu process scheduler how to prioritise workloads, and if your application is the _only_ thing running it will be no faster at nice -20 nor will it be any slower at +19. > Q: What is the LZ4 Testing option, -T? > A: LZ4 testing is normally performed for the slower back-end compression of LZMA and ZPAQ. The reasoning is that if it is completely incompressible by LZ4 then it will also be incompressible by them. Thus if a block fails to be compressed by the very fast LZ4, lrzip will not attempt to compress that block with the slower compressor, thereby saving time. If this option is enabled, it will bypass the LZ4 testing and attempt to compress each block regardless. 
> Q: Compression and decompression progress on large archives slows down and speeds up. There's also a jump in the percentage at the end? > A: Yes, that's the nature of the compression/decompression mechanism. The jump is because the rzip preparation makes the amount of data that the compression backend (lzma) needs to compress much smaller. > Q: Tell me about patented compression algorithms, GPL, lawyers and copyright. > A: No. > Q: I receive an error "LZMA ERROR: 2. Try a smaller compression window." what does this mean? > A: LZMA requests large amounts of memory. When a higher compression window is used, there may not be enough contiguous memory for LZMA: LZMA may request up to 25% of TOTAL ram depending on compression level. If contiguous blocks of memory are not free, LZMA will return an error. This is not a fatal error, and a backup mode of compression will be used. > Q: Where can I get more information about the internals of LZMA? > A: See http://www.7-zip.org and http://www.p7zip.org. Also, see the file ./lzma/C/lzmalib.h which explains the LZMA properties used and the LZMA memory requirements and computation. > Q: This version is much slower than the old version? > A: Make sure you have set CFLAGS and CXXFLAGS. An unoptimised build will be almost 3 times slower. > Q: Why not update to the latest version of libzpaq? > A: For reasons that are unclear the later versions of libzpaq create corrupt archives when included with lrzip. #### LIMITATIONS Due to mmap limitations the maximum size a window can be set to is currently 2GB on 32bit unless the -U option is specified. Files generated on 64 bit machines with windows >2GB in size might not be decompressible on 32bit machines. Large files might not decompress on machines with less RAM if SWAP is disabled. #### BUGS: Probably lots. Please report them at https://github.com/ckolivas/lrzip/issues if you spot any :D Any known ones should be documented in the file BUGS.
#### Backends: rzip: lzo: lzma: zpaq: ### Thanks (CONTRIBUTORS) |Person(s)|Thanks for| |---|---| |`Andrew Tridgell`|`rzip`| |`Markus Oberhumer`|`lzo`| |`Igor Pavlov`|`lzma`| |`Jean-Loup Gailly & Mark Adler`|`zlib`| |***`Con Kolivas`***|***Original Code, binding all of this together, managing the project, original `README`***| |`Christian Leber`|`lzma` compatibility layer| |`Michael J Cohen`|Darwin/OSX support| |`Lasse Collin`|fixes to `LZMALib.cpp` and `Makefile.in`| |Everyone else who coded along the way (add yourself where appropriate if that's you)|Miscellaneous Coding| |**`Peter Hyman`**|Most of the `0.19` to `0.24` changes| |`^^^^^^^^^^^`|Updating the multithreaded `lzma` lib |`^^^^^^^^^^^`|All sorts of other features |`René Rhéaume`|Fixing executable stacks| |`Ed Avis`|Various fixes| |`Matt Mahoney`|`zpaq` integration code| |`Jukka Laurila`|Additional Darwin/OSX support| |`George Makrydakis`|`lrztar` wrapper| |`Ulrich Drepper`|*special* implementation of md5| |**`Michael Blumenkrantz`**|New config tools| |`^^^^^^^^^^^^^^^^^^^^`|`liblrzip`| |Authors of `PolarSSL`|Encryption code| |`Serge Belyshev`|Extensive help, advice, and patches to implement secure encryption| |`Jari Aalto`|Fixing typos, esp. in code| |`Carlo Alberto Ferraris`|Code cleanup |`Peter Hyman`|Additional documentation| |`Haneef Mubarak`|Cleanup, Rewrite, and GH Markdown of `README` --> `README.md`| Persons above are listed in chronological order of first contribution to **lrzip**. Person(s) with names in **bold** have multiple major contributions, person(s) with names in *italics* have made massive contributions, person(s) with names in ***both*** have made innumerable massive contributions. 
#### README Authors Con Kolivas (`ckolivas` on GitHub) Tuesday, 16 February 2021: README Also documented by Peter Hyman Sun, 04 Jan 2009: README Mostly Rewritten + GFMified: Haneef Mubarak (haneefmubarak on GitHub) Sun/Mon Sep 01-02 2013: README.md lrzip-0.651/TODO000066400000000000000000000011601421175057200133570ustar00rootroot00000000000000MAYBE TODO for lrzip program Upgrade to newer version of zpaq supporting 3 compression levels without relying on open_memstream so it works without temporary files on apple. Get MD5 working on apple. Make sure STDIO works properly on large files on apple. Make a liblrzip library. Other posix/windows builds?? Need help there... Add log file option so that output could be saved for review. Add test function that would only run lzo_compresses for a current file without doing any writes. Consider ncurses version or even GUI one. Consider using LZMA Filters for processor-optimised coding to increase compression. lrzip-0.651/WHATS-NEW000066400000000000000000000530141421175057200141340ustar00rootroot00000000000000lrzip-0.651 Remove redundant files Revert locale dependent output Add warnings for low memory and threads lrzip-0.650 Minor optimisations. Exit status fixes. Update and beautify information output. Fix Android build. Enable MD5 on Apple build. Deprecate and remove liblrzip which was unused and at risk of bitrot. Fix failures with compressing to STDOUT with inadequate memory. Fix possible race conditions. Fix memory leaks. Fix -q to only hide progress. Add -Q option for very quiet. lrzip-0.641 Critical bugfix for broken lz4 testing which would prevent secondary compression from being enabled. lrzip-0.640 Numerous bugfixes and build fixes. lz4 now used for compressibility testing (only) making lz4-dev a build requirement. Fixes for handling of corrupt archives without crashing. Fixes for creating small lzma based archives to stdout. 
Incomplete files are now deleted on interrupting lrzip unless the keep-broken option is enabled. Version prints to stdout instead of stderr. lrzip-0.631 Assembler code is back and works with x86_64 lrzip-0.621 Substantial speed ups for the rzip stage in both regular and unlimited modes. Lrzip now supports long command line options. Proper support for the various forms of TMPDIR environment variables. More unix portability fixes. OSX fixes. Fixed order of lrzip.conf search. Addressed all warnings created with pedantic compiler settings and clang Fixes for some stderr messages being swallowed up. Fixed being unable to decompress to STDOUT when in a non-writable directory. Changed broken liblrzip callback function API to match lrzip proper. lrzip-0.620 Fixes display output of lrzip -i for large files greater than one chunk. Fixes for various failure to allocate memory conditions when dealing with large files and STDIO. Fixes for more unix portability. Fixes for failure to decompress to STDOUT. lrzip-0.616 Fixes for various issues with -O not working with trailing slashes and outputting to directories that already exist. lrzip-0.615 Fixed -O not working on lrztar. Made it less likely to run out of ram when working with STDIN/OUT. Fixed running out of ram when using -U on huge files. Fixed corrupt archives being generated from incompressible data. Fixed corrupt archives being generated from very small files. Fixed endianness on various platforms for MD5 calculation to work. Fixed rare corruption when compressing with lzma from STDIN. Fixed all blank data being generated when compressing from STDIN on OSX. Performance micro-optimisations. Fixed corrupt archive being generated when all the same non-zero bytes exist on large files. lrzip-0.614 Fixed lrztar not working. lrzip-0.613 Fixed the bug where massive files would show an incorrect md5 value on decompression - this was a bug from the md5 code upstream. Compressing ultra-small files to corrupt archives was fixed. 
Compilation on various other platforms was fixed. A crash with using -S was fixed. lrzip-0.612 Updated to a new zpaq library back end which is faster and now supports three different compression levels, which will be activated at lrzip levels -L 1+, 4+ and 8+. This significantly increases the maximum compression available by lrzip with -L 9. The include file Lrzip.h used by liblrzip is now properly installed into $prefix/include. lrzip-0.611 lrzcat and lrzuntar have been fixed. The update counter will continue to update even when there is nothing being matched (like a file full of zeroes). Numerous optimisations in the rzip stage speeds up the faster compression modes noticeably. Checksumming is done in a separate thread during rzip compression for more compression speed improvements. lrzip-0.610 The new liblrzip library allows you to add lrzip compression and decompression to other applications with either simple lrzip_compress and lrzip_decompress functions or fine control over all the options with low level functions. Faster rzip stage when files are large enough to require the sliding mmap feature (usually >1/3 of ram) and in unlimited mode. A bug where multiple files being compressed or decompressed from the one command line could have gotten corrupted was fixed. Modification date of the decompressed file is now set to that of the lrzip archive (support for storing the original file's date would require modifying the archive format again). Compilation warning fixes. Make lrztar work with directories with spaces in their names. lrzip-0.608 Faster rzip stage through use of a selective get_sb function. The bash completion script is no longer installed by default to not conflict with distribution bash completion packages. More compilation fixes for non-linux platforms. lrzip-0.607 A rare case of not being able to decompress archives was fixed. The lzma library was updated to version 920. A bash completion script for lrzip was added. 
More debugging info was added in maximum verbose mode. Fewer messages occur without verbose mode. FreeBSD and posix compilation fixes were committed. lrzip-0.606 lrzuntar, which broke last version leaving behind an untarred .tar file, is working properly again. lrzip-0.605 Addition of lrzcat - automatically decompresses .lrz files to stdout. lrzip and lrunzip will no longer automatically output to stdout due to addition of lrzcat executable, and to be consistent with gzip. lrzip progress output will no longer spam the output unless the percentage has changed. lrzip now has no lower limit on file sizes it will happily compress and is able to work with zero byte sized files. The percentage counter when getting file info on small files will not show %nan. The executable bit will not be enabled when compressing via a means that can't preserve the original permissions (e.g. from STDIN). lrzip-0.604 lrzip will no longer fail with a "resource temporarily unavailable" error when compressing files over 100GB that require hundreds of threads to complete. lrzip-0.603 lrzip now supports stdout without requiring the '-o -' option. It detects when output is being redirected without a filename and will automatically output to stdout so you can do: lrunzip patch-2.6.38.4.lrz | patch -p1 Apple builds will not have errors on compressing files >2GB in size which broke with 0.600. lrztar will properly support -o, -O and -S. lrzip.conf file now supports encryption. lrzip will now warn if it's inappropriately passed a directory as an argument directly. lrzip-0.602 Fixed wrong symlinks which broke some package generation. Imposed limits for 32bit machines with way too much ram for their own good. Disable md5 generation on Apple for now since it's faulty. Displays full version with -V. Checks for pod2man on ./configure Now builds on Cygwin. File permissions are better carried over instead of being only 0600.
lrzip-0.601 lrzuntar, lrunzip symlinks and the pod-based manpages are installed again. Configuration clearly shows now that ASM isn't supported on 64bit. lrzip-0.600 Compressing/decompressing to/from STDIN/STDOUT now works without generating any temporary files. Very large files compressed in this way will be less efficiently compressed than if the whole solid file is presented to lrzip, but it is guaranteed not to generate temporary files on compression. Decompressing files on a machine with the same amount of ram will also not generate temporary files, but if a file was generated on a larger ram machine, lrzip might employ temporary files, but they will not be the full size of the final file. Decompression should now be faster as the rzip reconstruction stage is mostly performed in ram before being written to disk, and testing much faster. Final file sizes should be slightly smaller as block headers are now also compressed. Heavy grade encryption is now provided with the -e option. A combination of a time scaled multiply hashed sha512 password with random salt followed by aes128 block encryption of all data, including the data headers, provides for extremely secure encryption. Passwords up to 500 characters in length are supported, and the same file encrypted with the same password is virtually guaranteed to never produce the same data twice. All data beyond the basic lrzip opening header is completely obscured. Don't lose your password! Lrzip will not try to malloc a negative amount of ram on smaller ram machines, preferring to decrease the number of threads used when compressing, and then aborting to a nominal minimum. A new build configuration system which should be more robust and provides neater output during compilation. lrzip should work again on big endian hardware. lrztar / lrzuntar will no longer use temporary files. lrzip-0.571 Avoid spurious errors on failing to mmap a file. 
Free space will now be checked to ensure there is enough room for the compressed or decompressed file and lrzip will abort unless the -f option is passed to it. The extra little chunk at the end of every large file should now be fixed. The file lzma.txt now has unix end-of-lines. There will be a more accurate summary of what compression window will be used when lrzip is invoked with STDIN/STDOUT. STDIN will now be able to show estimated time to completion and percentage complete once lrzip knows how much file is left. Temporary files are much less likely to be left lying around. Less temporary file space will be used when decompressing to stdout. File checking will not be attempted when it's meaningless (like to stdout). Times displayed should avoid the nonsense thousands of seconds bug. lrzip-0.570 Multi-threaded performance has been improved with a significant speed-up on both compression and decompression. New benchmark results have been added to the README.benchmarks file. Visual output has been further improved, with an updated help menu and no unrelated system errors on failure. lrzip.conf supports the newer options available. TMP environment is now respected when using temporary files and TMPDIR can be set in lrzip.conf. LRZIP=NOCONFIG environment variable setting can be used to bypass lrzip.conf. The -M option has been removed as the -U option achieves more and has understandable semantics. Memory usage should be very tightly controlled on compression now by default, using the most possible without running out of ram. Temporary files generated when doing -t from stdin will no longer be left lying around. lrzip will no longer stupidly sit waiting to read from stdin/stdout when called from a terminal without other arguments. Executable size will be slightly smaller due to stripping symbols by default now. The -T option no longer takes an argument. It simply denotes that lzo testing should be disabled.
Verbose added to -i now prints a lot more information about an lrzip archive. lrzip-0.560 Implemented OSX multi-threading by converting all semaphores to pthread_mutexes. Converted the integrity checking to also use md5 hash checking. As a bonus it is still backwardly compatible by still storing the crc value, and yet is faster on large files than the old one. On decompression it detects whether the md5 value has been stored and chooses what integrity checking to use. Implemented the -H feature which shows the md5 hash value on compression and decompression. It is also shown in max verbose mode. Added information about what integrity testing will be used in verbose mode, and with the -i option. Added the -c option which will perform a hash check on the file generated on disk on decompression, comparing it to that from the archive to validate the decompressed file. Modified lrzip to delete broken or damaged files when lrzip is interrupted or the file generated fails an integrity test. Added the -k keep option to keep broken or damaged files. Case reports of corruption have been confirmed to NOT BE DUE TO LRZIP. lrzip-0.552 Fixed a potential silent corruption bug on decompression. Fixed compilation on freebsd. Fixed failures on incompressible blocks with bzip2 or gzip. Fixed osx failing to work. It does not support threaded compression or decompression but should work again. lrzip-0.551 Compressing from stdin should be unbroken again. Compression values returned at the end of stdin work. lzma failing to compress a block will not cause a failure. lrzip-0.550 Speed up compression on large files that take more than one pass by overlapping work on successive streams, thus using multiple CPUs better. Fix for failures to decompress large files. Decompression will be slightly slower but more reliable. Faster lzma compression by default, less prone to memory failures, but at slight compression cost. 
Recover from multithreaded failures by serialising work that there isn't enough ram to do in parallel. Revert the "smooth out spacing" change in 0.544 as it slowed things down instead of speeding them up. Larger compression windows are back for 32 bits now that memory usage is kept under better control. Fixed some memory allocation issues which may have been causing subtle bugs. lrzip-0.544 Hopefully a fix for corrupt decompression on large files with multiple stream 0 entries. Fix for use under uclibc. Fix for memory allocation errors on large files on 32 bits. Smooth out spacing of compression threads making better use of CPU on compress and decompress. Fix for using -U on ultra-small files. Use bzip2 on blocks that lzma fails to compress to make sure they are still compressed. lrzip-0.543 A fix for when large files being decompressed fail with multithreaded decompression. Slight speedup on multithreaded workloads by decreasing the nice value of the main process compared to the back end threads as it tends to be the rate limiting component. Fixed lzma compression windows being set way too small by default. lrzip-0.542 Lrzip will now try to select sane defaults for memory usage in cases where the virtual memory heavily overcommits (eg. Linux) as this seriously slows down compression. For compression windows larger than 2/3 ram, lrzip will now use a sliding mmap buffer for better performance. The progress output is more informative in max verbose mode, and will no longer do more passes than it estimates. 32 bit machines should be able to use slightly larger windows. The sliding mmap not working on 2nd pass onwards has been fixed which should speed up the slowdown of death. lrzip-0.540 MASSIVE MULTITHREADING on the decompression phase. Provided there are enough chunks of data in the archived file, lrzip will use as many threads as there are CPUs for the backend decompression. 
Much like the multithreading on the compression side, it makes the slower compression algorithms speed up the most. Fixed output from being scrambled and consuming a lot of CPU time on threaded zpaq compression. Further fixes to ensure window sizes work on 32 bit machines. Be more careful about testing for how much ram lrzip can use. Minor build warning fixes. Minor tweaks to screen output. Updated benchmarks. lrzip-0.530 MASSIVE MULTITHREADING on the compression phase. Lrzip will now use as many threads as you have CPU cores for the back end compression, and even continue doing the rzip preprocessing stage as long as it can while the other threads continue. This makes the slower compression algorithms (lzma and zpaq) much faster on multicore machines, to the point of making zpaq compression almost as fast as single threaded lzma compression. -p option added to allow you to specify number of processors to override the built-in test, or if you wish to disable threading. -P option to not set permissions has now been removed since failing to set permissions is only a warning now and not a failure. Further improvements to the progress output. Updated benchmarks and docs. lrzip-0.520 Just changed version numbering back to 2 point. lrzip-0.5.2 Fixed the Darwin build again. Fix the corner case of big ram usage on 32 bit zpaq failing due to the compression window not being limited by limiting zpaq to 600MB windows on 32 bits as well. Some previous failures now only induce warnings. Improved progress output. lrzip-0.5.1 Fixed the build on Darwin. Rewrote the rzip compression phase to make it possible to use unlimited sized windows now, not limited by ram. Unfortunately it gets progressively slower in this mode the bigger the file gets but you can compress a file of any size as one big compression window with it using the new -U option. Suggest you try the new improved -M mode first or in combination. See the docs for more information.
Changed the memory selection system to simply find the largest reasonable sized window and use that by default instead of guessing the window size. Setting -M now only affects the window size, trying to find the largest unreasonably sized window that will still work. The default compression level is now 9 and affects the rzip compression stage as well as the backend compression. Fixed some potential failures during compression. Improved screen output with more reporting in verbose mode, and chunk size percentage update. Fixed file size reporting on compressed files generated from stdin. Changed to 3 point releases in case we get more than 9 subversions ;) lrzip-0.50 Rewrote the file format to be up to 5% more compact and slightly faster. Made the memory initialisation much more robust, with attempted fallback to still work even when initial settings fail. Updated a lot of the stdin code. The most common scenario of compression from stdin now works without temporary files. Lots more meaningful warnings if failure occurs. May be able to decompress files on 32 bit machines that were compressed on 64 bit machines with >2GB windows now if there is enough ram. lrzip-0.46 Added lrzuntar which works the same as lrztar -d. Con Kolivas May 2010 lrzip-0.45 Added docs for lrztar and lrunzip. Added distclean and maintainer-clean make targets. Created git repo: http://github.com/ckolivas/lrzip Con Kolivas March 2010 lrzip-0.44 Added an lrztar wrapper to compress / decompress whole directories (finally). Added -i option to give information about a compressed file. lrzip-0.43 Darwin support updated. Should build on OSX v10.5+ Finally, stdin/stdout support. Test archive integrity support. ZPAQ support in config files. lrzip-0.42 ZPAQ compression update now shows which rzip stream it's currently compressing making the update more useful. It also doesn't update unnecessarily with every byte compressed which was slowing it down a LOT. lrzip-0.41 ZPAQ compression backend! 
ZPAQ is from the family of "paq" compressors that have some of the best compression ratios around, but at the cost of extremely long compression and equally long decompression times. This can be enabled with the -z option and makes lrzip archives made with this not backwardly compatible. lrzip-0.40 Compression windows should be limited by available ram now on 64bit. The limit on 32bit is still 2GB. The compression advantages on large files on 64bit machines with large ram should be substantially better. The file format is no longer compatible with earlier versions of lrzip. Support for decompressing older formats is present, but all new files will be generated in the new format. Minor speedups. Decompression should no longer stall at 4GB boundaries for extended periods making decompression much faster on files >4GB in size. Documentation and benchmark updates galore. lrzip-0.31 The window size limit is now 2GB on both 32bit and 64bit. While it appears to be smaller than the old windows, only 900MB was being used on .30 even though it claimed to use more. This can cause huge improvements in the compression of very large files. Flushing of data to disk between compression windows was implemented to minimise disk thrashing of read vs write. Con Kolivas November 2009 lrzip-0.30 -P option to not set permissions on output files allowing you to write to braindead filesystems (eg fat32). Probably other weird and wonderful bugs have been introduced. Con Kolivas November 2009 lrzip-0.24 has updated functionality FEATURE ENHANCEMENTS lrzip.conf file may be used to set default parameters. Omit conf using environment: LRZIP=NOCONFIG lrzip..... LRZIP environment variable may be used in the future to store certain types of parameters. LZMA SDK has been upgraded to version 4.63. This version fixes some problems certain users observed, and is much simpler using a C-only wrapper interface. lrzip now is able to compute an ETA for completion. 
In order to do this, the file must be larger than one compression window in size. That is, if the compression window is 500MB, and the file is 1GB, then after the first pass, an ETA will be computed. If the file is smaller, then no estimate can be made. lrzip is now able to compute MB/s transfer speeds for both compression and decompression. CLEANUPS Some file cleanups have been done. Peter Hyman January 2009 pete@peterhyman.com lrzip-0.22 update FEATURE ENHANCEMENTS -g option. Now supports gzip compression. Very fast! Expanded dictionary buffer size in lzma compressor. Variable, expanded dictionary size buffer in both lzma compressor and decompressor. Improved output during compression when using -vv. Multi-threading support when using multiple processors or dual core processors when using lzma compression. This results in a nearly 2x speed improvement. Assembler module support to speed up CRC checking. Improvements in autotools usage, system detection and Makefile enhancements. Lrzip now has a timer that will print total time at the end of a compression or decompression if -q command line option is not used. BUG FIX!!! Even though lrzip uses a compression threshold to prevent the lzma compressor from getting data that may not be compressible, there was still a possibility that lrzip could hang. This was because a data chunk could contain an uncompressible segment and if the lzma compressor got it, it would hang. THANKS TO LASSE COLLIN for uncovering the error in the lzma wrapper code that was causing the hangup. January 2008 Peter Hyman pete@peterhyman.com lrzip-0.651/aes.c000066400000000000000000000402231421175057200136060ustar00rootroot00000000000000/* * FIPS-197 compliant AES implementation * * Copyright (C) 2011, Con Kolivas * Copyright (C) 2006-2010, Brainspark B.V. * * This file is part of PolarSSL (http://www.polarssl.org) * Lead Maintainer: Paul Bakker * * All rights reserved.
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ /* * The AES block cipher was designed by Vincent Rijmen and Joan Daemen. * * http://csrc.nist.gov/encryption/aes/rijndael/Rijndael.pdf * http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf */ #include "aes.h" #include /* * 32-bit integer manipulation macros (little endian) */ #ifndef GET_ULONG_LE #define GET_ULONG_LE(n,b,i) \ { \ (n) = ( (unsigned long) (b)[(i) ] ) \ | ( (unsigned long) (b)[(i) + 1] << 8 ) \ | ( (unsigned long) (b)[(i) + 2] << 16 ) \ | ( (unsigned long) (b)[(i) + 3] << 24 ); \ } #endif #ifndef PUT_ULONG_LE #define PUT_ULONG_LE(n,b,i) \ { \ (b)[(i) ] = (unsigned char) ( (n) ); \ (b)[(i) + 1] = (unsigned char) ( (n) >> 8 ); \ (b)[(i) + 2] = (unsigned char) ( (n) >> 16 ); \ (b)[(i) + 3] = (unsigned char) ( (n) >> 24 ); \ } #endif /* * Forward S-box & tables */ static unsigned char FSb[256]; static unsigned long FT0[256]; static unsigned long FT1[256]; static unsigned long FT2[256]; static unsigned long FT3[256]; /* * Reverse S-box & tables */ static unsigned char RSb[256]; static unsigned long RT0[256]; static unsigned long RT1[256]; static unsigned long RT2[256]; static unsigned long RT3[256]; /* * Round constants */ static unsigned long RCON[10]; /* * Tables generation code */ #define ROTL8(x) ( ( x << 8 ) & 0xFFFFFFFF ) | ( x >> 24 ) #define 
XTIME(x) ( ( x << 1 ) ^ ( ( x & 0x80 ) ? 0x1B : 0x00 ) ) #define MUL(x,y) ( ( x && y ) ? pow[(log[x]+log[y]) % 255] : 0 ) static int aes_init_done = 0; static void aes_gen_tables( void ) { int i, x, y, z; int pow[256]; int log[256]; /* * compute pow and log tables over GF(2^8) */ for( i = 0, x = 1; i < 256; i++ ) { pow[i] = x; log[x] = i; x = ( x ^ XTIME( x ) ) & 0xFF; } /* * calculate the round constants */ for( i = 0, x = 1; i < 10; i++ ) { RCON[i] = (unsigned long) x; x = XTIME( x ) & 0xFF; } /* * generate the forward and reverse S-boxes */ FSb[0x00] = 0x63; RSb[0x63] = 0x00; for( i = 1; i < 256; i++ ) { x = pow[255 - log[i]]; y = x; y = ( (y << 1) | (y >> 7) ) & 0xFF; x ^= y; y = ( (y << 1) | (y >> 7) ) & 0xFF; x ^= y; y = ( (y << 1) | (y >> 7) ) & 0xFF; x ^= y; y = ( (y << 1) | (y >> 7) ) & 0xFF; x ^= y ^ 0x63; FSb[i] = (unsigned char) x; RSb[x] = (unsigned char) i; } /* * generate the forward and reverse tables */ for( i = 0; i < 256; i++ ) { x = FSb[i]; y = XTIME( x ) & 0xFF; z = ( y ^ x ) & 0xFF; FT0[i] = ( (unsigned long) y ) ^ ( (unsigned long) x << 8 ) ^ ( (unsigned long) x << 16 ) ^ ( (unsigned long) z << 24 ); FT1[i] = ROTL8( FT0[i] ); FT2[i] = ROTL8( FT1[i] ); FT3[i] = ROTL8( FT2[i] ); x = RSb[i]; RT0[i] = ( (unsigned long) MUL( 0x0E, x ) ) ^ ( (unsigned long) MUL( 0x09, x ) << 8 ) ^ ( (unsigned long) MUL( 0x0D, x ) << 16 ) ^ ( (unsigned long) MUL( 0x0B, x ) << 24 ); RT1[i] = ROTL8( RT0[i] ); RT2[i] = ROTL8( RT1[i] ); RT3[i] = ROTL8( RT2[i] ); } } /* * AES key schedule (encryption) */ int aes_setkey_enc( aes_context *ctx, const unsigned char *key, int keysize ) { int i; unsigned long *RK; #if !defined(POLARSSL_AES_ROM_TABLES) if( aes_init_done == 0 ) { aes_gen_tables(); aes_init_done = 1; } #endif switch( keysize ) { case 128: ctx->nr = 10; break; case 192: ctx->nr = 12; break; case 256: ctx->nr = 14; break; default : return( POLARSSL_ERR_AES_INVALID_KEY_LENGTH ); } #if defined(PADLOCK_ALIGN16) ctx->rk = RK = PADLOCK_ALIGN16( ctx->buf ); #else ctx->rk 
= RK = ctx->buf; #endif for( i = 0; i < (keysize >> 5); i++ ) { GET_ULONG_LE( RK[i], key, i << 2 ); } switch( ctx->nr ) { case 10: for( i = 0; i < 10; i++, RK += 4 ) { RK[4] = RK[0] ^ RCON[i] ^ ( (unsigned long) FSb[ ( RK[3] >> 8 ) & 0xFF ] ) ^ ( (unsigned long) FSb[ ( RK[3] >> 16 ) & 0xFF ] << 8 ) ^ ( (unsigned long) FSb[ ( RK[3] >> 24 ) & 0xFF ] << 16 ) ^ ( (unsigned long) FSb[ ( RK[3] ) & 0xFF ] << 24 ); RK[5] = RK[1] ^ RK[4]; RK[6] = RK[2] ^ RK[5]; RK[7] = RK[3] ^ RK[6]; } break; case 12: for( i = 0; i < 8; i++, RK += 6 ) { RK[6] = RK[0] ^ RCON[i] ^ ( (unsigned long) FSb[ ( RK[5] >> 8 ) & 0xFF ] ) ^ ( (unsigned long) FSb[ ( RK[5] >> 16 ) & 0xFF ] << 8 ) ^ ( (unsigned long) FSb[ ( RK[5] >> 24 ) & 0xFF ] << 16 ) ^ ( (unsigned long) FSb[ ( RK[5] ) & 0xFF ] << 24 ); RK[7] = RK[1] ^ RK[6]; RK[8] = RK[2] ^ RK[7]; RK[9] = RK[3] ^ RK[8]; RK[10] = RK[4] ^ RK[9]; RK[11] = RK[5] ^ RK[10]; } break; case 14: for( i = 0; i < 7; i++, RK += 8 ) { RK[8] = RK[0] ^ RCON[i] ^ ( (unsigned long) FSb[ ( RK[7] >> 8 ) & 0xFF ] ) ^ ( (unsigned long) FSb[ ( RK[7] >> 16 ) & 0xFF ] << 8 ) ^ ( (unsigned long) FSb[ ( RK[7] >> 24 ) & 0xFF ] << 16 ) ^ ( (unsigned long) FSb[ ( RK[7] ) & 0xFF ] << 24 ); RK[9] = RK[1] ^ RK[8]; RK[10] = RK[2] ^ RK[9]; RK[11] = RK[3] ^ RK[10]; RK[12] = RK[4] ^ ( (unsigned long) FSb[ ( RK[11] ) & 0xFF ] ) ^ ( (unsigned long) FSb[ ( RK[11] >> 8 ) & 0xFF ] << 8 ) ^ ( (unsigned long) FSb[ ( RK[11] >> 16 ) & 0xFF ] << 16 ) ^ ( (unsigned long) FSb[ ( RK[11] >> 24 ) & 0xFF ] << 24 ); RK[13] = RK[5] ^ RK[12]; RK[14] = RK[6] ^ RK[13]; RK[15] = RK[7] ^ RK[14]; } break; default: break; } return( 0 ); } /* * AES key schedule (decryption) */ int aes_setkey_dec( aes_context *ctx, const unsigned char *key, int keysize ) { int i, j; aes_context cty; unsigned long *RK; unsigned long *SK; int ret; switch( keysize ) { case 128: ctx->nr = 10; break; case 192: ctx->nr = 12; break; case 256: ctx->nr = 14; break; default : return( POLARSSL_ERR_AES_INVALID_KEY_LENGTH ); } #if 
defined(PADLOCK_ALIGN16) ctx->rk = RK = PADLOCK_ALIGN16( ctx->buf ); #else ctx->rk = RK = ctx->buf; #endif ret = aes_setkey_enc( &cty, key, keysize ); if( ret != 0 ) return( ret ); SK = cty.rk + cty.nr * 4; *RK++ = *SK++; *RK++ = *SK++; *RK++ = *SK++; *RK++ = *SK++; for( i = ctx->nr - 1, SK -= 8; i > 0; i--, SK -= 8 ) { for( j = 0; j < 4; j++, SK++ ) { *RK++ = RT0[ FSb[ ( *SK ) & 0xFF ] ] ^ RT1[ FSb[ ( *SK >> 8 ) & 0xFF ] ] ^ RT2[ FSb[ ( *SK >> 16 ) & 0xFF ] ] ^ RT3[ FSb[ ( *SK >> 24 ) & 0xFF ] ]; } } *RK++ = *SK++; *RK++ = *SK++; *RK++ = *SK++; *RK++ = *SK++; memset( &cty, 0, sizeof( aes_context ) ); return( 0 ); } #define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ { \ X0 = *RK++ ^ FT0[ ( Y0 ) & 0xFF ] ^ \ FT1[ ( Y1 >> 8 ) & 0xFF ] ^ \ FT2[ ( Y2 >> 16 ) & 0xFF ] ^ \ FT3[ ( Y3 >> 24 ) & 0xFF ]; \ \ X1 = *RK++ ^ FT0[ ( Y1 ) & 0xFF ] ^ \ FT1[ ( Y2 >> 8 ) & 0xFF ] ^ \ FT2[ ( Y3 >> 16 ) & 0xFF ] ^ \ FT3[ ( Y0 >> 24 ) & 0xFF ]; \ \ X2 = *RK++ ^ FT0[ ( Y2 ) & 0xFF ] ^ \ FT1[ ( Y3 >> 8 ) & 0xFF ] ^ \ FT2[ ( Y0 >> 16 ) & 0xFF ] ^ \ FT3[ ( Y1 >> 24 ) & 0xFF ]; \ \ X3 = *RK++ ^ FT0[ ( Y3 ) & 0xFF ] ^ \ FT1[ ( Y0 >> 8 ) & 0xFF ] ^ \ FT2[ ( Y1 >> 16 ) & 0xFF ] ^ \ FT3[ ( Y2 >> 24 ) & 0xFF ]; \ } #define AES_RROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ { \ X0 = *RK++ ^ RT0[ ( Y0 ) & 0xFF ] ^ \ RT1[ ( Y3 >> 8 ) & 0xFF ] ^ \ RT2[ ( Y2 >> 16 ) & 0xFF ] ^ \ RT3[ ( Y1 >> 24 ) & 0xFF ]; \ \ X1 = *RK++ ^ RT0[ ( Y1 ) & 0xFF ] ^ \ RT1[ ( Y0 >> 8 ) & 0xFF ] ^ \ RT2[ ( Y3 >> 16 ) & 0xFF ] ^ \ RT3[ ( Y2 >> 24 ) & 0xFF ]; \ \ X2 = *RK++ ^ RT0[ ( Y2 ) & 0xFF ] ^ \ RT1[ ( Y1 >> 8 ) & 0xFF ] ^ \ RT2[ ( Y0 >> 16 ) & 0xFF ] ^ \ RT3[ ( Y3 >> 24 ) & 0xFF ]; \ \ X3 = *RK++ ^ RT0[ ( Y3 ) & 0xFF ] ^ \ RT1[ ( Y2 >> 8 ) & 0xFF ] ^ \ RT2[ ( Y1 >> 16 ) & 0xFF ] ^ \ RT3[ ( Y0 >> 24 ) & 0xFF ]; \ } /* * AES-ECB block encryption/decryption */ int aes_crypt_ecb( aes_context *ctx, int mode, const unsigned char input[16], unsigned char output[16] ) { int i; unsigned long *RK, X0, X1, X2, X3, Y0, Y1, Y2, Y3; #if 
defined(POLARSSL_PADLOCK_C) && defined(POLARSSL_HAVE_X86) if( padlock_supports( PADLOCK_ACE ) ) { if( padlock_xcryptecb( ctx, mode, input, output ) == 0 ) return( 0 ); // If padlock data misaligned, we just fall back to // unaccelerated mode // } #endif RK = ctx->rk; GET_ULONG_LE( X0, input, 0 ); X0 ^= *RK++; GET_ULONG_LE( X1, input, 4 ); X1 ^= *RK++; GET_ULONG_LE( X2, input, 8 ); X2 ^= *RK++; GET_ULONG_LE( X3, input, 12 ); X3 ^= *RK++; if( mode == AES_DECRYPT ) { for( i = (ctx->nr >> 1) - 1; i > 0; i-- ) { AES_RROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); AES_RROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 ); } AES_RROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); X0 = *RK++ ^ \ ( (unsigned long) RSb[ ( Y0 ) & 0xFF ] ) ^ ( (unsigned long) RSb[ ( Y3 >> 8 ) & 0xFF ] << 8 ) ^ ( (unsigned long) RSb[ ( Y2 >> 16 ) & 0xFF ] << 16 ) ^ ( (unsigned long) RSb[ ( Y1 >> 24 ) & 0xFF ] << 24 ); X1 = *RK++ ^ \ ( (unsigned long) RSb[ ( Y1 ) & 0xFF ] ) ^ ( (unsigned long) RSb[ ( Y0 >> 8 ) & 0xFF ] << 8 ) ^ ( (unsigned long) RSb[ ( Y3 >> 16 ) & 0xFF ] << 16 ) ^ ( (unsigned long) RSb[ ( Y2 >> 24 ) & 0xFF ] << 24 ); X2 = *RK++ ^ \ ( (unsigned long) RSb[ ( Y2 ) & 0xFF ] ) ^ ( (unsigned long) RSb[ ( Y1 >> 8 ) & 0xFF ] << 8 ) ^ ( (unsigned long) RSb[ ( Y0 >> 16 ) & 0xFF ] << 16 ) ^ ( (unsigned long) RSb[ ( Y3 >> 24 ) & 0xFF ] << 24 ); X3 = *RK++ ^ \ ( (unsigned long) RSb[ ( Y3 ) & 0xFF ] ) ^ ( (unsigned long) RSb[ ( Y2 >> 8 ) & 0xFF ] << 8 ) ^ ( (unsigned long) RSb[ ( Y1 >> 16 ) & 0xFF ] << 16 ) ^ ( (unsigned long) RSb[ ( Y0 >> 24 ) & 0xFF ] << 24 ); } else /* AES_ENCRYPT */ { for( i = (ctx->nr >> 1) - 1; i > 0; i-- ) { AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); AES_FROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 ); } AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); X0 = *RK++ ^ \ ( (unsigned long) FSb[ ( Y0 ) & 0xFF ] ) ^ ( (unsigned long) FSb[ ( Y1 >> 8 ) & 0xFF ] << 8 ) ^ ( (unsigned long) FSb[ ( Y2 >> 16 ) & 0xFF ] << 16 ) ^ ( (unsigned long) FSb[ ( Y3 >> 24 ) & 0xFF ] << 24 ); X1 = *RK++ ^ \ ( (unsigned long) 
FSb[ ( Y1 ) & 0xFF ] ) ^ ( (unsigned long) FSb[ ( Y2 >> 8 ) & 0xFF ] << 8 ) ^ ( (unsigned long) FSb[ ( Y3 >> 16 ) & 0xFF ] << 16 ) ^ ( (unsigned long) FSb[ ( Y0 >> 24 ) & 0xFF ] << 24 ); X2 = *RK++ ^ \ ( (unsigned long) FSb[ ( Y2 ) & 0xFF ] ) ^ ( (unsigned long) FSb[ ( Y3 >> 8 ) & 0xFF ] << 8 ) ^ ( (unsigned long) FSb[ ( Y0 >> 16 ) & 0xFF ] << 16 ) ^ ( (unsigned long) FSb[ ( Y1 >> 24 ) & 0xFF ] << 24 ); X3 = *RK++ ^ \ ( (unsigned long) FSb[ ( Y3 ) & 0xFF ] ) ^ ( (unsigned long) FSb[ ( Y0 >> 8 ) & 0xFF ] << 8 ) ^ ( (unsigned long) FSb[ ( Y1 >> 16 ) & 0xFF ] << 16 ) ^ ( (unsigned long) FSb[ ( Y2 >> 24 ) & 0xFF ] << 24 ); } PUT_ULONG_LE( X0, output, 0 ); PUT_ULONG_LE( X1, output, 4 ); PUT_ULONG_LE( X2, output, 8 ); PUT_ULONG_LE( X3, output, 12 ); return( 0 ); } /* * AES-CBC buffer encryption/decryption */ int aes_crypt_cbc( aes_context *ctx, int mode, long long int length, unsigned char iv[16], const unsigned char *input, unsigned char *output ) { int i; unsigned char temp[16]; if( length % 16 ) return( POLARSSL_ERR_AES_INVALID_INPUT_LENGTH ); #if defined(POLARSSL_PADLOCK_C) && defined(POLARSSL_HAVE_X86) if( padlock_supports( PADLOCK_ACE ) ) { if( padlock_xcryptcbc( ctx, mode, length, iv, input, output ) == 0 ) return( 0 ); // If padlock data misaligned, we just fall back to // unaccelerated mode // } #endif if( mode == AES_DECRYPT ) { while( length > 0 ) { memcpy( temp, input, 16 ); aes_crypt_ecb( ctx, mode, input, output ); for( i = 0; i < 16; i++ ) output[i] = (unsigned char)( output[i] ^ iv[i] ); memcpy( iv, temp, 16 ); input += 16; output += 16; length -= 16; } } else { while( length > 0 ) { for( i = 0; i < 16; i++ ) output[i] = (unsigned char)( input[i] ^ iv[i] ); aes_crypt_ecb( ctx, mode, output, output ); memcpy( iv, output, 16 ); input += 16; output += 16; length -= 16; } } return( 0 ); } lrzip-0.651/aes.h000066400000000000000000000104101421175057200136060ustar00rootroot00000000000000/** * \file aes.h * * Copyright (C) 2011, Con Kolivas * Copyright (C) 
2006-2010, Brainspark B.V. * * This file is part of PolarSSL (http://www.polarssl.org) * Lead Maintainer: Paul Bakker * * All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #ifndef POLARSSL_AES_H #define POLARSSL_AES_H #define AES_ENCRYPT 1 #define AES_DECRYPT 0 #define POLARSSL_ERR_AES_INVALID_KEY_LENGTH -0x0800 #define POLARSSL_ERR_AES_INVALID_INPUT_LENGTH -0x0810 /** * \brief AES context structure */ typedef struct { int nr; /*!< number of rounds */ unsigned long *rk; /*!< AES round keys */ unsigned long buf[68]; /*!< unaligned data */ } aes_context; #ifdef __cplusplus extern "C" { #endif /** * \brief AES key schedule (encryption) * * \param ctx AES context to be initialized * \param key encryption key * \param keysize must be 128, 192 or 256 * * \return 0 if successful, or POLARSSL_ERR_AES_INVALID_KEY_LENGTH */ int aes_setkey_enc( aes_context *ctx, const unsigned char *key, int keysize ); /** * \brief AES key schedule (decryption) * * \param ctx AES context to be initialized * \param key decryption key * \param keysize must be 128, 192 or 256 * * \return 0 if successful, or POLARSSL_ERR_AES_INVALID_KEY_LENGTH */ int aes_setkey_dec( aes_context *ctx, const unsigned char *key, int keysize ); /** * \brief AES-ECB block encryption/decryption * * \param ctx AES context * \param mode AES_ENCRYPT or 
AES_DECRYPT * \param input 16-byte input block * \param output 16-byte output block * * \return 0 if successful */ int aes_crypt_ecb( aes_context *ctx, int mode, const unsigned char input[16], unsigned char output[16] ); /** * \brief AES-CBC buffer encryption/decryption * Length should be a multiple of the block * size (16 bytes) * * \param ctx AES context * \param mode AES_ENCRYPT or AES_DECRYPT * \param length length of the input data * \param iv initialization vector (updated after use) * \param input buffer holding the input data * \param output buffer holding the output data * * \return 0 if successful, or POLARSSL_ERR_AES_INVALID_INPUT_LENGTH */ int aes_crypt_cbc( aes_context *ctx, int mode, long long int length, unsigned char iv[16], const unsigned char *input, unsigned char *output ); /** * \brief AES-CFB128 buffer encryption/decryption. * * \param ctx AES context * \param mode AES_ENCRYPT or AES_DECRYPT * \param length length of the input data * \param iv_off offset in IV (updated after use) * \param iv initialization vector (updated after use) * \param input buffer holding the input data * \param output buffer holding the output data * * \return 0 if successful */ int aes_crypt_cfb128( aes_context *ctx, int mode, int length, int *iv_off, unsigned char iv[16], const unsigned char *input, unsigned char *output ); /** * \brief Checkup routine * * \return 0 if successful, or 1 if the test failed */ int aes_self_test( int verbose ); #ifdef __cplusplus } #endif #endif /* aes.h */ lrzip-0.651/autogen.sh000077500000000000000000000006501421175057200146730ustar00rootroot00000000000000#!/bin/sh cwd="$PWD" bs_dir="$(dirname $(readlink -f $0))" rm -rf "${bs_dir}"/autom4te.cache rm -f "${bs_dir}"/aclocal.m4 "${bs_dir}"/ltmain.sh echo 'Running autoreconf -if...' autoreconf -if || exit 1 if test -z "$NOCONFIGURE" ; then echo 'Configuring...' cd "${bs_dir}" &> /dev/null test "$?" 
= "0" || e=1 test "$cwd" != "$bs_dir" && cd "$bs_dir" &> /dev/null ./configure $@ test "$e" = "1" && exit 1 cd "$cwd" fi lrzip-0.651/configure.ac000066400000000000000000000126321421175057200151630ustar00rootroot00000000000000##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## m4_define([v_maj], [0]) m4_define([v_min], [6]) m4_define([v_mic], [51]) ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## m4_define([v_v], m4_join([], v_min, v_mic)) m4_define([v_ver], [v_maj.v_v]) m4_define([lt_rev], m4_eval(v_maj + v_min)) m4_define([lt_cur], v_mic) m4_define([lt_age], v_min) ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## dnl Process this file with autoconf to produce a configure script. AC_INIT([lrzip],[v_ver],[kernel@kolivas.org]) AC_PREREQ([2.59]) AC_CONFIG_SRCDIR([configure.ac]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_HEADERS([config.h]) AM_INIT_AUTOMAKE([1.6 dist-bzip2 foreign subdir-objects]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) AC_USE_SYSTEM_EXTENSIONS AC_PROG_LIBTOOL ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## m4_ifdef([v_rev], , [m4_define([v_rev], [0])]) m4_ifdef([v_rel], , [m4_define([v_rel], [])]) AC_DEFINE_UNQUOTED(LRZIP_MAJOR_VERSION, [v_maj], [Major version]) AC_DEFINE_UNQUOTED(LRZIP_MINOR_VERSION, [v_min], [Minor version]) AC_DEFINE_UNQUOTED(LRZIP_MINOR_SUBVERSION, [v_mic], [Micro version]) version_info="lt_rev:lt_cur:lt_age" release_info="v_rel" AC_SUBST(version_info) AC_SUBST(release_info) ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--## VMAJ=v_maj AC_SUBST(VMAJ) AC_CANONICAL_BUILD AC_CANONICAL_HOST dnl Checks for programs. 
AC_PROG_CC
AC_PROG_CXX
AC_PROG_INSTALL
AC_PROG_LN_S
AC_SUBST(SHELL)
AC_SYS_LARGEFILE
AC_FUNC_FSEEKO
AC_FUNC_ALLOCA
AC_PROG_CC_C99
AS_IF([test x"$ac_cv_prog_cc_c99" = x"no"],
	AC_MSG_ERROR([C compiler does not support C99], 1))

dnl pod2man is a hard requirement: configure aborts when it is not found.
AC_CHECK_PROG([HAVE_POD2MAN], [pod2man], [yes])
AS_IF([test "$HAVE_POD2MAN" != "yes"],
	AC_MSG_FAILURE([pod2man is needed to generate manual from POD]))

dnl --enable-asm / --disable-asm. Assembler code is requested by default.
AC_ARG_ENABLE(
	asm,
	[AC_HELP_STRING([--enable-asm],[Enable native Assembly code])],
	ASM=$enableval,
	ASM=yes
	)

dnl Assembler code needs nasm. AC_CHECK_PROG stores exactly "no" in ASM_PROG
dnl when nasm is not found, so the comparison must use that exact string
dnl (no trailing blank); otherwise ASM would stay enabled without an assembler.
if test x"$ASM" = x"yes"; then
	AC_CHECK_PROG( ASM_PROG, nasm, nasm, no ) # fix to set ASM_PROG to nasm, not yes.
	if test x"$ASM_PROG" = x"no"; then
		ASM=no
	fi
fi

dnl --enable-static-bin drives the STATIC automake conditional (default: no).
static=no
AC_ARG_ENABLE([static-bin],
	[AC_HELP_STRING([--enable-static-bin],[Build statically linked binary @<:@default=no@:>@])],
	[static=$enableval]
	)
AM_CONDITIONAL([STATIC], [test x"$static" = x"yes"])

AC_CHECK_HEADERS(fcntl.h sys/time.h unistd.h sys/mman.h)
AC_CHECK_HEADERS(ctype.h errno.h sys/resource.h)
AC_CHECK_HEADERS(endian.h sys/endian.h arpa/inet.h)
AC_CHECK_HEADERS(alloca.h pthread.h)

AC_TYPE_OFF_T
AC_TYPE_SIZE_T
AC_C___ATTRIBUTE__
AC_CHECK_SIZEOF(int)
AC_CHECK_SIZEOF(long)
AC_CHECK_SIZEOF(short)

dnl Large file support probe. The test program exits with status 0 only when
dnl off_t is not 4 bytes wide. It declares main with an explicit int return
dnl type so that it still compiles with compilers that reject implicit int.
AC_CACHE_CHECK([for large file support],rzip_cv_HAVE_LARGE_FILES,[
AC_RUN_IFELSE([AC_LANG_SOURCE([[
#include <stdio.h>
#include <sys/types.h>
int main(void) { return (sizeof(off_t) == 4); }]])],[rzip_cv_HAVE_LARGE_FILES=yes],[rzip_cv_HAVE_LARGE_FILES=no],[rzip_cv_HAVE_LARGE_FILES=cross])])
if test x"$rzip_cv_HAVE_LARGE_FILES" = x"yes"; then
	AC_DEFINE(HAVE_LARGE_FILES, 1, [ ])
fi

AC_C_INLINE
AC_C_BIGENDIAN

dnl Required libraries: configure aborts with a hint when one is missing.
AC_CHECK_LIB(pthread, pthread_create, ,
	AC_MSG_ERROR([Could not find pthread library - please install libpthread]))
AC_CHECK_LIB(m, sqrt, ,
	AC_MSG_ERROR([Could not find math library - please install libm]))
AC_CHECK_LIB(z, compress2, ,
	AC_MSG_ERROR([Could not find zlib library - please install zlib-dev]))
AC_CHECK_LIB(bz2, BZ2_bzBuffToBuffCompress, ,
	AC_MSG_ERROR([Could not find bz2 library - please install libbz2-dev]))
AC_CHECK_LIB(lzo2,
lzo1x_1_compress, , AC_MSG_ERROR([Could not find lzo2 library - please install liblzo2-dev])) AC_CHECK_LIB(lz4, LZ4_compress_default, , AC_MSG_ERROR([Could not find lz4 library - please install liblz4-dev])) AC_CHECK_FUNCS(mmap strerror) AC_CHECK_FUNCS(getopt_long) AX_PTHREAD LIBS="$PTHREAD_LIBS $LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS" CXXFLAGS="$CXXFLAGS $PTHREAD_CXXFLAGS" # final checks for assembler # ASM is back for x86_64 by using newer CRC code from p7zip-16.02 # object files handled in lzma/C/Makefile.am if test x"$ASM" = x"yes"; then ASM_OPT="-I../ASM/x86/" case $host in i?86-*) ASM_OPT="$ASM_OPT -g -f elf" ;; x86_64-*) ASM_OPT="$ASM_OPT -Dx64 -g -f elf64" ;; *) ASM_OPT= ;; esac else ASM_OPT= fi AM_CONDITIONAL([USE_ASM], [test x"$ASM" = x"yes"]) AC_SUBST([ASM_OPT]) AC_SUBST([ASM_CMD]) EFL_CHECK_DOXYGEN([build_doc="yes"], [build_doc="no"]) AC_CONFIG_FILES([ Makefile lzma/Makefile lzma/C/Makefile lzma/ASM/x86/Makefile doc/Makefile man/Makefile ]) AC_OUTPUT echo echo echo echo "------------------------------------------------------------------------" echo "$PACKAGE $VERSION" echo "------------------------------------------------------------------------" echo echo echo "Configuration Options Summary:" echo echo " ASM................: $ASM" echo " Static binary......: $static" echo echo "Documentation..........: ${build_doc}" echo echo "Compilation............: make (or gmake)" echo " CPPFLAGS.............: $CPPFLAGS" echo " CFLAGS...............: $CFLAGS" echo " CXXFLAGS.............: $CXXFLAGS" echo " LDFLAGS..............: $LDFLAGS" echo echo "Installation...........: make install (as root if needed, with 'su' or 'sudo')" echo " prefix...............: $prefix" echo lrzip-0.651/decompress_demo.c000066400000000000000000000027201421175057200162060ustar00rootroot00000000000000/* Copyright (C) 2012 Con Kolivas This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software 
Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #undef NDEBUG #include #include #include #include #include static const char *suffix_me(const char *file) { const char *p; static char buf[4096]; p = strrchr(file, '.'); if (p && (strlen(p + 1) < 4)) strncat(buf, file, p - file); else strcat(buf, file); return &buf[0]; } int main(int argc, char *argv[]) { Lrzip *lr; if ((argc != 2) && (argc != 3)) { fprintf(stderr, "Usage: %s file [outfile]\n", argv[0]); exit(1); } lr = lrzip_new(LRZIP_MODE_DECOMPRESS); assert(lr); lrzip_config_env(lr); assert(lrzip_filename_add(lr, argv[1])); if (argc == 2) lrzip_outfilename_set(lr, suffix_me(argv[1])); else lrzip_outfilename_set(lr, argv[2]); assert(lrzip_run(lr)); return 0; } lrzip-0.651/description-pak000066400000000000000000000000061421175057200157040ustar00rootroot00000000000000lrzip lrzip-0.651/doc/000077500000000000000000000000001421175057200134365ustar00rootroot00000000000000lrzip-0.651/doc/Doxyfile000066400000000000000000000141131421175057200151440ustar00rootroot00000000000000DOXYFILE_ENCODING = UTF-8 PROJECT_NAME = Lrzip PROJECT_NUMBER = OUTPUT_DIRECTORY = . 
CREATE_SUBDIRS = NO OUTPUT_LANGUAGE = English BRIEF_MEMBER_DESC = YES REPEAT_BRIEF = YES ABBREVIATE_BRIEF = ALWAYS_DETAILED_SEC = NO INLINE_INHERITED_MEMB = NO FULL_PATH_NAMES = NO STRIP_FROM_PATH = STRIP_FROM_INC_PATH = SHORT_NAMES = NO JAVADOC_AUTOBRIEF = YES QT_AUTOBRIEF = NO MULTILINE_CPP_IS_BRIEF = NO INHERIT_DOCS = YES SEPARATE_MEMBER_PAGES = NO TAB_SIZE = 2 ALIASES = OPTIMIZE_OUTPUT_FOR_C = YES OPTIMIZE_OUTPUT_JAVA = NO OPTIMIZE_FOR_FORTRAN = NO OPTIMIZE_OUTPUT_VHDL = NO EXTENSION_MAPPING = BUILTIN_STL_SUPPORT = NO CPP_CLI_SUPPORT = NO SIP_SUPPORT = NO IDL_PROPERTY_SUPPORT = YES DISTRIBUTE_GROUP_DOC = NO SUBGROUPING = YES TYPEDEF_HIDES_STRUCT = NO SYMBOL_CACHE_SIZE = 0 EXTRACT_ALL = NO EXTRACT_PRIVATE = NO EXTRACT_STATIC = NO EXTRACT_LOCAL_CLASSES = NO EXTRACT_LOCAL_METHODS = NO EXTRACT_ANON_NSPACES = NO HIDE_UNDOC_MEMBERS = YES HIDE_UNDOC_CLASSES = YES HIDE_FRIEND_COMPOUNDS = YES HIDE_IN_BODY_DOCS = NO INTERNAL_DOCS = NO CASE_SENSE_NAMES = YES HIDE_SCOPE_NAMES = NO SHOW_INCLUDE_FILES = NO FORCE_LOCAL_INCLUDES = NO INLINE_INFO = YES SORT_MEMBER_DOCS = YES SORT_BRIEF_DOCS = NO SORT_MEMBERS_CTORS_1ST = NO SORT_GROUP_NAMES = NO SORT_BY_SCOPE_NAME = NO GENERATE_TODOLIST = YES GENERATE_TESTLIST = YES GENERATE_BUGLIST = YES GENERATE_DEPRECATEDLIST= YES ENABLED_SECTIONS = MAX_INITIALIZER_LINES = 30 SHOW_USED_FILES = NO SHOW_DIRECTORIES = NO SHOW_FILES = YES SHOW_NAMESPACES = YES FILE_VERSION_FILTER = LAYOUT_FILE = QUIET = NO WARNINGS = YES WARN_IF_UNDOCUMENTED = YES WARN_IF_DOC_ERROR = YES WARN_NO_PARAMDOC = NO WARN_FORMAT = "$file:$line: $text" WARN_LOGFILE = INPUT = ../Lrzip.h INPUT_ENCODING = UTF-8 FILE_PATTERNS = RECURSIVE = YES EXCLUDE = EXCLUDE_SYMLINKS = NO EXCLUDE_PATTERNS = */extras/* *private* EXCLUDE_SYMBOLS = EXAMPLE_PATH = EXAMPLE_PATTERNS = EXAMPLE_RECURSIVE = YES IMAGE_PATH = INPUT_FILTER = FILTER_PATTERNS = FILTER_SOURCE_FILES = NO SOURCE_BROWSER = NO INLINE_SOURCES = NO STRIP_CODE_COMMENTS = YES REFERENCED_BY_RELATION = YES REFERENCES_RELATION = 
YES REFERENCES_LINK_SOURCE = YES USE_HTAGS = NO VERBATIM_HEADERS = NO ALPHABETICAL_INDEX = YES COLS_IN_ALPHA_INDEX = 2 IGNORE_PREFIX = GENERATE_HTML = YES HTML_OUTPUT = html HTML_FILE_EXTENSION = .html HTML_TIMESTAMP = YES HTML_ALIGN_MEMBERS = YES HTML_DYNAMIC_SECTIONS = NO GENERATE_DOCSET = NO DOCSET_FEEDNAME = "Doxygen generated docs" DOCSET_BUNDLE_ID = org.doxygen.Project DOCSET_PUBLISHER_ID = org.doxygen.Publisher DOCSET_PUBLISHER_NAME = Publisher GENERATE_HTMLHELP = NO CHM_FILE = HHC_LOCATION = GENERATE_CHI = NO CHM_INDEX_ENCODING = BINARY_TOC = NO TOC_EXPAND = NO GENERATE_QHP = NO QCH_FILE = QHP_NAMESPACE = org.doxygen.Project QHP_VIRTUAL_FOLDER = doc QHP_CUST_FILTER_NAME = QHP_CUST_FILTER_ATTRS = QHP_SECT_FILTER_ATTRS = QHG_LOCATION = GENERATE_ECLIPSEHELP = NO ECLIPSE_DOC_ID = org.doxygen.Project DISABLE_INDEX = YES ENUM_VALUES_PER_LINE = 1 GENERATE_TREEVIEW = NO USE_INLINE_TREES = NO TREEVIEW_WIDTH = 250 EXT_LINKS_IN_WINDOW = NO FORMULA_FONTSIZE = 10 FORMULA_TRANSPARENT = YES USE_MATHJAX = NO MATHJAX_RELPATH = http://www.mathjax.org/mathjax SEARCHENGINE = NO SERVER_BASED_SEARCH = NO GENERATE_LATEX = YES LATEX_OUTPUT = latex LATEX_CMD_NAME = latex MAKEINDEX_CMD_NAME = makeindex COMPACT_LATEX = NO PAPER_TYPE = a4wide EXTRA_PACKAGES = LATEX_HEADER = PDF_HYPERLINKS = YES USE_PDFLATEX = NO LATEX_BATCHMODE = NO LATEX_HIDE_INDICES = NO LATEX_SOURCE_CODE = NO GENERATE_RTF = NO RTF_OUTPUT = rtf COMPACT_RTF = NO RTF_HYPERLINKS = NO RTF_STYLESHEET_FILE = RTF_EXTENSIONS_FILE = GENERATE_MAN = YES MAN_OUTPUT = man MAN_EXTENSION = .3 MAN_LINKS = YES GENERATE_XML = NO XML_OUTPUT = xml XML_SCHEMA = XML_DTD = XML_PROGRAMLISTING = YES GENERATE_AUTOGEN_DEF = NO GENERATE_PERLMOD = NO PERLMOD_LATEX = NO PERLMOD_PRETTY = YES PERLMOD_MAKEVAR_PREFIX = ENABLE_PREPROCESSING = YES MACRO_EXPANSION = YES EXPAND_ONLY_PREDEF = NO SEARCH_INCLUDES = YES INCLUDE_PATH = INCLUDE_FILE_PATTERNS = PREDEFINED = EXPAND_AS_DEFINED = SKIP_FUNCTION_MACROS = YES TAGFILES = GENERATE_TAGFILE = 
ALLEXTERNALS = NO EXTERNAL_GROUPS = YES PERL_PATH = /usr/bin/perl CLASS_DIAGRAMS = NO MSCGEN_PATH = HIDE_UNDOC_RELATIONS = YES HAVE_DOT = NO DOT_NUM_THREADS = 0 DOT_FONTNAME = FreeSans.ttf DOT_FONTSIZE = 10 DOT_FONTPATH = CLASS_GRAPH = NO COLLABORATION_GRAPH = NO GROUP_GRAPHS = YES UML_LOOK = NO TEMPLATE_RELATIONS = NO INCLUDE_GRAPH = NO INCLUDED_BY_GRAPH = NO CALL_GRAPH = NO CALLER_GRAPH = NO GRAPHICAL_HIERARCHY = NO DIRECTORY_GRAPH = YES DOT_IMAGE_FORMAT = png DOT_PATH = DOTFILE_DIRS = MSCFILE_DIRS = DOT_GRAPH_MAX_NODES = 50 MAX_DOT_GRAPH_DEPTH = 0 DOT_TRANSPARENT = NO DOT_MULTI_TARGETS = NO GENERATE_LEGEND = YES DOT_CLEANUP = YES lrzip-0.651/doc/Makefile.am000066400000000000000000000013721421175057200154750ustar00rootroot00000000000000MAINTAINERCLEANFILES = Makefile.in dist_doc_DATA = \ README.Assembler \ README.benchmarks \ README.lzo_compresses.test.txt \ magic.header.txt \ lrzip.conf.example PACKAGE_DOCNAME = $(PACKAGE_TARNAME)-$(PACKAGE_VERSION)-doc .PHONY: doc if EFL_BUILD_DOC doc-clean: rm -rf html/ latex/ man/ xml/ $(PACKAGE_DOCNAME).tar* doc: all doc-clean $(efl_doxygen) rm -rf $(PACKAGE_DOCNAME).tar* mkdir -p $(PACKAGE_DOCNAME)/doc cp -R html/ latex/ man/ $(PACKAGE_DOCNAME)/doc tar cf $(PACKAGE_DOCNAME).tar $(PACKAGE_DOCNAME)/ bzip2 -9 $(PACKAGE_DOCNAME).tar rm -rf $(PACKAGE_DOCNAME)/ mv $(PACKAGE_DOCNAME).tar.bz2 $(top_srcdir) clean-local: doc-clean else doc: @echo "Documentation not built. Run ./configure --help" endif EXTRA_DIST = Doxyfile lrzip-0.651/doc/README.Assembler000066400000000000000000000025251421175057200162360ustar00rootroot00000000000000README.Assembler Update November 2019 Assembler is enabled by ./configure --enable-asm and disabled by ./configure --disable-asm not ASM=no ./configure New files replace 32 and 64 bit assembler code. fixes to lzma/C/Makefile.am permit libtool linking. Original text follows. ========================== Notes about CRC Assembly Language Coding. 
lrzip-0.21 makes use of an x86 assembly language file that optimizes CRC computation used in lrzip. It includes a wrapper C file, 7zCrcT8.c and the assembler code, 7zCrcT8U.s. configure should detect your host system properly and adjust the Makefile accordingly. If you don't have the nasm assembler or have a ppc or other non- x86 system, the standard C CRC routines will be compiled and linked in. If for any reason configure does not properly detect your system type, or you do not want assembler modules to be compiled, you can run ASM=no ./configure which will automatically not include the asm module or change the line ASM_OBJ=7zCrcT8.o 7zCrcT8U.o to ASM_OBJ=7zCrc.o in Makefile. This will change the dependency tree. To force assembly module compilation and linking (if configure does not detect your system type properly), type ASM=yes ./configure or change the Makefile to include the ASM_OBJ files as described above. Type `make clean' and then re-run make. Peter Hyman pete@peterhyman.com lrzip-0.651/doc/README.benchmarks000066400000000000000000000152501421175057200164350ustar00rootroot00000000000000The first comparison is that of a linux kernel tarball (2.6.37). In all cases the default options were used. 4 other common compression apps were used for comparison, 7z which is an excellent all-round lzma based compression app, gzip which is the benchmark fast standard that has good compression, and bzip2 which is the most common linux used compression. xz was included for completeness. In the following tables, lrzip means lrzip default options, lrzip -l means lrzip using the lzo backend, lrzip -g means using the gzip backend, lrzip -b means using the bzip2 backend and lrzip -z means using the zpaq backend. linux-2.6.37.tar These are benchmarks performed on a 3GHz quad core Intel Core2 with 8GB ram using lrzip v0.612 on an SSD drive. 
Compression Size Percentage Compress Decompress None 430612480 100 7z 63636839 14.8 2m28s 0m6.6s xz 63291156 14.7 4m02s 0m8.7 lrzip 64561485 14.9 1m12s 0m4.3s lrzip -z 51588423 12.0 2m02s 2m08s lrzip -l 137515997 31.9 0m14s 0m2.7s lrzip -g 86142459 20.0 0m17s 0m3.0s lrzip -b 72103197 16.7 0m21s 0m6.5s bzip2 74060625 17.2 0m48s 0m12.8s gzip 94512561 21.9 0m17s 0m4.0s These results are interesting to note the compression of lrzip by default is about the same as 7z, but it's significantly faster thanks to its heavily multithreaded nature. Zpaq offers by far the best compression but at the cost of extra time. However with the heavily threaded nature of lrzip, it's not a lot longer given how much better its compression is. It's actually faster than xz on compression on a quad core machine. Let's take six kernel trees one version apart as a tarball, linux-2.6.31 to linux-2.6.36. These will show lots of redundant information, but hundreds of megabytes apart, which lrzip will be very good at compressing. For simplicity, only 7z will be compared since that's by far the best general purpose compressor at the moment: These are benchmarks performed on a 2.53Ghz dual core Intel Core2 with 4GB ram using lrzip v0.5.1. Note that it was running with a 32 bit userspace so only 2GB addressing was possible. However the benchmark was run with the -U option allowing the whole file to be treated as one large compression window. Tarball of 6 consecutive kernel trees. Compression Size Percentage Compress Decompress None 2373713920 100 7z 344088002 14.5 17m26s 1m22s lrzip 104874109 4.4 11m37s 56s lrzip -l 223130711 9.4 05m21s 1m01s lrzip -U 73356070 3.1 08m53s 43s lrzip -Ul 158851141 6.7 04m31s 35s lrzip -Uz 62614573 2.6 24m42s 25m30s Things start getting very interesting now when lrzip is really starting to shine. Note how it's not that much larger for 6 kernel trees than it was for one. 
That's because all the similar data in both kernel trees is being compressed as one copy and only the differences really make up the extra size. All compression software does this, but not over such large distances. If you copy the same data over multiple times, the resulting lrzip archive doesn't get much larger at all. You might find this example interesting because the -U option is actually faster as well as providing better compression. The reason is that the window is not much larger than the amount of ram addressable (2GB), and it compresses so much more in the rzip stage that it makes up the time by not needing to compress anywhere near as much data with the backend compressor. Using the first example (linux-2.6.31.tar) and simply copying the data multiple times over gives these results with lrzip(lzo): Copies Size Compressed Compress Decompress 1 365711360 112151676 0m14.9s 0m5.1s 2 731422720 112151829 0m16.2s 0m6.5s 3 1097134080 112151832 0m17.5s 0m8.1s I had the amusing thought that this compression software could be used as a bullshit detector if you were to compress people's speeches because if their talks were full of catchphrases and not much actual content, it would all be compressed down. So the larger the final archive, the less bullshit =) Now let's move on to the other special feature of lrzip, the ability to compress massive amounts of data on huge ram machines by using massive compression windows. This is a 10GB virtual image of an installed operating system and some basic working software on it. The default options on the 8GB machine meant that it was using a 5 GB window. 
10GB Virtual image: These benchmarks were done on the quad core with version 0.612 Compression Size Percentage Compress Time Decompress Time None 10737418240 100.0 gzip 2772899756 25.8 05m47s 2m46s bzip2 2704781700 25.2 16m15s 6m19s xz 2272322208 21.2 50m58s 3m52s 7z 2242897134 20.9 26m36s 5m41s lrzip 1372218189 12.8 10m23s 2m53s lrzip -U 1095735108 10.2 08m44s 2m45s lrzip -l 1831894161 17.1 04m53s 2m37s lrzip -lU 1414959433 13.2 04m48s 2m38s lrzip -zU 1067169419 9.9 39m32s 39m46s At this end of the spectrum things really start to heat up. The compression advantage is massive, with the lzo backend even giving much better results than 7z, and over a ridiculously short time. The improvements in version 0.530 in scalability with multiple CPUs has a huge impact on compression time here, with zpaq almost being faster on quad core than xz is, yet producing a file less than half the size. What appears to be a big disappointment is actually zpaq here which takes more than 4 times longer than r/lzma for a measly .3% improvement. The reason is that most of the advantage here is achieved by the rzip first stage since there's a lot of redundant space over huge distances on a virtual image. The -U option which works the memory subsystem rather hard making noticeable impact on the rest of the machine also does further wonders for the compression (virtually always) and even the times in this particular case. Finally testing the same 10GB image on a i7-3930K at 3.2GHz (12 thread CPU!) with 32GB of ram so the whole image fits in ram with a fast SSD: Compression Size Percentage Compress Time Decompress Time None 10737418240 100.0 gzip 2772899756 25.8 3m56s 2m15s pbzip2 2705814394 25.2 1m41s 1m46s lrzip 1095337763 10.2 2m54s 2m21s Note that with enough ram and CPU, lrzip is actually faster than gzip (which does compression in place) and comparable on decompression, despite a huge increase in compression. pbzip2 is faster than both but its compression is almost no better than gzip. 
This should help govern what compression you choose. Small files are nicely compressed with zpaq. Intermediate files are nicely compressed with lzma. Large files get excellent results even with lzo provided you have enough ram. (Small being < 100MB, intermediate <1GB, large >1GB). Or, to make things easier, just use the default settings all the time and be happy as lzma gives good results. :D Con Kolivas Saturday, 7th July 2012 lrzip-0.651/doc/README.lzo_compresses.test.txt000066400000000000000000000131231421175057200211600ustar00rootroot00000000000000An explanation of the revised lzo_compresses function in stream.c. The modifications to the lrzip program for 0.19 centered around an attempt to catch data chunks that would cause lzma compression to either take an inordinately long time or not complete at all. The files that could cause problems for lzma are already-compressed files, multimedia files, files that have compressed files in them, and files with randomized data (such as an encrypted volume or file). The lzo_compresses function is used to assess the data and return a TRUE or FALSE to the lzma_compress_buf function based on whether or not the function determined the data to be compressible or not. The simple formula cdata < odata was used (c=compressed, o=original). Some test cases were slipping through and caused the hangups. Beginning with lrzip-0.19 a new option, -T, test compression threshold has been introduced and sets configurable limits as to what is considered a compressible data chunk and what is not. In addition, with very large chunks of data, a small modification was made to the initial test buffer size to make it more representative of the entire sample. To go along with this, increased verbosity was added to the function so that the user/evaluator can better see what is going on. -v or -vv can be used to increase informational output. Functional Overview: Data chunks are passed to the lzo_compresses function in two streams.
The first is the small data set in the primary hashing bucket which can be seen when using the -v or -vv option. This is normally a small sample. The second stream will be the rest. The sizes of the streams depend on the long range analysis that is performed on the entire file and on available memory. After analysis of the data chunk, a value of TRUE or FALSE is returned and lzma compression will either commence or be skipped. If skipped, data written out to the .lrz file will simply be the rzip data which is the reorganized data based on long range analysis. The lzo_compresses function traverses through the data chunk comparing larger and larger blocks. If suitable compression ratios are found, the function ends and returns TRUE. If not, and the largest sample block size has been reached, the function will traverse deeper into the chunk and analyze that region. Anytime a compressible area is found, the function returns TRUE. When the end of the data chunk has been reached and no suitable compressible blocks have been found, the function will return FALSE. Under most circumstances, this logic was fine. However, if the test found a chunk that could only achieve 2% compression, for example, this type of result could adversely affect the lzma compression routine. Hence, the concept of a limiting threshold. The threshold option works as a limiter that forces the lzo_compresses function to not just compare the estimated compressed size with the original, but to add a limiting threshold. This ranges from a very low threshold, 1, to a very strict one, 10. A threshold of 1 means that for the function to return TRUE, the estimated compressed data size for the current data chunk can be between 90-100% of the original size. This means that almost no compressible data is observed or tested for. A value of 2 means that the data MUST compress better than 90% of the original size.
However, if the observed compression of the data chunk is over 90% of the original size, then lzo_compresses will fail. Each additional threshold value will increase the strictness according to the following formula: CDS = Observed Compressed Data Size from LZO ODS = Original Data chunk size T = Threshold To return TRUE, CDS < ODS * (1.1-T/10) At T=1, just 0.01% compression would be OK, T=2, anything better than 10% would be OK, but under 10% compression would fail. T=3, anything better than 20% would be OK, but under 20% compression would fail. ... T=10, I can't imagine a use for this. Anything better than 90% compression would be OK. This would imply that LZO would need to get a 10x compression ratio. The following actual output from the lzo_compresses function will help explain. 22501 in primary bucket (0.805%) lzo testing for incompressible data...OK for chunk 43408. Compressed size = 52.58% of chunk, 1 Passes Progress percentage pausing during lzma compression... lzo testing for incompressible data...FAILED - below threshold for chunk 523245383. Compressed size = 98.87% of chunk, 50 Passes This was for a video .VOB file of 1GB. A compression threshold of 2 was used. -T 2 means that the estimated compression size of the data chunk had to be better than 90% of the original size. There were 43,408 bytes in the primary hash bucket and this chunk was evaluated by lzo_compresses. The function estimated that the compressed data size would be 52.58% of the original 43,408 byte chunk. This resulted in LZMA compression occurring. The second data chunk which included the rest of the data in the current hash, 523,245,383 bytes, failed the test. The lzo_compresses function made 50 passes through the data using progressively larger samples until it reached the end of the data chunk. It could not find better than a 1.2% compression benefit and therefore FAILED. The result was NO LZMA compression and the data chunk was written to the .lrz file in rzip format (no compression).
The higher the threshold option, the faster the LZMA compression will occur. However, this could also cause some chunks that are compressible to be omitted. After much testing, -T 2 seems to work very well in stopping data which will cause LZMA to hang yet allow most compressible data to come through. Peter Hyman pete@peterhyman.com December 2007 lrzip-0.651/doc/lrzip.conf.example000066400000000000000000000032261421175057200171020ustar00rootroot00000000000000# lrzip.conf example file # anything beginning with a # or whitespace will be ignored # valid parameters are separated with an = and a value # parameters and values are not case sensitive except where specified # # lrzip 0.24+, peter hyman, pete@peterhyman.com # ignored by earlier versions. # Compression Window size in 100MB. Normally selected by program. (-w) # WINDOW = 20 # Compression Level 1-9 (7 Default). (-L) # COMPRESSIONLEVEL = 7 # Use -U setting, Unlimited ram. Yes or No # UNLIMITED = NO # Compression Method, rzip, gzip, bzip2, lzo, or lzma (default), or zpaq. (-n -g -b -l --lzma -z) # May be overridden by command line compression choice. # COMPRESSIONMETHOD = lzma # Perform LZO Test. Default = YES (-T ) # LZOTEST = NO # Hash Check on decompression, (-c) # HASHCHECK = YES # Show HASH value on Compression even if Verbose is off, YES (-H) # SHOWHASH = YES # Default output directory (-O) # OUTPUTDIRECTORY = location # Verbosity, YES or MAX (v, vv) # VERBOSITY = max # Show Progress as file is parsed, YES or no (NO = -q option) # SHOWPROGRESS = YES # Set Niceness. 19 is default. 
-20 to 19 is the allowable range (-N) # NICE = 19 # Keep broken or damaged output files, YES (-K) # KEEPBROKEN = YES # Delete source file after compression (-D) # this parameter and value are case sensitive # value must be YES to activate # DELETEFILES = NO # Replace existing lrzip file when compressing (-f) # this parameter and value are case sensitive # value must be YES to activate # REPLACEFILE = YES # Override for Temporary Directory. Only valid when stdin/out or Test is used # TMPDIR = /tmp # Whether to use encryption on compression YES, NO (-e) # ENCRYPT = NO lrzip-0.651/doc/magic.header.txt000066400000000000000000000050671421175057200165160ustar00rootroot00000000000000lrzip-0.6x file format March 2011 Con Kolivas Byte Content 0-23 Magic --- 24+ Rzip Chunk Data (RCD) RCD+ Data blocks --- repeat (end-MD5_DIGEST_SIZE)->(end) md5 hash Magic data: 0->3 LRZI 4 LRZIP Major Version Number 5 LRZIP Minor Version Number 6->14 Source File Size or 0 if unknown, or salt in encrypted file 16->20 LZMA Properties Encoded (lc,lp,pb,fb, and dictionary size) 21 1 = md5sum hash is stored at the end of the archive 22 1 = data is encrypted with sha512/aes128 23 Unused Encrypted salt (bytes 6->14 in magic if encrypted): 0->1 Encoded number of loops to hash password 2->7 Random data (RCD0 is set to 8 bytes always on encrypted files) Rzip Chunk Data: 0 Data offsets byte width (meaning length is < 2^(8 * RCD0)) 1 Flag that there is no chunk beyond this (RCD0 bytes) Chunk decompressed size (not stored in encrypted file) XX Stream 0 header data XX Stream 1 header data Stream Header Data: Byte: 0 Compressed data type (RCD0 bytes) Compressed data length (RCD0 bytes) Uncompressed data length (RCD0 bytes) Next block head Data blocks: 0->(end-2) data (end-1)->end crc data lrzip-0.5x file format March 2011 Con Kolivas Byte Content 0->23 Magic -- 24->74 Rzip chunk data 75+ Data blocks -- repeat (end-MD5_DIGEST_SIZE)->(end) md5 hash Magic data: 0->3 LRZI 4 LRZIP Major Version Number 5 LRZIP
Minor Version Number 6->14 Source File Size 16->20 LZMA Properties Encoded (lc,lp,pb,fb, and dictionary size) 21 Flag that md5sum hash is stored at the end of the archive 22-23 not used Rzip chunk data: 0 Data offsets byte width 1-25 Stream 0 header data 26-50 Stream 1 header data Stream Header Data: Byte: 0 Compressed data type 1-8 Compressed data length 9-16 Uncompressed data length 17-24 Next block head Data blocks: 0->(end-2) data (end-1)->end crc data lrzip-0.40+ file header format November 2009 Con Kolivas Byte Content 0-3 LRZI 4 LRZIP Major Version Number 5 LRZIP Minor Version Number 6-14 Source File Size 16-20 LZMA Properties Encoded (lc,lp,pb,fb, and dictionary size) 21-24 not used 24-48 Stream 1 header data 49-74 Stream 2 header data Block Data: Byte: 0 Compressed data type 1-8 Compressed data length 9-16 Uncompressed data length 17-24 Next block head 25+ Data End: 0-1 crc data lrzip-0.24+ file header format January 2009 Peter Hyman, pete@peterhyman.com Byte Content 0-3 LRZI 4 LRZIP Major Version Number 5 LRZIP Minor Version Number 6-9 Source File Size (no HAVE_LARGE_FILES) 6-14 Source File Size 16-20 LZMA Properties Encoded (lc,lp,pb,fb, and dictionary size) 21-23 not used 24-36 Stream 1 header data 37-50 Stream 2 header data 51 Compressed data type lrzip-0.651/libzpaq/000077500000000000000000000000001421175057200143335ustar00rootroot00000000000000lrzip-0.651/libzpaq/libzpaq.3.pod000066400000000000000000000606401421175057200166500ustar00rootroot00000000000000# Documentation for libzpaq # # Copyright (C) 2012, Dell Inc. Written by Matt Mahoney. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so without restriction. # This Software is provided "as is" without warranty. # # To create man page: pod2man libzpaq.3.pod > libzpaq.3 # To create HTML documentation: pod2html libzpaq.3.pod > libzpaq.html =pod =head1 NAME libzpaq - ZPAQ compression API =head1 SYNOPSIS #include "libzpaq.h" namespace libzpaq { extern void error(const char* msg); class Reader { public: virtual int get() = 0; virtual int read(char* buf, int n); // optional virtual ~Reader() {} }; class Writer { public: virtual void put(int c) = 0; virtual void write(const char* buf, int n); // optional virtual ~Writer() {} }; class SHA1 { public: SHA1(); void put(int c); double size() const; uint64_t usize() const; const char* result(); }; class Compressor { public: Compressor(); void setOutput(Writer* out); void writeTag(); void startBlock(int level); void startBlock(const char* hcomp); void startSegment(const char* filename = 0, const char* comment = 0); void setInput(Reader* i); void postProcess(const char* pcomp = 0, int length = 0); bool compress(int n = -1); void endSegment(const char* sha1string = 0); void endBlock(); }; class Decompresser { public: Decompresser(); void setInput(Reader* in); bool findBlock(double* memptr = 0); void hcomp(Writer* out); bool findFilename(Writer* = 0); void readComment(Writer* = 0); void setOutput(Writer* out); void setSHA1(SHA1* sha1ptr); bool decompress(int n = -1); bool pcomp(Writer* out); void readSegmentEnd(char* sha1string = 0); }; void compress(Reader* in, Writer* out, int level); void decompress(Reader* in, Writer* out); } =head1 DESCRIPTION I<libzpaq> is a C++ API
for compressing or decompressing files or objects in memory conforming to the ZPAQ level 1 and 2 standards (see I<http://mattmahoney.net/zpaq/>). This document describes version 5.00 of the software. The software may be used without restriction under a modified MIT license. ZPAQ provides a high level of data compression in a streaming (single pass) self-describing format that supports single or multiple named objects (such as archives) with optional integrity checking. The library provides 3 default compression levels but supports custom algorithms. The performance of the default levels is shown in the table below for the 14 file Calgary corpus as a tar file. Compression and decompression times are in seconds on a 2 GHz T3200 running on one of two cores. Memory required to compress or decompress is in MB. Some popular formats are shown for comparison. Program Format Size Time (C, D) Memory ----------- ------ --------- ----------- ------ Uncompressed .tar 3,152,896 compress .tar.Z 1,319,521 1.6 0.2 .1 MB gzip -9 .tar.gz 1,022,810 0.7 0.1 .1 MB bzip2 -9 .tar.bz2 860,097 0.6 0.4 5 MB 7zip .tar.7z 824,573 1.5 0.1 195 MB zpaq 1 (fast) .tar.zpaq 806,959 2 2 38 MB zpaq 2 (mid) .tar.zpaq 699,191 8 8 112 MB zpaq 3 (max) .tar.zpaq 644,190 20 20 246 MB A ZPAQ stream consists of one or more blocks, possibly mixed with other data, that can be decompressed independently in any order. Each block consists of one or more segments that must be decompressed in order from the beginning of the block. Each block header contains a description of the decompression algorithm. Each segment consists of an optional filename string, an optional comment string, self delimiting compressed data, and an optional SHA-1 checksum. If ZPAQ blocks are mixed with other data, they must be preceded by an identifying 13 byte tag which does not otherwise appear in that data. ZPAQ compression is based on the PAQ context mixing model.
An array of components predicts the probability of the next bit of input, either independently or depending on the predictions of earlier components. The final prediction is arithmetic coded. Each component inputs a context computed from earlier input by a program written in ZPAQL byte code which runs on a virtual machine. Both the component array description and the ZPAQL code are encoded in a string called HCOMP in each block header. Data can also be stored uncompressed. A block may optionally specify a post-processor, a program (also in ZPAQL) which takes the decoded data as input and outputs the decompressed output. This program, if present, is encoded as a string called PCOMP which is compressed in the first segment prior to the compressed data. The first decoded byte from the first segment is a flag indicating whether a PCOMP string is present. The user is responsible for correctly pre-processing the data so that post-processing restores the original data. =head2 API Organization The I<libzpaq> API consists of 2 files. =over =item libzpaq.h Header file to include in your application. =item libzpaq.cpp Source code file to link to your application. =back An application would have the line C<#include "libzpaq.h"> and link to libzpaq.cpp. The API provides two classes, C<Compressor> and C<Decompresser> which write or read respectively each of the syntactic elements of a ZPAQ stream. The two functions C<compress()> and C<decompress()> provide simple interfaces for the most common uses. In either case, the user must create classes derived from the abstract base classes C<Reader> and C<Writer> and define methods C<get()> and C<put()> which the code will use to read and write bytes. The user must also define a callback error handler. By default, libzpaq(3) uses just-in-time (JIT) acceleration by translating ZPAQL code to x86-32 or x86-64 internally and executing it. This feature can be disabled by compiling with -DNOJIT. If enabled, it requires an x86 processor capable of executing SSE2 instructions.
SSE2 is supported by most Intel processors since 2001 and AMD since 2003. Run time checks (assertions) can be enabled with -DDEBUG for debugging purposes. All of the API code is contained in the namespace C<libzpaq>. =head2 Callback Functions The following three functions must be defined by the user. =over =item C<void error(const char* msg)> This function must be defined by the user to handle errors from libzpaq. The library will call the function with an English language message passed to C<msg>. Errors may result from bad input during decompression, out of memory, or illegal arguments or calling sequences to libzpaq functions. Errors should be considered unrecoverable. =item C<int Reader::get() = 0> The user must create a class derived from Reader with an implementation for C<get()> that reads one byte of input and returns its value in the range 0...255, or returns EOF (-1) at end of input. Objects of the derived type would then be passed to functions that require a C<Reader>. =item C<void Writer::put(int c) = 0> The user must create a class derived from Writer with an implementation of C<put()> which is expected to take a byte value C<c> in the range 0...255 and write it to output. Objects of the derived type would then be passed to functions that require a C<Writer>. =back The following two functions are optional. Defining them can improve performance slightly. =over =item C<int Reader::read(char* buf, int n)> If defined, this function should input up to C<n> bytes into the array C<buf> and return the number actually read, in the range 0..n. A return value of 0 indicates end of input. If C<read()> is not defined, then the default implementation will call C<get()> n times. =item C<void Writer::write(const char* buf, int n)> If defined, this function should output the elements C<buf[0]> through C<buf[n-1]> in order. If not defined, then the default implementation will call C<put()> n times. =back =head2 Simple Compression In the remainder of this document, all classes and functions are assumed to be in namespace C<libzpaq>. =over =item C<void compress(Reader* in, Writer* out, int level)> C<compress()> compresses from C<in> to C<out> until C<get()> returns EOF. It writes a single segment in a single block with empty filename, comment, and checksum fields.
C<level> must be 1, 2, or 3, to select models I<fast>, I<mid>, or I<max> respectively. Higher modes compress smaller but take longer to compress and subsequently decompress. =item C<void decompress(Reader* in, Writer* out)> C<decompress()> decompresses any valid ZPAQ stream from C<in> to C<out> until C<get()> returns EOF. Any non-ZPAQ data in the input is ignored. Any ZPAQ blocks following non-ZPAQ must be preceded by a marker tag to be recognized. Each block is decoded according to the instructions in the block header. The contents of the filename, comment, and checksum fields are ignored. Data with bad checksums will be decoded anyway. If there is more than one segment, then all of the output data will be concatenated. =back =head2 class SHA1 The SHA1 class is used to compute SHA-1 checksums for compression and verify them for decompression. It is believed to be computationally infeasible to find two different strings with the same hash value. Its member functions are as follows: =over =item C<SHA1()> The constructor creates a new SHA1 object representing the hash of an empty string. =item C<void put(int c)> Appends one byte c (0...255) to the string whose hash is represented. =item C<double size() const> Returns the length (so far) of the string whose hash is represented. The largest possible value returned is 2^61 - 1 = 2305843009213693951.0, but values larger than 2^53 = 9007199254740992.0 will not be exact on systems using IEEE 64 bit floating point representation of type C<double>. The initial value is 0.0. =item C<uint64_t usize() const> Returns the length (so far) as a 64 bit unsigned integer. =item C<const char* result()> Computes the 20 byte SHA-1 hash and resets the string back to a size of 0.0. The returned pointer points to an array inside the SHA1 object whose contents remain unchanged until the next call to C<result()>. =back =head2 class Compressor The C<Compressor> class has member functions to write each of the syntactic elements of a ZPAQ stream and to specify their values. It will compress using either built-in or user supplied models. =over =item C<Compressor()> The constructor creates a Compressor object.
No input source, output destination, or compression model is specified. =item C<void setOutput(Writer* out)> Specifies a destination for output. Must be specified before calling any function that writes data. =item C<void writeTag()> Writes a 13 byte marker tag which can be used to identify the start of a block following non-ZPAQ data. =item C<void startBlock(int level)> Writes a block header and specifies a compression model. If linked with F<libzpaq.cpp>, then C<level> must be 1, 2, or 3 to specify I<fast>, I<mid>, or I<max> respectively. Higher numbers compress smaller but more slowly. These models are compatible with both the ZPAQ level 1 and 2 standards. =item C<void startBlock(const char* hcomp)> Writes a block header and specifies the HCOMP portion of the compression model. The first two bytes of the string should encode the length of the rest of the string as a 16 bit unsigned number with the least significant byte first. The meaning of the rest of the string is defined in the ZPAQ level 2 standard. If the number of components (C<hcomp[6]>) is 0, then the block is saved in ZPAQ level 2 format, which cannot be read by older ZPAQ level 1 decoders. Otherwise the block is saved in ZPAQ level 1 format, which is compatible with all decoders. =item C<void startSegment(const char* filename = 0, const char* comment = 0)> Writes a segment header. C<filename> and C<comment> are NUL terminated strings. If specified, then their values are stored. Normally, C<filename> would be a file name when compressing to an archive or omitted otherwise. If a file is split among segments, then by convention only the first segment is named. C<comment> is normally the uncompressed size as a decimal number which is displayed when listing the contents of an archive. Omitting it does not affect decompression. =item C<void postProcess(const char* pcomp = 0, int length = 0)> Specifies the optional PCOMP string used for post-processing. It must be called from within the first segment of each block prior to compressing any data, but not from within any other segment. If C<pcomp> is 0 or no argument is passed, then the decompresser will not post-process the data. The effect is to compress a 0 byte to indicate to the decompresser that no PCOMP string is present. If C<pcomp> is not 0, then I<length> bytes of the string I<pcomp> are passed.
If I<length> is 0 or omitted, then the first two bytes must encode the length of the rest of the string as a 16 bit unsigned number with the least significant byte first. The format of the remainder of the string is described in the ZPAQ level 2 standard. The effect is to compress a 1 byte to indicate the presence of PCOMP, followed by the two length bytes and the string as passed. For example, either C<postProcess("\x02\x00\x05\x08")> or C<postProcess("\x05\x08", 2)> would compress the 5 bytes 1, 2, 0, 5, 8. The user is responsible for pre-processing the input prior to compression so that PCOMP restores the original data. =item C<void setInput(Reader* in)> Specifies the input source for compression. It must be set prior to the first call to C<compress()>. =item C<bool compress(int n = -1)> Compress n bytes of data, or until EOF is input, whichever comes first. If n < 0 or omitted, then compress until EOF. Returns true if there is more input available, or false if EOF was read. =item C<void endSegment(const char* sha1string = 0)> Stop compressing and write the end of a segment. If C<sha1string> is specified, it should be a 20 byte string as returned by C<SHA1::result()> on the input data for this segment I<before> pre-processing. =item C<void endBlock()> Finish writing the current block. =back In order to create a valid ZPAQ stream, the components must be written in the following order: for each block do { if any non-ZPAQ data then { write non-ZPAQ data writeTag() } startBlock() for each segment do { startSegment() if first segment in block then { postProcess() } while (compress(n)) ; endSegment() } endBlock() } =head2 class Decompresser The class Decompresser has member functions to read each of the syntactic elements of a ZPAQ stream. =over =item C<Decompresser()> The constructor creates a Decompresser object. No input source or output destination is specified. =item C<void setInput(Reader* in)> Specifies where the ZPAQ stream will be read from. Must be called before any function that reads the stream. =item C<bool findBlock(double* memptr = 0)> Scan the input to find the start of the next block. If a block does not start immediately, then the block must be preceded by a marker tag (written with C<writeTag()>) or it will not be found.
If C<memptr> is not 0, then write the approximate memory requirement (in bytes) to decompress to C<*memptr>. The memory will be allocated by the first call to C<decompress()>. It returns true if a block is found, or false if it reads to EOF without finding a block. =item C<void hcomp(Writer* out)> Write the HCOMP string of the current block to C<out>. It will be in a format suitable for passing to C<Compressor::startBlock()>. The first 2 bytes will encode the length of the rest of the string as a 16 bit unsigned integer with the least significant byte first. The format of the remainder of the string is described in the ZPAQ level 1 specification. =item C<bool findFilename(Writer* out = 0)> Find the start of the next segment. If another segment is found within the current block then return true. If the end of the block is found first, then return false. If a segment is found, the filename field is not empty, and C<out> is not 0, then write the filename (without a terminating NUL byte) to C<out>. =item C<void readComment(Writer* out = 0)> Read or skip past the comment field following the filename field in the segment header. If C<out> is not 0 and the comment field is not empty, then write the comment (without a terminating NUL byte) to C<out>. =item C<void setOutput(Writer* out)> Specify the destination for decompression. It must be set before any data can be decompressed. =item C<void setSHA1(SHA1* sha1ptr)> Specify the address of a SHA1 object for computing the checksum of the decompressed data (after post-processing). As each byte C<c> is output, it is also passed to C<< sha1ptr->put(c) >>. In order to compute the correct checksum, the SHA1 object should be in its initial state, either newly created, or by calling C<SHA1::result()>, before the first call to C<decompress()>. When the end of the segment is reached, the value returned by C<< sha1ptr->result() >> should match the stored checksum, if any. =item C<bool decompress(int n = -1)> Decode n bytes or until the end of segment, whichever comes first. Return false if the end of segment is reached first. If n < 0 or not specified, then decompress to the end of segment and return false. C<n> is the number of bytes prior to post-processing. If the data is post-processed, then the size of the output may be different.
=item C<bool pcomp(Writer* out)> Write the PCOMP string, if any, for the current block to C<out>. If there is no PCOMP string (no post-processor) then return false. Otherwise write the string to C<out> in a format suitable for passing to C<Compressor::postProcess()> and return true. If written, then the first 2 bytes will encode the length of the rest of the string as a 16 bit unsigned integer with the least significant byte first. The format of the rest of the string is described in the ZPAQ level 1 standard. C<pcomp()> is only valid after the first call to C<decompress()> in the current block. To read the PCOMP string without decompressing any data, then call C<decompress(0)> first. It is not necessary to call C<setOutput()> in this case. =item C<void readSegmentEnd(char* sha1string = 0)> Skip any compressed data in the current segment that has not yet been decompressed and advance to the end of the segment. Then if C<sha1string> is not 0 then write into the 21 byte array that it points to. If a checksum is present, then write a 1 into C<sha1string[0]> and write the stored checksum in C<sha1string[1...20]>. Otherwise write a 0 in C<sha1string[0]>. Note that it is not permitted to call decompress() if any compressed data has been skipped in any earlier segments in the same block. =back A valid sequence of calls is as follows: while (findBlock()) { while (findFilename()) { readComment(); if first segment in block then { (optional) decompress(0) pcomp() } while (decompress(n)) ; (optional) readSegmentEnd(); } } =head1 EXAMPLES The following program F<listzpaq.cpp> lists the contents of a ZPAQ archive read from standard input. #include <stdio.h> #include <stdlib.h> #include "libzpaq.h" // Implement Reader and Writer interfaces for file I/O class File: public libzpaq::Reader, public libzpaq::Writer { FILE* f; public: File(FILE* f_): f(f_) {} int get() {return getc(f);} void put(int c) {putc(c, f);} int read(char* buf, int n) {return fread(buf, 1, n, f);} void write(const char* buf, int n) {fwrite(buf, 1, n, f);} }; // Implement error handler namespace libzpaq { void error(const char* msg) { fprintf(stderr, "Error: %s\n", msg); exit(1); } } // List the contents of an archive.
For each block, show // the memory required to decompress. For each segment, // show the filename and comment. void list(FILE* input, FILE* output) { libzpaq::Decompresser d; File in(input), out(output); double memory; d.setInput(&in); for (int block=1; d.findBlock(&memory); ++block) { printf("Block %d needs %1.0f MB\n", block, memory/1e6); while (d.findFilename(&out)) { // print filename printf("\t"); d.readComment(&out); // print comment printf("\n"); d.readSegmentEnd(); // skip compressed data } } } int main() { list(stdin, stdout); return 0; } The program could be compiled as follows: g++ listzpaq.cpp libzpaq.cpp The following code compresses a list of files into one block written to stdout. Each file is compressed to a separate segment. For each segment, the filename, comment, and SHA-1 checksum are stored. The comment, as conventional, is the file size as a decimal string. // Compress one file to one segment void compress_file(libzpaq::Compressor& c, const char* filename, bool first_segment) { // Open input file FILE* f; f=fopen(filename, "rb"); if (!f) return; // Compute SHA-1 checksum and file size libzpaq::SHA1 sha1; int ch; while ((ch=getc(f))!=EOF) sha1.put(ch); // Write file size as a comment. // The size can have at most 19 digits. char comment[20]; sprintf(comment, "%1.0f", sha1.size()); // Compress segment rewind(f); File in(f); c.startSegment(filename, comment); if (first_segment) c.postProcess(); c.setInput(&in); c.compress(); c.endSegment(sha1.result()); // Close input file fclose(f); } // Compress a list of argc files in argv[0...argc-1] into one // ZPAQ block to stdout at level 2. 
void compress_list(int argc, char** argv) { libzpaq::Compressor c; File out(stdout); c.setOutput(&out); c.startBlock(2); for (int i=0; i and C can be passed an argument n to display progress every n bytes, for example: for (int i=1; d.decompress(1000000); ++i) fprintf(stderr, "Decompressed %d MB\n", i); To compress or decompress to and from objects in memory, derive appropriate classes from C and C. For example, it is possible to compress or decompress to a C using the following class. struct String: public libzpaq::Writer { std::string s; void put(int c) {s+=char(c);} }; This class is also useful for reading the filename and comment fields during decompression as follows: String filename, comment; while (d.findFilename(&filename)) { d.readComment(&comment); // ... =head1 AVAILABILITY I, I, and the ZPAQ level 1 and 2 specifications are available from L. =head1 SEE ALSO C C =cut lrzip-0.651/libzpaq/libzpaq.cpp000066400000000000000000003362431421175057200165140ustar00rootroot00000000000000/* libzpaq.cpp - Part of LIBZPAQ Version 5.01 Copyright (C) 2011, Dell Inc. Written by Matt Mahoney. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so without restriction. This Software is provided "as is" without warranty. LIBZPAQ is a C++ library for compression and decompression of data conforming to the ZPAQ level 2 standard. 
See http://mattmahoney.net/zpaq/ */ #include "libzpaq.h" #include #include #include #include #ifndef NOJIT #ifndef _WIN32 #include #else #include #endif #endif namespace libzpaq { // Standard library redirections void* calloc(size_t a, size_t b) {return ::calloc(a, b);} void free(void* p) {::free(p);} int memcmp(const void* d, const void* s, size_t n) { return ::memcmp(d, s, n);} void* memset(void* d, int c, size_t n) {return ::memset(d, c, n);} double log(double x) {return ::log(x);} double exp(double x) {return ::exp(x);} double pow(double x, double y) {return ::pow(x, y);} // Read 16 bit little-endian number int toU16(const char* p) { return (p[0]&255)+256*(p[1]&255); } // Default read() and write() int Reader::read(char* buf, int n) { int i=0, c; while (i=0) buf[i++]=c; return i; } void Writer::write(const char* buf, int n) { for (int i=0; i 0 bytes of executable memory and update // p to point to it and newsize = n. Free any previously // allocated memory first. If newsize is 0 then free only. // Call error in case of failure. If NOJIT, ignore newsize // and set p=0, n=0 without allocating memory. 
void allocx(U8* &p, int &n, int newsize) { #ifdef NOJIT p=0; n=0; #else if (p || n) { if (p) #ifndef _WIN32 munmap(p, n); #else // Windows VirtualFree(p, 0, MEM_RELEASE); #endif p=0; n=0; } if (newsize>0) { #ifndef _WIN32 p=(U8*)mmap(0, newsize, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANON, -1, 0); if ((void*)p==MAP_FAILED) p=0; #else p=(U8*)VirtualAlloc(0, newsize, MEM_RESERVE|MEM_COMMIT, PAGE_EXECUTE_READWRITE); #endif if (p) n=newsize; else { n=0; error("allocx failed"); } } #endif } //////////////////////////// SHA1 //////////////////////////// // SHA1 code, see http://en.wikipedia.org/wiki/SHA-1 // Start a new hash void SHA1::init() { len0=len1=0; h[0]=0x67452301; h[1]=0xEFCDAB89; h[2]=0x98BADCFE; h[3]=0x10325476; h[4]=0xC3D2E1F0; } // Return old result and start a new hash const char* SHA1::result() { // pad and append length const U32 s1=len1, s0=len0; put(0x80); while ((len0&511)!=448) put(0); put(s1>>24); put(s1>>16); put(s1>>8); put(s1); put(s0>>24); put(s0>>16); put(s0>>8); put(s0); // copy h to hbuf for (int i=0; i<5; ++i) { hbuf[4*i]=h[i]>>24; hbuf[4*i+1]=h[i]>>16; hbuf[4*i+2]=h[i]>>8; hbuf[4*i+3]=h[i]; } // return hash prior to clearing state init(); return hbuf; } // Hash 1 block of 64 bytes void SHA1::process() { for (int i=16; i<80; ++i) { w[i]=w[i-3]^w[i-8]^w[i-14]^w[i-16]; w[i]=w[i]<<1|w[i]>>31; } U32 a=h[0]; U32 b=h[1]; U32 c=h[2]; U32 d=h[3]; U32 e=h[4]; const U32 k1=0x5A827999, k2=0x6ED9EBA1, k3=0x8F1BBCDC, k4=0xCA62C1D6; #define f1(a,b,c,d,e,i) e+=(a<<5|a>>27)+((b&c)|(~b&d))+k1+w[i]; b=b<<30|b>>2; #define f5(i) f1(a,b,c,d,e,i) f1(e,a,b,c,d,i+1) f1(d,e,a,b,c,i+2) \ f1(c,d,e,a,b,i+3) f1(b,c,d,e,a,i+4) f5(0) f5(5) f5(10) f5(15) #undef f1 #define f1(a,b,c,d,e,i) e+=(a<<5|a>>27)+(b^c^d)+k2+w[i]; b=b<<30|b>>2; f5(20) f5(25) f5(30) f5(35) #undef f1 #define f1(a,b,c,d,e,i) e+=(a<<5|a>>27)+((b&c)|(b&d)|(c&d))+k3+w[i]; b=b<<30|b>>2; f5(40) f5(45) f5(50) f5(55) #undef f1 #define f1(a,b,c,d,e,i) e+=(a<<5|a>>27)+(b^c^d)+k4+w[i]; b=b<<30|b>>2; 
f5(60) f5(65) f5(70) f5(75) #undef f1 #undef f5 h[0]+=a; h[1]+=b; h[2]+=c; h[3]+=d; h[4]+=e; } //////////////////////////// Component /////////////////////// // A Component is a context model, indirect context model, match model, // fixed weight mixer, adaptive 2 input mixer without or with current // partial byte as context, adaptive m input mixer (without or with), // or SSE (without or with). const int compsize[256]={0,2,3,2,3,4,6,6,3,5}; void Component::init() { limit=cxt=a=b=c=0; cm.resize(0); ht.resize(0); a16.resize(0); } ////////////////////////// StateTable ////////////////////////// // How many states with count of n0 zeros, n1 ones (0...2) int StateTable::num_states(int n0, int n1) { const int B=6; const int bound[B]={20,48,15,8,6,5}; // n0 -> max n1, n1 -> max n0 if (n0=B || n0>bound[n1]) return 0; return 1+(n1>0 && n0+n1<=17); } // New value of count n0 if 1 is observed (and vice versa) void StateTable::discount(int& n0) { n0=(n0>=1)+(n0>=2)+(n0>=3)+(n0>=4)+(n0>=5)+(n0>=7)+(n0>=8); } // compute next n0,n1 (0 to N) given input y (0 or 1) void StateTable::next_state(int& n0, int& n1, int y) { if (n0 20,0 // 48,1,0 -> 48,1 // 15,2,0 -> 8,1 // 8,3,0 -> 6,2 // 8,3,1 -> 5,3 // 6,4,0 -> 5,3 // 5,5,0 -> 5,4 // 5,5,1 -> 4,5 while (!num_states(n0, n1)) { if (n1<2) --n0; else { n0=(n0*(n1-1)+(n1/2))/n1; --n1; } } } } // Initialize next state table ns[state*4] -> next if 0, next if 1, n0, n1 StateTable::StateTable() { // Assign states by increasing priority const int N=50; U8 t[N][N][2]={{{0}}}; // (n0,n1,y) -> state number int state=0; for (int i=0; i=0 && n<=2); if (n) { t[n0][n1][0]=state; t[n0][n1][1]=state+n-1; state+=n; } } } // Generate next state table memset(ns, 0, sizeof(ns)); for (int n0=0; n0=0 && s<256); int s0=n0, s1=n1; next_state(s0, s1, 0); assert(s0>=0 && s0=0 && s1=0 && s0=0 && s1=7); assert(hbegin>=cend); assert(hend>=hbegin); assert(out2); if (!pp) { // if not a postprocessor then write COMP for (int i=0; iput(header[i]); } else { // write 
PCOMP size only out2->put((hend-hbegin)&255); out2->put((hend-hbegin)>>8); } for (int i=hbegin; iput(header[i]); return true; } // Read header from in2 int ZPAQL::read(Reader* in2) { // Get header size and allocate int hsize=in2->get(); hsize+=in2->get()*256; header.resize(hsize+300); cend=hbegin=hend=0; header[cend++]=hsize&255; header[cend++]=hsize>>8; while (cend<7) header[cend++]=in2->get(); // hh hm ph pm n // Read COMP int n=header[cend-1]; for (int i=0; iget(); // component type if (type==-1) error("unexpected end of file"); header[cend++]=type; // component type int size=compsize[type]; if (size<1) error("Invalid component type"); if (cend+size>header.isize()-8) error("COMP list too big"); for (int j=1; jget(); } if ((header[cend++]=in2->get())!=0) error("missing COMP END"); // Insert a guard gap and read HCOMP hbegin=hend=cend+128; while (hendget(); if (op==-1) error("unexpected end of file"); header[hend++]=op; } if ((header[hend++]=in2->get())!=0) error("missing HCOMP END"); assert(cend>=7 && cendhbegin && hend6); assert(output==0); assert(sha1==0); init(header[2], header[3]); // hh, hm } // Initialize machine state as PCOMP void ZPAQL::initp() { assert(header.isize()>6); init(header[4], header[5]); // ph, pm } // Flush pending output void ZPAQL::flush() { if (output) output->write(&outbuf[0], bufptr); if (sha1) for (int i=0; iput(U8(outbuf[i])); bufptr=0; } // Return memory requirement in bytes double ZPAQL::memory() { double mem=pow(2.0,header[2]+2)+pow(2.0,header[3]) // hh hm +pow(2.0,header[4]+2)+pow(2.0,header[5]) // ph pm +header.size(); int cp=7; // start of comp list for (int i=0; i0); assert(cend>=7); assert(hbegin>=cend+128); assert(hend>=hbegin); assert(hend0); h.resize(1, hbits); m.resize(1, mbits); r.resize(256); a=b=c=d=pc=f=0; } // Run program on input by interpreting header void ZPAQL::run0(U32 input) { assert(cend>6); assert(hbegin>=cend+128); assert(hend>=hbegin); assert(hend0); assert(h.size()>0); 
assert(header[0]+256*header[1]==cend+hend-hbegin-2); pc=hbegin; a=input; while (execute()) ; } // Execute one instruction, return 0 after HALT else 1 int ZPAQL::execute() { switch(header[pc++]) { case 0: err(); break; // ERROR case 1: ++a; break; // A++ case 2: --a; break; // A-- case 3: a = ~a; break; // A! case 4: a = 0; break; // A=0 case 7: a = r[header[pc++]]; break; // A=R N case 8: swap(b); break; // B<>A case 9: ++b; break; // B++ case 10: --b; break; // B-- case 11: b = ~b; break; // B! case 12: b = 0; break; // B=0 case 15: b = r[header[pc++]]; break; // B=R N case 16: swap(c); break; // C<>A case 17: ++c; break; // C++ case 18: --c; break; // C-- case 19: c = ~c; break; // C! case 20: c = 0; break; // C=0 case 23: c = r[header[pc++]]; break; // C=R N case 24: swap(d); break; // D<>A case 25: ++d; break; // D++ case 26: --d; break; // D-- case 27: d = ~d; break; // D! case 28: d = 0; break; // D=0 case 31: d = r[header[pc++]]; break; // D=R N case 32: swap(m(b)); break; // *B<>A case 33: ++m(b); break; // *B++ case 34: --m(b); break; // *B-- case 35: m(b) = ~m(b); break; // *B! case 36: m(b) = 0; break; // *B=0 case 39: if (f) pc+=((header[pc]+128)&255)-127; else ++pc; break; // JT N case 40: swap(m(c)); break; // *C<>A case 41: ++m(c); break; // *C++ case 42: --m(c); break; // *C-- case 43: m(c) = ~m(c); break; // *C! case 44: m(c) = 0; break; // *C=0 case 47: if (!f) pc+=((header[pc]+128)&255)-127; else ++pc; break; // JF N case 48: swap(h(d)); break; // *D<>A case 49: ++h(d); break; // *D++ case 50: --h(d); break; // *D-- case 51: h(d) = ~h(d); break; // *D! 
case 52: h(d) = 0; break; // *D=0 case 55: r[header[pc++]] = a; break; // R=A N case 56: return 0 ; // HALT case 57: outc(a&255); break; // OUT case 59: a = (a+m(b)+512)*773; break; // HASH case 60: h(d) = (h(d)+a+512)*773; break; // HASHD case 63: pc+=((header[pc]+128)&255)-127; break; // JMP N case 64: break; // A=A case 65: a = b; break; // A=B case 66: a = c; break; // A=C case 67: a = d; break; // A=D case 68: a = m(b); break; // A=*B case 69: a = m(c); break; // A=*C case 70: a = h(d); break; // A=*D case 71: a = header[pc++]; break; // A= N case 72: b = a; break; // B=A case 73: break; // B=B case 74: b = c; break; // B=C case 75: b = d; break; // B=D case 76: b = m(b); break; // B=*B case 77: b = m(c); break; // B=*C case 78: b = h(d); break; // B=*D case 79: b = header[pc++]; break; // B= N case 80: c = a; break; // C=A case 81: c = b; break; // C=B case 82: break; // C=C case 83: c = d; break; // C=D case 84: c = m(b); break; // C=*B case 85: c = m(c); break; // C=*C case 86: c = h(d); break; // C=*D case 87: c = header[pc++]; break; // C= N case 88: d = a; break; // D=A case 89: d = b; break; // D=B case 90: d = c; break; // D=C case 91: break; // D=D case 92: d = m(b); break; // D=*B case 93: d = m(c); break; // D=*C case 94: d = h(d); break; // D=*D case 95: d = header[pc++]; break; // D= N case 96: m(b) = a; break; // *B=A case 97: m(b) = b; break; // *B=B case 98: m(b) = c; break; // *B=C case 99: m(b) = d; break; // *B=D case 100: m(b) = m(b); break; // *B=*B case 101: m(b) = m(c); break; // *B=*C case 102: m(b) = h(d); break; // *B=*D case 103: m(b) = header[pc++]; break; // *B= N case 104: m(c) = a; break; // *C=A case 105: m(c) = b; break; // *C=B case 106: m(c) = c; break; // *C=C case 107: m(c) = d; break; // *C=D case 108: m(c) = m(b); break; // *C=*B case 109: m(c) = m(c); break; // *C=*C case 110: m(c) = h(d); break; // *C=*D case 111: m(c) = header[pc++]; break; // *C= N case 112: h(d) = a; break; // *D=A case 113: h(d) = b; break; // *D=B 
case 114: h(d) = c; break; // *D=C case 115: h(d) = d; break; // *D=D case 116: h(d) = m(b); break; // *D=*B case 117: h(d) = m(c); break; // *D=*C case 118: h(d) = h(d); break; // *D=*D case 119: h(d) = header[pc++]; break; // *D= N case 128: a += a; break; // A+=A case 129: a += b; break; // A+=B case 130: a += c; break; // A+=C case 131: a += d; break; // A+=D case 132: a += m(b); break; // A+=*B case 133: a += m(c); break; // A+=*C case 134: a += h(d); break; // A+=*D case 135: a += header[pc++]; break; // A+= N case 136: a -= a; break; // A-=A case 137: a -= b; break; // A-=B case 138: a -= c; break; // A-=C case 139: a -= d; break; // A-=D case 140: a -= m(b); break; // A-=*B case 141: a -= m(c); break; // A-=*C case 142: a -= h(d); break; // A-=*D case 143: a -= header[pc++]; break; // A-= N case 144: a *= a; break; // A*=A case 145: a *= b; break; // A*=B case 146: a *= c; break; // A*=C case 147: a *= d; break; // A*=D case 148: a *= m(b); break; // A*=*B case 149: a *= m(c); break; // A*=*C case 150: a *= h(d); break; // A*=*D case 151: a *= header[pc++]; break; // A*= N case 152: div(a); break; // A/=A case 153: div(b); break; // A/=B case 154: div(c); break; // A/=C case 155: div(d); break; // A/=D case 156: div(m(b)); break; // A/=*B case 157: div(m(c)); break; // A/=*C case 158: div(h(d)); break; // A/=*D case 159: div(header[pc++]); break; // A/= N case 160: mod(a); break; // A%=A case 161: mod(b); break; // A%=B case 162: mod(c); break; // A%=C case 163: mod(d); break; // A%=D case 164: mod(m(b)); break; // A%=*B case 165: mod(m(c)); break; // A%=*C case 166: mod(h(d)); break; // A%=*D case 167: mod(header[pc++]); break; // A%= N case 168: a &= a; break; // A&=A case 169: a &= b; break; // A&=B case 170: a &= c; break; // A&=C case 171: a &= d; break; // A&=D case 172: a &= m(b); break; // A&=*B case 173: a &= m(c); break; // A&=*C case 174: a &= h(d); break; // A&=*D case 175: a &= header[pc++]; break; // A&= N case 176: a &= ~ a; break; // A&~A 
case 177: a &= ~ b; break; // A&~B case 178: a &= ~ c; break; // A&~C case 179: a &= ~ d; break; // A&~D case 180: a &= ~ m(b); break; // A&~*B case 181: a &= ~ m(c); break; // A&~*C case 182: a &= ~ h(d); break; // A&~*D case 183: a &= ~ header[pc++]; break; // A&~ N case 184: a |= a; break; // A|=A case 185: a |= b; break; // A|=B case 186: a |= c; break; // A|=C case 187: a |= d; break; // A|=D case 188: a |= m(b); break; // A|=*B case 189: a |= m(c); break; // A|=*C case 190: a |= h(d); break; // A|=*D case 191: a |= header[pc++]; break; // A|= N case 192: a ^= a; break; // A^=A case 193: a ^= b; break; // A^=B case 194: a ^= c; break; // A^=C case 195: a ^= d; break; // A^=D case 196: a ^= m(b); break; // A^=*B case 197: a ^= m(c); break; // A^=*C case 198: a ^= h(d); break; // A^=*D case 199: a ^= header[pc++]; break; // A^= N case 200: a <<= (a&31); break; // A<<=A case 201: a <<= (b&31); break; // A<<=B case 202: a <<= (c&31); break; // A<<=C case 203: a <<= (d&31); break; // A<<=D case 204: a <<= (m(b)&31); break; // A<<=*B case 205: a <<= (m(c)&31); break; // A<<=*C case 206: a <<= (h(d)&31); break; // A<<=*D case 207: a <<= (header[pc++]&31); break; // A<<= N case 208: a >>= (a&31); break; // A>>=A case 209: a >>= (b&31); break; // A>>=B case 210: a >>= (c&31); break; // A>>=C case 211: a >>= (d&31); break; // A>>=D case 212: a >>= (m(b)&31); break; // A>>=*B case 213: a >>= (m(c)&31); break; // A>>=*C case 214: a >>= (h(d)&31); break; // A>>=*D case 215: a >>= (header[pc++]&31); break; // A>>= N case 216: f = (true); break; // A==A case 217: f = (a == b); break; // A==B case 218: f = (a == c); break; // A==C case 219: f = (a == d); break; // A==D case 220: f = (a == U32(m(b))); break; // A==*B case 221: f = (a == U32(m(c))); break; // A==*C case 222: f = (a == h(d)); break; // A==*D case 223: f = (a == U32(header[pc++])); break; // A== N case 224: f = (false); break; // AA case 233: f = (a > b); break; // A>B case 234: f = (a > c); break; // A>C case 
235: f = (a > d); break; // A>D case 236: f = (a > U32(m(b))); break; // A>*B case 237: f = (a > U32(m(c))); break; // A>*C case 238: f = (a > h(d)); break; // A>*D case 239: f = (a > U32(header[pc++])); break; // A> N case 255: if((pc=hbegin+header[pc]+256*header[pc+1])>=hend)err();break;//LJ default: err(); } return 1; } // Print illegal instruction error message and exit void ZPAQL::err() { error("ZPAQL execution error"); } ///////////////////////// Predictor ///////////////////////// // Initailize model-independent tables Predictor::Predictor(ZPAQL& zr): c8(1), hmap4(1), z(zr) { assert(sizeof(U8)==1); assert(sizeof(U16)==2); assert(sizeof(U32)==4); assert(sizeof(U64)==8); assert(sizeof(short)==2); assert(sizeof(int)==4); // Initialize tables dt2k[0]=0; for (int i=1; i<256; ++i) dt2k[i]=2048/i; for (int i=0; i<1024; ++i) dt[i]=(1<<17)/(i*2+3)*2; for (int i=0; i<32768; ++i) stretcht[i]=int(log((i+0.5)/(32767.5-i))*64+0.5+100000)-100000; for (int i=0; i<4096; ++i) squasht[i]=int(32768.0/(1+exp((i-2048)*(-1.0/64)))); // Verify floating point math for squash() and stretch() U32 sqsum=0, stsum=0; for (int i=32767; i>=0; --i) stsum=stsum*3+stretch(i); for (int i=4095; i>=0; --i) sqsum=sqsum*3+squash(i-2048); assert(stsum==3887533746u); assert(sqsum==2278286169u); pcode=0; pcode_size=0; } Predictor::~Predictor() { allocx(pcode, pcode_size, 0); // free executable memory } // Initialize the predictor with a new model in z void Predictor::init() { // Clear old JIT code if any allocx(pcode, pcode_size, 0); // Initialize context hash function z.inith(); // Initialize predictions for (int i=0; i<256; ++i) h[i]=p[i]=0; // Initialize components for (int i=0; i<256; ++i) // clear old model comp[i].init(); int n=z.header[6]; // hsize[0..1] hh hm ph pm n (comp)[n] END 0[128] (hcomp) END const U8* cp=&z.header[7]; // start of component list for (int i=0; i&z.header[0] && cp<&z.header[z.header.isize()-8]); Component& cr=comp[i]; switch(cp[0]) { case CONS: // c p[i]=(cp[1]-128)*4; 
break; case CM: // sizebits limit if (cp[1]>32) error("max size for CM is 32"); cr.cm.resize(1, cp[1]); // packed CM (22 bits) + CMCOUNT (10 bits) cr.limit=cp[2]*4; for (size_t j=0; j26) error("max size for ICM is 26"); cr.limit=1023; cr.cm.resize(256); cr.ht.resize(64, cp[1]); for (size_t j=0; j32 || cp[2]>32) error("max size for MATCH is 32 32"); cr.cm.resize(1, cp[1]); // index cr.ht.resize(1, cp[2]); // buf cr.ht(0)=1; break; case AVG: // j k wt if (cp[1]>=i) error("AVG j >= i"); if (cp[2]>=i) error("AVG k >= i"); break; case MIX2: // sizebits j k rate mask if (cp[1]>32) error("max size for MIX2 is 32"); if (cp[3]>=i) error("MIX2 k >= i"); if (cp[2]>=i) error("MIX2 j >= i"); cr.c=(size_t(1)<32) error("max size for MIX is 32"); if (cp[2]>=i) error("MIX j >= i"); if (cp[3]<1 || cp[3]>i-cp[2]) error("MIX m not in 1..i-j"); int m=cp[3]; // number of inputs assert(m>=1); cr.c=(size_t(1)<32) error("max size for ISSE is 32"); if (cp[2]>=i) error("ISSE j >= i"); cr.ht.resize(64, cp[1]); cr.cm.resize(512); for (int j=0; j<256; ++j) { cr.cm[j*2]=1<<15; cr.cm[j*2+1]=clamp512k(stretch(st.cminit(j)>>8)<<10); } break; case SSE: // sizebits j start limit if (cp[1]>32) error("max size for SSE is 32"); if (cp[2]>=i) error("SSE j >= i"); if (cp[3]>cp[4]*4) error("SSE start > limit*4"); cr.cm.resize(32, cp[1]); cr.limit=cp[4]*4; for (size_t j=0; j0); cp+=compsize[*cp]; assert(cp>=&z.header[7] && cp<&z.header[z.cend]); } } // Return next bit prediction using interpreted COMP code int Predictor::predict0() { assert(c8>=1 && c8<=255); // Predict next bit int n=z.header[6]; assert(n>0 && n<=255); const U8* cp=&z.header[7]; assert(cp[-1]==n); for (int i=0; i&z.header[0] && cp<&z.header[z.header.isize()-8]); Component& cr=comp[i]; switch(cp[0]) { case CONS: // c break; case CM: // sizebits limit cr.cxt=h[i]^hmap4; p[i]=stretch(cr.cm(cr.cxt)>>17); break; case ICM: // sizebits assert((hmap4&15)>0); if (c8==1 || (c8&0xf0)==16) cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8); 
cr.cxt=cr.ht[cr.c+(hmap4&15)]; p[i]=stretch(cr.cm(cr.cxt)>>8); break; case MATCH: // sizebits bufbits: a=len, b=offset, c=bit, cxt=bitpos, // ht=buf, limit=pos assert(cr.cm.size()==(size_t(1)<>(7-cr.cxt))&1; // predicted bit p[i]=stretch(dt2k[cr.a]*(cr.c*-2+1)&32767); } break; case AVG: // j k wt p[i]=(p[cp[1]]*cp[3]+p[cp[2]]*(256-cp[3]))>>8; break; case MIX2: { // sizebits j k rate mask // c=size cm=wt[size] cxt=input cr.cxt=((h[i]+(c8&cp[5]))&(cr.c-1)); assert(cr.cxt=0 && w<65536); p[i]=(w*p[cp[2]]+(65536-w)*p[cp[3]])>>16; assert(p[i]>=-2048 && p[i]<2048); } break; case MIX: { // sizebits j m rate mask // c=size cm=wt[size][m] cxt=index of wt in cm int m=cp[3]; assert(m>=1 && m<=i); cr.cxt=h[i]+(c8&cp[5]); cr.cxt=(cr.cxt&(cr.c-1))*m; // pointer to row of weights assert(cr.cxt<=cr.cm.size()-m); int* wt=(int*)&cr.cm[cr.cxt]; p[i]=0; for (int j=0; j>8)*p[cp[2]+j]; p[i]=clamp2k(p[i]>>8); } break; case ISSE: { // sizebits j -- c=hi, cxt=bh assert((hmap4&15)>0); if (c8==1 || (c8&0xf0)==16) cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8); cr.cxt=cr.ht[cr.c+(hmap4&15)]; // bit history int *wt=(int*)&cr.cm[cr.cxt*2]; p[i]=clamp2k((wt[0]*p[cp[2]]+wt[1]*64)>>16); } break; case SSE: { // sizebits j start limit cr.cxt=(h[i]+c8)*32; int pq=p[cp[2]]+992; if (pq<0) pq=0; if (pq>1983) pq=1983; int wt=pq&63; pq>>=6; assert(pq>=0 && pq<=30); cr.cxt+=pq; p[i]=stretch(((cr.cm(cr.cxt)>>10)*(64-wt)+(cr.cm(cr.cxt+1)>>10)*wt)>>13); cr.cxt+=wt>>5; } break; default: error("component predict not implemented"); } cp+=compsize[cp[0]]; assert(cp<&z.header[z.cend]); assert(p[i]>=-2048 && p[i]<2048); } assert(cp[0]==NONE); return squash(p[n-1]); } // Update model with decoded bit y (0...1) void Predictor::update0(int y) { assert(y==0 || y==1); assert(c8>=1 && c8<=255); assert(hmap4>=1 && hmap4<=511); // Update components const U8* cp=&z.header[7]; int n=z.header[6]; assert(n>=1 && n<=255); assert(cp[-1]==n); for (int i=0; i>8))>>2; } break; case MATCH: // sizebits bufbits: // a=len, b=offset, c=bit, 
cm=index, cxt=bitpos // ht=buf, limit=pos { assert(cr.a<=255); assert(cr.c==0 || cr.c==1); assert(cr.cxt<8); assert(cr.cm.size()==(size_t(1)<>5; int w=cr.a16[cr.cxt]; w+=(err*(p[cp[2]]-p[cp[3]])+(1<<12))>>13; if (w<0) w=0; if (w>65535) w=65535; cr.a16[cr.cxt]=w; } break; case MIX: { // sizebits j m rate mask // cm=wt[size][m], cxt=input int m=cp[3]; assert(m>0 && m<=i); assert(cr.cm.size()==m*cr.c); assert(cr.cxt+m<=cr.cm.size()); int err=(y*32767-squash(p[i]))*cp[4]>>4; int* wt=(int*)&cr.cm[cr.cxt]; for (int j=0; j>13)); } break; case ISSE: { // sizebits j -- c=hi, cxt=bh assert(cr.cxt==cr.ht[cr.c+(hmap4&15)]); int err=y*32767-squash(p[i]); int *wt=(int*)&cr.cm[cr.cxt*2]; wt[0]=clamp512k(wt[0]+((err*p[cp[2]]+(1<<12))>>13)); wt[1]=clamp512k(wt[1]+((err+16)>>5)); cr.ht[cr.c+(hmap4&15)]=st.next(cr.cxt, y); } break; case SSE: // sizebits j start limit train(cr, y); break; default: assert(0); } cp+=compsize[cp[0]]; assert(cp>=&z.header[7] && cp<&z.header[z.cend] && cp<&z.header[z.header.isize()-8]); } assert(cp[0]==NONE); // Save bit y in c8, hmap4 c8+=c8+y; if (c8>=256) { z.run(c8-256); hmap4=1; c8=1; for (int i=0; i=16 && c8<32) hmap4=(hmap4&0xf)<<5|y<<4|1; else hmap4=(hmap4&0x1f0)|(((hmap4&0xf)*2+y)&0xf); } // Find cxt row in hash table ht. ht has rows of 16 indexed by the // low sizebits of cxt with element 0 having the next higher 8 bits for // collision detection. If not found after 3 adjacent tries, replace the // row with lowest element 1 as priority. Return index of row. 
size_t Predictor::find(Array& ht, int sizebits, U32 cxt) { assert(ht.size()==size_t(16)<>sizebits&255; size_t h0=(cxt*16)&(ht.size()-16); if (ht[h0]==chk) return h0; size_t h1=h0^16; if (ht[h1]==chk) return h1; size_t h2=h0^32; if (ht[h2]==chk) return h2; if (ht[h0+1]<=ht[h1+1] && ht[h0+1]<=ht[h2+1]) return memset(&ht[h0], 0, 16), ht[h0]=chk, h0; else if (ht[h1+1]get(); if (c<0) error("unexpected end of input"); curr=curr<<8|c; } } U32 n=buf.size(); if (n>curr) n=curr; high=in->read(&buf[0], n); curr-=high; low=0; } // Return next bit of decoded input, which has 16 bit probability p of being 1 int Decoder::decode(int p) { assert(p>=0 && p<65536); assert(high>low && low>0); if (currhigh) error("archive corrupted"); assert(curr>=low && curr<=high); U32 mid=low+U32(((high-low)*U64(U32(p)))>>16); // split range assert(high>mid && mid>=low); int y=curr<=mid; if (y) high=mid; else low=mid+1; // pick half while ((high^low)<0x1000000) { // shift out identical leading bytes high=high<<8|255; low=low<<8; low+=(low==0); int c=in->get(); if (c<0) error("unexpected end of file"); curr=curr<<8|c; } return y; } // Decompress 1 byte or -1 at end of input int Decoder::decompress() { if (pr.isModeled()) { // n>0 components? if (curr==0) { // segment initialization for (int i=0; i<4; ++i) curr=curr<<8|in->get(); } if (decode(0)) { if (curr!=0) error("decoding end of stream"); return -1; } else { int c=1; while (c<256) { // get 8 bits int p=pr.predict()*2+1; c+=c+decode(p); pr.update(c&1); } return c-256; } } else { if (low==high) loadbuf(); if (low==high) return -1; return buf[low++]&255; } } // Find end of compressed data and return next byte int Decoder::skip() { int c=-1; if (pr.isModeled()) { while (curr==0) // at start? curr=in->get(); while (curr && (c=in->get())>=0) // find 4 zeros curr=curr<<8|c; while ((c=in->get())==0) ; // might be more than 4 return c; } else { if (curr==0) // at start? 
for (int i=0; i<4 && (c=in->get())>=0; ++i) curr=curr<<8|c; while (curr>0) { U32 n=BUFSIZE; if (n>curr) n=curr; U32 n1=in->read(&buf[0], n); curr-=n1; if (n1!=n) return -1; if (curr==0) for (int i=0; i<4 && (c=in->get())>=0; ++i) curr=curr<<8|c; } if (c>=0) c=in->get(); return c; } } ////////////////////// PostProcessor ////////////////////// // Copy ph, pm from block header void PostProcessor::init(int h, int m) { state=hsize=0; ph=h; pm=m; z.clear(); } // (PASS=0 | PROG=1 psize[0..1] pcomp[0..psize-1]) data... EOB=-1 // Return state: 1=PASS, 2..4=loading PROG, 5=PROG loaded int PostProcessor::write(int c) { assert(c>=-1 && c<=255); switch (state) { case 0: // initial state if (c<0) error("Unexpected EOS"); state=c+1; // 1=PASS, 2=PROG if (state>2) error("unknown post processing type"); if (state==1) z.clear(); break; case 1: // PASS z.outc(c); break; case 2: // PROG if (c<0) error("Unexpected EOS"); hsize=c; // low byte of size state=3; break; case 3: // PROG psize[0] if (c<0) error("Unexpected EOS"); hsize+=c*256; // high byte of psize z.header.resize(hsize+300); z.cend=8; z.hbegin=z.hend=z.cend+128; z.header[4]=ph; z.header[5]=pm; state=4; break; case 4: // PROG psize[0..1] pcomp[0...] if (c<0) error("Unexpected EOS"); assert(z.hend>8; z.initp(); state=5; } break; case 5: // PROG ... data z.run(c); if (c<0) z.flush(); break; } return state; } /////////////////////// Decompresser ///////////////////// // Find the start of a block and return true if found. Set memptr // to memory used. 
bool Decompresser::findBlock(double* memptr) { assert(state==BLOCK); // Find start of block U32 h1=0x3D49B113, h2=0x29EB7F93, h3=0x2614BE13, h4=0x3828EB13; // Rolling hashes initialized to hash of first 13 bytes int c; while ((c=dec.in->get())!=-1) { h1=h1*12+c; h2=h2*20+c; h3=h3*28+c; h4=h4*44+c; if (h1==0xB16B88F1 && h2==0xFF5376F1 && h3==0x72AC5BF1 && h4==0x2F909AF1) break; // hash of 16 byte string } if (c==-1) return false; // Read header if ((c=dec.in->get())!=1 && c!=2) error("unsupported ZPAQ level"); if (dec.in->get()!=1) error("unsupported ZPAQL type"); z.read(dec.in); if (c==1 && z.header.isize()>6 && z.header[6]==0) error("ZPAQ level 1 requires at least 1 component"); if (memptr) *memptr=z.memory(); state=FILENAME; decode_state=FIRSTSEG; return true; } // Read the start of a segment (1) or end of block code (255). // If a segment is found, write the filename and return true, else false. bool Decompresser::findFilename(Writer* filename) { assert(state==FILENAME); int c=dec.in->get(); if (c==1) { // segment found while (true) { c=dec.in->get(); if (c==-1) error("unexpected EOF"); if (c==0) { state=COMMENT; return true; } if (filename) filename->put(c); } } else if (c==255) { // end of block found state=BLOCK; return false; } else error("missing segment or end of block"); return false; } // Read the comment from the segment header void Decompresser::readComment(Writer* comment) { assert(state==COMMENT); state=DATA; while (true) { int c=dec.in->get(); if (c==-1) error("unexpected EOF"); if (c==0) break; if (comment) comment->put(c); } if (dec.in->get()!=0) error("missing reserved byte"); } // Decompress n bytes, or all if n < 0. 
Return false if done bool Decompresser::decompress(int n) { assert(state==DATA); assert(decode_state!=SKIP); // Initialize models to start decompressing block if (decode_state==FIRSTSEG) { dec.init(); assert(z.header.size()>5); pp.init(z.header[4], z.header[5]); decode_state=SEG; } // Decompress and load PCOMP into postprocessor while ((pp.getState()&3)!=1) pp.write(dec.decompress()); // Decompress n bytes, or all if n < 0 while (n) { int c=dec.decompress(); pp.write(c); if (c==-1) { state=SEGEND; return false; } if (n>0) --n; } return true; } // Read end of block. If a SHA1 checksum is present, write 1 and the // 20 byte checksum into sha1string, else write 0 in first byte. // If sha1string is 0 then discard it. void Decompresser::readSegmentEnd(char* sha1string) { assert(state==DATA || state==SEGEND); // Skip remaining data if any and get next byte int c=0; if (state==DATA) { c=dec.skip(); decode_state=SKIP; } else if (state==SEGEND) c=dec.in->get(); state=FILENAME; // Read checksum if (c==254) { if (sha1string) sha1string[0]=0; // no checksum } else if (c==253) { if (sha1string) sha1string[0]=1; for (int i=1; i<=20; ++i) { c=dec.in->get(); if (sha1string) sha1string[i]=c; } } else error("missing end of segment marker"); } /////////////////////////// decompress() ///////////////////// void decompress(Reader* in, Writer* out) { Decompresser d; d.setInput(in); d.setOutput(out); while (d.findBlock()) { // don't calculate memory while (d.findFilename()) { // discard filename d.readComment(); // discard comment d.decompress(); // to end of segment d.readSegmentEnd(); // discard sha1string } } } ////////////////////// Encoder //////////////////// // Initialize for start of block void Encoder::init() { low=1; high=0xFFFFFFFF; pr.init(); if (!pr.isModeled()) low=0, buf.resize(1<<16); } // compress bit y having probability p/64K void Encoder::encode(int y, int p) { assert(out); assert(p>=0 && p<65536); assert(y==0 || y==1); assert(high>low && low>0); U32 
mid=low+U32(((high-low)*U64(U32(p)))>>16); // split range assert(high>mid && mid>=low); if (y) high=mid; else low=mid+1; // pick half while ((high^low)<0x1000000) { // write identical leading bytes out->put(high>>24); // same as low>>24 high=high<<8|255; low=low<<8; low+=(low==0); // so we don't code 4 0 bytes in a row } } // compress byte c (0..255 or -1=EOS) void Encoder::compress(int c) { assert(out); if (pr.isModeled()) { if (c==-1) encode(1, 0); else { assert(c>=0 && c<=255); encode(0, 0); for (int i=7; i>=0; --i) { int p=pr.predict()*2+1; assert(p>0 && p<65536); int y=c>>i&1; encode(y, p); pr.update(y); } } } else { if (c<0 || low==buf.size()) { out->put((low>>24)&255); out->put((low>>16)&255); out->put((low>>8)&255); out->put(low&255); out->write(&buf[0], low); low=0; } if (c>=0) buf[low++]=c; } } ///////////////////// Compressor ////////////////////// // Write 13 byte start tag // "\x37\x6B\x53\x74\xA0\x31\x83\xD3\x8C\xB2\x28\xB0\xD3" void Compressor::writeTag() { assert(state==INIT); enc.out->put(0x37); enc.out->put(0x6b); enc.out->put(0x53); enc.out->put(0x74); enc.out->put(0xa0); enc.out->put(0x31); enc.out->put(0x83); enc.out->put(0xd3); enc.out->put(0x8c); enc.out->put(0xb2); enc.out->put(0x28); enc.out->put(0xb0); enc.out->put(0xd3); } void Compressor::startBlock(int level) { // Model 1 - min.cfg static const char models[]={ 26,0,1,2,0,0,2,3,16,8,19,0,0,96,4,28, 59,10,59,112,25,10,59,10,59,112,56,0, // Model 2 - mid.cfg 69,0,3,3,0,0,8,3,5,8,13,0,8,17,1,8, 18,2,8,18,3,8,19,4,4,22,24,7,16,0,7,24, (char)-1,0,17,104,74,4,95,1,59,112,10,25,59,112,10,25, 59,112,10,25,59,112,10,25,59,112,10,25,59,10,59,112, 25,69,(char)-49,8,112,56,0, // Model 3 - max.cfg (char)-60,0,5,9,0,0,22,1,(char)-96,3,5,8,13,1,8,16, 2,8,18,3,8,19,4,8,19,5,8,20,6,4,22,24, 3,17,8,19,9,3,13,3,13,3,13,3,14,7,16,0, 15,24,(char)-1,7,8,0,16,10,(char)-1,6,0,15,16,24,0,9, 8,17,32,(char)-1,6,8,17,18,16,(char)-1,9,16,19,32,(char)-1,6, 0,19,20,16,0,0,17,104,74,4,95,2,59,112,10,25, 
59,112,10,25,59,112,10,25,59,112,10,25,59,112,10,25, 59,10,59,112,10,25,59,112,10,25,69,(char)-73,32,(char)-17,64,47, 14,(char)-25,91,47,10,25,60,26,48,(char)-122,(char)-105,20,112,63,9,70, (char)-33,0,39,3,25,112,26,52,25,25,74,10,4,59,112,25, 10,4,59,112,25,10,4,59,112,25,65,(char)-113,(char)-44,72,4,59, 112,8,(char)-113,(char)-40,8,68,(char)-81,60,60,25,69,(char)-49,9,112,25,25, 25,25,25,112,56,0, 0,0}; // 0,0 = end of list if (level<1) error("compression level must be at least 1"); const char* p=models; int i; for (i=1; iput('z'); enc.out->put('P'); enc.out->put('Q'); enc.out->put(1+(len>6 && hcomp[6]==0)); // level 1 or 2 enc.out->put(1); for (int i=0; iput(hcomp[i]); MemoryReader m(hcomp); z.read(&m); state=BLOCK1; } // Write a segment header void Compressor::startSegment(const char* filename, const char* comment) { assert(state==BLOCK1 || state==BLOCK2); enc.out->put(1); while (filename && *filename) enc.out->put(*filename++); enc.out->put(0); while (comment && *comment) enc.out->put(*comment++); enc.out->put(0); enc.out->put(0); if (state==BLOCK1) state=SEG1; if (state==BLOCK2) state=SEG2; } // Initialize encoding and write pcomp to first segment // If len is 0 then length is encoded in pcomp[0..1] void Compressor::postProcess(const char* pcomp, int len) { assert(state==SEG1); enc.init(); if (pcomp) { enc.compress(1); if (len<=0) { len=toU16(pcomp); pcomp+=2; } enc.compress(len&255); enc.compress((len>>8)&255); for (int i=0; iget())>=0) { enc.compress(ch); if (n>0) --n; } return ch>=0; } // End segment, write sha1string if present void Compressor::endSegment(const char* sha1string) { assert(state==SEG2); enc.compress(-1); enc.out->put(0); enc.out->put(0); enc.out->put(0); enc.out->put(0); if (sha1string) { enc.out->put(253); for (int i=0; i<20; ++i) enc.out->put(sha1string[i]); } else enc.out->put(254); state=BLOCK2; } // End block void Compressor::endBlock() { assert(state==BLOCK2); enc.out->put(255); state=INIT; } /////////////////////////// compress() 
/////////////////////// void compress(Reader* in, Writer* out, int level) { assert(level>=1); Compressor c; c.setInput(in); c.setOutput(out); c.startBlock(level); c.startSegment(); c.postProcess(); c.compress(); c.endSegment(); c.endBlock(); } //////////////////////// ZPAQL::assemble() //////////////////// #ifndef NOJIT /* assemble(); Assembles the ZPAQL code in hcomp[0..hlen-1] and stores x86-32 or x86-64 code in rcode[0..rcode_size-1]. Execution begins at rcode[0]. It will not write beyond the end of rcode, but in any case it returns the number of bytes that would have been written. It returns 0 in case of error. The assembled code implements run() and returns 1 if successful or 0 if the ZPAQL code executes an invalid instruction or jumps out of bounds. A ZPAQL virtual machine has the following state. All values are unsigned and initially 0: a, b, c, d: 32 bit registers (pointed to by their respective parameters) f: 1 bit flag register (pointed to) r[0..255]: 32 bit registers m[0..msize-1]: 8 bit registers, where msize is a power of 2 h[0..hsize-1]: 32 bit registers, where hsize is a power of 2 out: pointer to a Writer sha1: pointer to a SHA1 Generally a ZPAQL machine is used to compute contexts which are placed in h. A second machine might post-process, and write its output to out and sha1. In either case, a machine is called with its input in a, representing a single byte (0..255) or (for a postprocessor) EOF (0xffffffff). Execution returs after a ZPAQL halt instruction. ZPAQL instructions are 1 byte unless the last 3 bits are 1. In this case, a second operand byte follows. Opcode 255 is the only 3 byte instruction. 
They are organized:

  00dddxxx = unary opcode xxx on destination ddd (ddd < 111)
  00111xxx = special instruction xxx
  01dddsss = assignment: ddd = sss (ddd < 111)
  1xxxxsss = operation xxxx from sss to a

The meanings of sss and ddd are as follows:

  000 = a   (accumulator)
  001 = b
  010 = c
  011 = d
  100 = *b  (means m[b mod msize])
  101 = *c  (means m[c mod msize])
  110 = *d  (means h[d mod hsize])
  111 = n   (constant 0..255 in second byte of instruction)

For example, 01001110 assigns *d to b. The other instructions xxx
are as follows:

Group 00dddxxx where ddd < 111 and xxx is:
  000 = ddd<>a, swap with a (except 00000000 is an error, and swap
        with *b or *c leaves the high bits of a unchanged)
  001 = ddd++, increment
  010 = ddd--, decrement
  011 = ddd!, not (invert all bits)
  100 = ddd=0, clear (set all bits of ddd to 0)
  101 = not used (error)
  110 = not used
  111 = ddd=r n, assign from r[n] to ddd, n=0..255 in next opcode byte
Except:
  00100111 = jt n, jump if f is true (n = -128..127, relative to
             next opcode)
  00101111 = jf n, jump if f is false (n = -128..127)
  00110111 = r=a n, assign r[n] = a (n = 0..255)

Group 00111xxx where xxx is:
  000 = halt (return)
  001 = output a
  010 = not used
  011 = hash: a = (a + *b + 512) * 773
  100 = hashd: *d = (*d + a + 512) * 773
  101 = not used
  110 = not used
  111 = unconditional jump (n = -128 to 127, relative to next opcode)

Group 1xxxxsss where xxxx is:
  0000 = a += sss (add, subtract, multiply, divide sss to a)
  0001 = a -= sss
  0010 = a *= sss
  0011 = a /= sss (unsigned, except set a = 0 if sss is 0)
  0100 = a %= sss (remainder, except set a = 0 if sss is 0)
  0101 = a &= sss (bitwise AND)
  0110 = a &= ~sss (bitwise AND with complement of sss)
  0111 = a |= sss (bitwise OR)
  1000 = a ^= sss (bitwise XOR)
  1001 = a <<= (sss % 32) (left shift by low 5 bits of sss)
  1010 = a >>= (sss % 32) (unsigned, zero bits shifted in)
  1011 = a == sss (compare, set f = true if equal or false otherwise)
  1100 = a < sss (unsigned compare, result in f)
  1101 = a > sss (unsigned compare)
  1110 =
not used 1111 = not used except 11111111 is a 3 byte jump to the absolute address in the next 2 bytes in little-endian (LSB first) order. assemble() translates ZPAQL to 32 bit x86 code to be executed by run(). Registers are mapped as follows: eax = source sss from *b, *c, *d or sometimes n ecx = pointer to destination *b, *c, *d, or spare edx = a ebx = f (1 for true, 0 for false) esp = stack pointer ebp = d esi = b edi = c run() saves non-volatile registers (ebp, esi, edi, ebx) on the stack, loads a, b, c, d, f, and executes the translated instructions. A halt instruction saves a, b, c, d, f, pops the saved registers and returns. Invalid instructions or jumps outside of the range of the ZPAQL code call libzpaq::error(). In 64 bit mode, the following additional registers are used: r12 = h r14 = r r15 = m */ // Called by out static void flush1(ZPAQL* z) { z->flush(); } // return true if op is an undefined ZPAQL instruction static bool iserr(int op) { return op==0 || (op>=120 && op<=127) || (op>=240 && op<=254) || op==58 || (op<64 && (op%8==5 || op%8==6)); } // Write k bytes of x to rcode[o++] MSB first static void put(U8* rcode, int n, int& o, U32 x, int k) { while (k-->0) { if (o>(k*8))&255; ++o; } } // Write 4 bytes of x to rcode[o++] LSB first static void put4lsb(U8* rcode, int n, int& o, U32 x) { for (int k=0; k<4; ++k) { if (o>(k*8))&255; ++o; } } // Write a 1-4 byte x86 opcode without or with an 4 byte operand // to rcode[o...] 
#define put1(x) put(rcode, rcode_size, o, (x), 1) #define put2(x) put(rcode, rcode_size, o, (x), 2) #define put3(x) put(rcode, rcode_size, o, (x), 3) #define put4(x) put(rcode, rcode_size, o, (x), 4) #define put5(x,y) put4(x), put1(y) #define put6(x,y) put4(x), put2(y) #define put4r(x) put4lsb(rcode, rcode_size, o, x) #define puta(x) t=U32(size_t(x)), put4r(t) #define put1a(x,y) put1(x), puta(y) #define put2a(x,y) put2(x), puta(y) #define put3a(x,y) put3(x), puta(y) #define put4a(x,y) put4(x), puta(y) #define put5a(x,y,z) put4(x), put1(y), puta(z) #define put2l(x,y) put2(x), t=U32(size_t(y)), put4r(t), \ t=U32(size_t(y)>>(S*4)), put4r(t) // Assemble ZPAQL in in the HCOMP section of header to rcode, // but do not write beyond rcode_size. Return the number of // bytes output or that would have been output. // Execution starts at rcode[0] and returns 1 if successful or 0 // in case of a ZPAQL execution error. int ZPAQL::assemble() { // x86? (not foolproof) const int S=sizeof(char*); // 4 = x86, 8 = x86-64 U32 t=0x12345678; if (*(char*)&t!=0x78 || (S!=4 && S!=8)) error("JIT supported only for x86-32 and x86-64"); const U8* hcomp=&header[hbegin]; const int hlen=hend-hbegin+1; const int msize=m.size(); const int hsize=h.size(); const int regcode[8]={2,6,7,5}; // a,b,c,d.. -> edx,esi,edi,ebp,eax.. 
Array it(hlen); // hcomp -> rcode locations int done=0; // number of instructions assembled (0..hlen) int o=5; // rcode output index, reserve space for jmp // Code for the halt instruction (restore registers and return) const int halt=o; if (S==8) { put2l(0x48b9, &a); // mov rcx, a put2(0x8911); // mov [rcx], edx put2l(0x48b9, &b); // mov rcx, b put2(0x8931); // mov [rcx], esi put2l(0x48b9, &c); // mov rcx, c put2(0x8939); // mov [rcx], edi put2l(0x48b9, &d); // mov rcx, d put2(0x8929); // mov [rcx], ebp put2l(0x48b9, &f); // mov rcx, f put2(0x8919); // mov [rcx], ebx put4(0x4883c438); // add rsp, 56 put2(0x415f); // pop r15 put2(0x415e); // pop r14 put2(0x415d); // pop r13 put2(0x415c); // pop r12 } else { put2a(0x8915, &a); // mov [a], edx put2a(0x8935, &b); // mov [b], esi put2a(0x893d, &c); // mov [c], edi put2a(0x892d, &d); // mov [d], ebp put2a(0x891d, &f); // mov [f], ebx put3(0x83c43c); // add esp, 60 } put1(0x5d); // pop ebp put1(0x5b); // pop ebx put1(0x5f); // pop edi put1(0x5e); // pop esi put1(0xc3); // ret // Code for the out instruction. // Store a=edx at outbuf[bufptr++]. If full, call flush1(). 
const int outlabel=o; if (S==8) { put2l(0x48b8, &outbuf[0]);// mov rax, outbuf.p put2l(0x49ba, &bufptr); // mov r10, &bufptr put3(0x418b0a); // mov ecx, [r10] put3(0x891408); // mov [rax+rcx], edx put2(0xffc1); // inc ecx put3(0x41890a); // mov [r10], ecx put2a(0x81f9, outbuf.size()); // cmp ecx, outbuf.size() put2(0x7401); // jz L1 put1(0xc3); // ret put4(0x4883ec30); // L1: sub esp, 48 ; call flush1(this) put4(0x48893c24); // mov [rsp], rdi put5(0x48897424,8); // mov [rsp+8], rsi put5(0x48895424,16); // mov [rsp+16], rdx put5(0x48894c24,24); // mov [rsp+24], rcx #ifndef _WIN32 put2l(0x48bf, this); // mov rdi, this #else // Windows put2l(0x48b9, this); // mov rcx, this #endif put2l(0x49bb, &flush1); // mov r11, &flush1 put3(0x41ffd3); // call r11 put5(0x488b4c24,24); // mov rcx, [rsp+24] put5(0x488b5424,16); // mov rdx, [rsp+16] put5(0x488b7424,8); // mov rsi, [rsp+8] put4(0x488b3c24); // mov rdi, [rsp] put4(0x4883c430); // add esp, 48 put1(0xc3); // ret } else { put1a(0xb8, &outbuf[0]); // mov eax, outbuf.p put2a(0x8b0d, &bufptr); // mov ecx, [bufptr] put3(0x891408); // mov [eax+ecx], edx put2(0xffc1); // inc ecx put2a(0x890d, &bufptr); // mov [bufptr], ecx put2a(0x81f9, outbuf.size()); // cmp ecx, outbuf.size() put2(0x7401); // jz L1 put1(0xc3); // ret put3(0x83ec08); // L1: sub esp, 8 put4(0x89542404); // mov [esp+4], edx put3a(0xc70424, this); // mov [esp], this put1a(0xb8, &flush1); // mov eax, &flush1 put2(0xffd0); // call eax put4(0x8b542404); // mov edx, [esp+4] put3(0x83c408); // add esp, 8 put1(0xc3); // ret } // Set it[i]=1 for each ZPAQL instruction reachable from the previous // instruction + 2 if reachable by a jump (or 3 if both). 
it[0]=2; assert(hlen>0 && hcomp[hlen-1]==0); // ends with error do { done=0; const int NONE=0x80000000; for (int i=0; i>24);// jt,jf,jmp if (op==63) next1=NONE; // jmp if ((next2<0 || next2>=hlen) && next2!=NONE) next2=hlen-1; // error if (next1!=NONE && !(it[next1]&1)) it[next1]|=1, ++done; if (next2!=NONE && !(it[next2]&2)) it[next2]|=2, ++done; } } } while (done>0); // Set it[i] bits 2-3 to 4, 8, or 12 if a comparison // (<, >, == respectively) does not need to save the result in f, // or if a conditional jump (jt, jf) does not need to read f. // This is true if a comparison is followed directly by a jt/jf, // the jt/jf is not a jump target, the byte before is not a jump // target (for a 2 byte comparison), and for the comparison instruction // if both paths after the jt/jf lead to another comparison or error // before another jt/jf. At most hlen steps are traced because after // that it must be an infinite loop. for (int i=0; i=216 && op1<240 && (op2==39 || op2==47) && it[i2]==1 && (i2==i+1 || it[i+1]==0)) { int code=(op1-208)/8*4; // 4,8,12 is ==,<,> it[i2]+=code; // OK to test CF, ZF instead of f for (int j=0; j<2 && code; ++j) { // trace each path from i2 int k=i2+2; // branch not taken if (j==1) k=i2+2+(hcomp[i2+1]<<24>>24); // branch taken for (int l=0; l=hlen) break; // out of bounds, pass const int op=hcomp[k]; if (op==39 || op==47) code=0; // jt,jf, fail else if (op>=216 && op<240) break; // ==,<,>, pass else if (iserr(op)) break; // error, pass else if (op==255) k=hcomp[k+1]+256*hcomp[k+2]; // lj else if (op==63) k=k+2+(hcomp[k+1]<<24>>24); // jmp else if (op==56) k=0; // halt else k=k+1+(op%8==7); // ordinary instruction } } it[i]+=code; // if > 0 then OK to not save flags in f (bl) } } // Start of run(): Save x86 and load ZPAQL registers const int start=o; assert(start>=16); put1(0x56); // push esi/rsi put1(0x57); // push edi/rdi put1(0x53); // push ebx/rbx put1(0x55); // push ebp/rbp if (S==8) { put2(0x4154); // push r12 put2(0x4155); // push r13 
put2(0x4156); // push r14 put2(0x4157); // push r15 put4(0x4883ec38); // sub rsp, 56 put2l(0x48b8, &a); // mov rax, a put2(0x8b10); // mov edx, [rax] put2l(0x48b8, &b); // mov rax, b put2(0x8b30); // mov esi, [rax] put2l(0x48b8, &c); // mov rax, c put2(0x8b38); // mov edi, [rax] put2l(0x48b8, &d); // mov rax, d put2(0x8b28); // mov ebp, [rax] put2l(0x48b8, &f); // mov rax, f put2(0x8b18); // mov ebx, [rax] put2l(0x49bc, &h[0]); // mov r12, h put2l(0x49bd, &outbuf[0]); // mov r13, outbuf.p put2l(0x49be, &r[0]); // mov r14, r put2l(0x49bf, &m[0]); // mov r15, m } else { put3(0x83ec3c); // sub esp, 60 put2a(0x8b15, &a); // mov edx, [a] put2a(0x8b35, &b); // mov esi, [b] put2a(0x8b3d, &c); // mov edi, [c] put2a(0x8b2d, &d); // mov ebp, [d] put2a(0x8b1d, &f); // mov ebx, [f] } // Assemble in multiple passes until every byte of hcomp has a translation for (int istart=0; istarti); assert(i>=0 && i=16) { if (i>istart) { int a=code-o; if (a>-120 && a<120) put2(0xeb00+((a-2)&255)); // jmp short o else put1a(0xe9, a-5); // jmp near o } break; } // Else assemble the instruction at hcode[i] to rcode[o] else { assert(i>=0 && i0 && it[i]<16); assert(o>=16); it[i]=o; ++done; const int op=hcomp[i]; const int arg=hcomp[i+1]+((op==255)?256*hcomp[i+2]:0); const int ddd=op/8%8; const int sss=op%8; // error instruction: return 0 if (iserr(op)) { put2(0x31c0); // xor eax, eax put1a(0xe9, halt-o-4); // jmp near halt continue; } // Load source *b, *c, *d, or hash (*b) into eax except: // {a,b,c,d}=*d, a{+,-,*,&,|,^,=,==,>,>}=*d: load address to eax // {a,b,c,d}={*b,*c}: load source into ddd if (op==59 || (op>=64 && op<240 && op%8>=4 && op%8<7)) { put2(0x89c0+8*regcode[sss-3+(op==59)]); // mov eax, {esi,edi,ebp} const int sz=(sss==6?hsize:msize)-1; if (sz>=128) put1a(0x25, sz); // and eax, dword msize-1 else put3(0x83e000+sz); // and eax, byte msize-1 const int move=(op>=64 && op<112); // = or else ddd is eax if (sss<6) { // ddd={a,b,c,d,*b,*c} if (S==8) 
put5(0x410fb604+8*move*regcode[ddd],0x07); // movzx ddd, byte [r15+rax] else put3a(0x0fb680+8*move*regcode[ddd], &m[0]); // movzx ddd, byte [m+eax] } else if ((0x06587000>>(op/8))&1) {// {*b,*c,*d,a/,a%,a&~,a<<,a>>}=*d if (S==8) put4(0x418b0484); // mov eax, [r12+rax*4] else put3a(0x8b0485, &h[0]); // mov eax, [h+eax*4] } } // Load destination address *b, *c, *d or hashd (*d) into ecx if ((op>=32 && op<56 && op%8<5) || (op>=96 && op<120) || op==60) { put2(0x89c1+8*regcode[op/8%8-3-(op==60)]);// mov ecx,{esi,edi,ebp} const int sz=(ddd==6||op==60?hsize:msize)-1; if (sz>=128) put2a(0x81e1, sz); // and ecx, dword sz else put3(0x83e100+sz); // and ecx, byte sz if (op/8%8==6 || op==60) { // *d if (S==8) put4(0x498d0c8c); // lea rcx, [r12+rcx*4] else put3a(0x8d0c8d, &h[0]); // lea ecx, [ecx*4+h] } else { // *b, *c if (S==8) put4(0x498d0c0f); // lea rcx, [r15+rcx] else put2a(0x8d89, &m[0]); // lea ecx, [ecx+h] } } // Translate by opcode switch((op/8)&31) { case 0: // ddd = a case 1: // ddd = b case 2: // ddd = c case 3: // ddd = d switch(sss) { case 0: // ddd<>a (swap) put2(0x87d0+regcode[ddd]); // xchg edx, ddd break; case 1: // ddd++ put2(0xffc0+regcode[ddd]); // inc ddd break; case 2: // ddd-- put2(0xffc8+regcode[ddd]); // dec ddd break; case 3: // ddd! put2(0xf7d0+regcode[ddd]); // not ddd break; case 4: // ddd=0 put2(0x31c0+9*regcode[ddd]); // xor ddd,ddd break; case 7: // ddd=r n if (S==8) put3a(0x418b86+8*regcode[ddd], arg*4); // mov ddd, [r14+n*4] else put2a(0x8b05+8*regcode[ddd], (&r[arg]));//mov ddd, [r+n] break; } break; case 4: // ddd = *b case 5: // ddd = *c switch(sss) { case 0: // ddd<>a (swap) put2(0x8611); // xchg dl, [ecx] break; case 1: // ddd++ put2(0xfe01); // inc byte [ecx] break; case 2: // ddd-- put2(0xfe09); // dec byte [ecx] break; case 3: // ddd! 
put2(0xf611); // not byte [ecx] break; case 4: // ddd=0 put2(0x31c0); // xor eax, eax put2(0x8801); // mov [ecx], al break; case 7: // jt, jf { assert(code>=0 && code<16); const int jtab[2][4]={{5,4,2,7},{4,5,3,6}}; // jnz,je,jb,ja, jz,jne,jae,jbe if (code<4) put2(0x84db); // test bl, bl if (arg>=128 && arg-257-i>=0 && o-it[arg-257-i]<120) put2(0x7000+256*jtab[op==47][code/4]); // jx short 0 else put2a(0x0f80+jtab[op==47][code/4], 0); // jx near 0 break; } } break; case 6: // ddd = *d switch(sss) { case 0: // ddd<>a (swap) put2(0x8711); // xchg edx, [ecx] break; case 1: // ddd++ put2(0xff01); // inc dword [ecx] break; case 2: // ddd-- put2(0xff09); // dec dword [ecx] break; case 3: // ddd! put2(0xf711); // not dword [ecx] break; case 4: // ddd=0 put2(0x31c0); // xor eax, eax put2(0x8901); // mov [ecx], eax break; case 7: // ddd=r n if (S==8) put3a(0x418996, arg*4); // mov [r14+n*4], edx else put2a(0x8915, &r[arg]); // mov [r+n], edx break; } break; case 7: // special switch(op) { case 56: // halt put1a(0xb8, 1); // mov eax, 1 put1a(0xe9, halt-o-4); // jmp near halt break; case 57: // out put1a(0xe8, outlabel-o-4);// call outlabel break; case 59: // hash: a = (a + *b + 512) * 773 put3a(0x8d8410, 512); // lea edx, [eax+edx+512] put2a(0x69d0, 773); // imul edx, eax, 773 break; case 60: // hashd: *d = (*d + a + 512) * 773 put2(0x8b01); // mov eax, [ecx] put3a(0x8d8410, 512); // lea eax, [eax+edx+512] put2a(0x69c0, 773); // imul eax, eax, 773 put2(0x8901); // mov [ecx], eax break; case 63: // jmp put1a(0xe9, 0); // jmp near 0 (fill in target later) break; } break; case 8: // a= case 9: // b= case 10: // c= case 11: // d= if (sss==7) // n put1a(0xb8+regcode[ddd], arg); // mov ddd, n else if (sss==6) { // *d if (S==8) put4(0x418b0484+(regcode[ddd]<<11)); // mov ddd, [r12+rax*4] else put3a(0x8b0485+(regcode[ddd]<<11),&h[0]);// mov ddd, [h+eax*4] } else if (sss<4) // a, b, c, d put2(0x89c0+regcode[ddd]+8*regcode[sss]);// mov ddd,sss break; case 12: // *b= case 13: // *c= if 
(sss==7) put3(0xc60100+arg); // mov byte [ecx], n else if (sss==0) put2(0x8811); // mov byte [ecx], dl else { if (sss<4) put2(0x89c0+8*regcode[sss]);// mov eax, sss put2(0x8801); // mov byte [ecx], al } break; case 14: // *d= if (sss<7) put2(0x8901+8*regcode[sss]); // mov [ecx], sss else put2a(0xc701, arg); // mov dword [ecx], n break; case 15: break; // not used case 16: // a+= if (sss==6) { if (S==8) put4(0x41031484); // add edx, [r12+rax*4] else put3a(0x031485, &h[0]); // add edx, [h+eax*4] } else if (sss<7) put2(0x01c2+8*regcode[sss]);// add edx, sss else if (arg>128) put2a(0x81c2, arg); // add edx, n else put3(0x83c200+arg); // add edx, byte n break; case 17: // a-= if (sss==6) { if (S==8) put4(0x412b1484); // sub edx, [r12+rax*4] else put3a(0x2b1485, &h[0]); // sub edx, [h+eax*4] } else if (sss<7) put2(0x29c2+8*regcode[sss]);// sub edx, sss else if (arg>=128) put2a(0x81ea, arg); // sub edx, n else put3(0x83ea00+arg); // sub edx, byte n break; case 18: // a*= if (sss==6) { if (S==8) put5(0x410faf14,0x84); // imul edx, [r12+rax*4] else put4a(0x0faf1485, &h[0]); // imul edx, [h+eax*4] } else if (sss<7) put3(0x0fafd0+regcode[sss]);// imul edx, sss else if (arg>=128) put2a(0x69d2, arg); // imul edx, n else put3(0x6bd200+arg); // imul edx, byte n break; case 19: // a/= case 20: // a%= if (sss<7) put2(0x89c1+8*regcode[sss]); // mov ecx, sss else put1a(0xb9, arg); // mov ecx, n put2(0x85c9); // test ecx, ecx put3(0x0f44d1); // cmovz edx, ecx put2(0x7408-2*(op/8==20)); // jz (over rest) put2(0x89d0); // mov eax, edx put2(0x31d2); // xor edx, edx put2(0xf7f1); // div ecx if (op/8==19) put2(0x89c2); // mov edx, eax break; case 21: // a&= if (sss==6) { if (S==8) put4(0x41231484); // and edx, [r12+rax*4] else put3a(0x231485, &h[0]); // and edx, [h+eax*4] } else if (sss<7) put2(0x21c2+8*regcode[sss]);// and edx, sss else if (arg>=128) put2a(0x81e2, arg); // and edx, n else put3(0x83e200+arg); // and edx, byte n break; case 22: // a&~ if (sss==7) { if (arg<128) 
put3(0x83e200+(~arg&255));// and edx, byte ~n else put2a(0x81e2, ~arg); // and edx, ~n } else { if (sss<4) put2(0x89c0+8*regcode[sss]);// mov eax, sss put2(0xf7d0); // not eax put2(0x21c2); // and edx, eax } break; case 23: // a|= if (sss==6) { if (S==8) put4(0x410b1484); // or edx, [r12+rax*4] else put3a(0x0b1485, &h[0]); // or edx, [h+eax*4] } else if (sss<7) put2(0x09c2+8*regcode[sss]);// or edx, sss else if (arg>=128) put2a(0x81ca, arg); // or edx, n else put3(0x83ca00+arg); // or edx, byte n break; case 24: // a^= if (sss==6) { if (S==8) put4(0x41331484); // xor edx, [r12+rax*4] else put3a(0x331485, &h[0]); // xor edx, [h+eax*4] } else if (sss<7) put2(0x31c2+8*regcode[sss]);// xor edx, sss else if (arg>=128) put2a(0x81f2, arg); // xor edx, byte n else put3(0x83f200+arg); // xor edx, n break; case 25: // a<<= case 26: // a>>= if (sss==7) // sss = n put3(0xc1e200+8*256*(op/8==26)+arg); // shl/shr n else { put2(0x89c1+8*regcode[sss]); // mov ecx, sss put2(0xd3e2+8*(op/8==26)); // shl/shr edx, cl } break; case 27: // a== case 28: // a< case 29: // a> if (sss==6) { if (S==8) put4(0x413b1484); // cmp edx, [r12+rax*4] else put3a(0x3b1485, &h[0]); // cmp edx, [h+eax*4] } else if (sss==7) // sss = n put2a(0x81fa, arg); // cmp edx, dword n else put2(0x39c2+8*regcode[sss]); // cmp edx, sss if (code<4) { if (op/8==27) put3(0x0f94c3); // setz bl if (op/8==28) put3(0x0f92c3); // setc bl if (op/8==29) put3(0x0f97c3); // seta bl } break; case 30: // not used case 31: // 255 = lj if (op==255) put1a(0xe9, 0); // jmp near break; } } } } // Finish first pass const int rsize=o; if (o>rcode_size) return rsize; // Fill in jump addresses (second pass) for (int i=0; i=128) target-=256; target+=i+2; } if (target<0 || target>=hlen) target=hlen-1; // runtime ZPAQL error o=it[i]; assert(o>=16 && o skip test assert(o>=16 && o=0x72 && op<0x78) || op==0xeb) { // jx, jmp short --target; if (target<-128 || target>127) error("Cannot code x86 short jump"); assert(o=0x82 && op<0x88) || op==0xe9) 
// jx, jmp near { target-=4; puta(target); } else assert(false); // not a x86 jump } } // Jump to start o=0; put1a(0xe9, start-5); // jmp near start return rsize; } //////////////////////// Predictor::assemble_p() ///////////////////// // Assemble the ZPAQL code in the HCOMP section of z.header to pcomp and // return the number of bytes of x86 or x86-64 code written, or that would // be written if pcomp were large enough. The code for predict() begins // at pr.pcomp[0] and update() at pr.pcomp[5], both as jmp instructions. // The assembled code is equivalent to int predict(Predictor*) // and void update(Predictor*, int y); The Preditor address is placed in // edi/rdi. The update bit y is placed in ebp/rbp. int Predictor::assemble_p() { Predictor& pr=*this; U8* rcode=pr.pcode; // x86 output array int rcode_size=pcode_size; // output size int o=0; // output index in pcode const int S=sizeof(char*); // 4 or 8 U8* hcomp=&pr.z.header[0]; // The code to translate #define off(x) ((char*)&(pr.x)-(char*)&pr) #define offc(x) ((char*)&(pr.comp[i].x)-(char*)&pr) // test for little-endian (probably x86) U32 t=0x12345678; if (*(char*)&t!=0x78 || (S!=4 && S!=8)) error("JIT supported only for x86-32 and x86-64"); // Initialize for predict(). 
Put predictor address in edi/rdi put1a(0xe9, 5); // jmp predict put1a(0, 0x90909000); // reserve space for jmp update put1(0x53); // push ebx/rbx put1(0x55); // push ebp/rbp put1(0x56); // push esi/rsi put1(0x57); // push edi/rdi if (S==4) put4(0x8b7c2414); // mov edi,[esp+0x14] ; pr else { #ifdef _WIN32 put3(0x4889cf); // mov rdi, rcx (1st arg in Win64) #endif } // Code predict() for each component const int n=hcomp[6]; // number of components U8* cp=hcomp+7; for (int i=0; i=pr.z.cend) error("comp too big"); if (cp[0]<1 || cp[0]>9) error("invalid component"); assert(compsize[cp[0]]>0 && compsize[cp[0]]<8); switch (cp[0]) { case CONS: // c break; case CM: // sizebits limit // Component& cr=comp[i]; // cr.cxt=h[i]^hmap4; // p[i]=stretch(cr.cm(cr.cxt)>>17); put2a(0x8b87, off(h[i])); // mov eax, [edi+&h[i]] put2a(0x3387, off(hmap4)); // xor eax, [edi+&hmap4] put1a(0x25, (1<rsi) put2a(0x8bb7, offc(cm)); // mov esi, [edi+&cm] put3(0x8b0486); // mov eax, [esi+eax*4] put3(0xc1e811); // shr eax, 17 put4a(0x0fbf8447, off(stretcht)); // movsx eax,word[edi+eax*2+..] put2a(0x8987, off(p[i])); // mov [edi+&p[i]], eax break; case ISSE: // sizebits j -- c=hi, cxt=bh // assert((hmap4&15)>0); // if (c8==1 || (c8&0xf0)==16) // cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8); // cr.cxt=cr.ht[cr.c+(hmap4&15)]; // bit history // int *wt=(int*)&cr.cm[cr.cxt*2]; // p[i]=clamp2k((wt[0]*p[cp[2]]+wt[1]*64)>>16); case ICM: // sizebits // assert((hmap4&15)>0); // if (c8==1 || (c8&0xf0)==16) cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8); // cr.cxt=cr.ht[cr.c+(hmap4&15)]; // p[i]=stretch(cr.cm(cr.cxt)>>8); // // Find cxt row in hash table ht. ht has rows of 16 indexed by the low // sizebits of cxt with element 0 having the next higher 8 bits for // collision detection. If not found after 3 adjacent tries, replace // row with lowest element 1 as priority. Return index of row. 
// // size_t Predictor::find(Array& ht, int sizebits, U32 cxt) { // assert(ht.size()==size_t(16)<>sizebits&255; // size_t h0=(cxt*16)&(ht.size()-16); // if (ht[h0]==chk) return h0; // size_t h1=h0^16; // if (ht[h1]==chk) return h1; // size_t h2=h0^32; // if (ht[h2]==chk) return h2; // if (ht[h0+1]<=ht[h1+1] && ht[h0+1]<=ht[h2+1]) // return memset(&ht[h0], 0, 16), ht[h0]=chk, h0; // else if (ht[h1+1]>(7-cr.cxt))&1; // predicted bit // p[i]=stretch(dt2k[cr.a]*(cr.c*-2+1)&32767); // } if (S==8) put1(0x48); // rex.w put2a(0x8bb7, offc(ht)); // mov esi, [edi+&ht] // If match length (a) is 0 then p[i]=0 put2a(0x8b87, offc(a)); // mov eax, [edi+&a] put2(0x85c0); // test eax, eax put2(0x7449); // jz L2 ; p[i]=0 // Else put predicted bit in c put1a(0xb9, 7); // mov ecx, 7 put2a(0x2b8f, offc(cxt)); // sub ecx, [edi+&cxt] put2a(0x8b87, offc(limit)); // mov eax, [edi+&limit] put2a(0x2b87, offc(b)); // sub eax, [edi+&b] put1a(0x25, (1<>8; put2a(0x8b87, off(p[cp[1]])); // mov eax, [edi+&p[j]] put2a(0x2b87, off(p[cp[2]])); // sub eax, [edi+&p[k]] put2a(0x69c0, cp[3]); // imul eax, wt put3(0xc1f808); // sar eax, 8 put2a(0x0387, off(p[cp[2]])); // add eax, [edi+&p[k]] put2a(0x8987, off(p[i])); // mov [edi+&p[i]], eax break; case MIX2: // sizebits j k rate mask // c=size cm=wt[size] cxt=input // cr.cxt=((h[i]+(c8&cp[5]))&(cr.c-1)); // assert(cr.cxt=0 && w<65536); // p[i]=(w*p[cp[2]]+(65536-w)*p[cp[3]])>>16; // assert(p[i]>=-2048 && p[i]<2048); put2(0x8b07); // mov eax, [edi] ; c8 put1a(0x25, cp[5]); // and eax, mask put2a(0x0387, off(h[i])); // add eax, [edi+&h[i]] put1a(0x25, (1<=1 && m<=i); // cr.cxt=h[i]+(c8&cp[5]); // cr.cxt=(cr.cxt&(cr.c-1))*m; // pointer to row of weights // assert(cr.cxt<=cr.cm.size()-m); // int* wt=(int*)&cr.cm[cr.cxt]; // p[i]=0; // for (int j=0; j>8)*p[cp[2]+j]; // p[i]=clamp2k(p[i]>>8); put2(0x8b07); // mov eax, [edi] ; c8 put1a(0x25, cp[5]); // and eax, mask put2a(0x0387, off(h[i])); // add eax, [edi+&h[i]] put1a(0x25, (1<3) put4a(0xf30f6f96, 
k*4+16);//movdqu xmm2, [esi+k*4+16] put5(0x660f72e1,0x08); // psrad xmm1, 8 if (tail>3) put5(0x660f72e2,0x08); // psrad xmm2, 8 put4(0x660f6bca); // packssdw xmm1, xmm2 put4a(0xf30f6f9f, off(p[cp[2]+k])); // movdqu xmm3, [edi+&p[j+k]] if (tail>3) put4a(0xf30f6fa7,off(p[cp[2]+k+4]));//movdqu xmm4, [edi+&p[j+k+4]] put4(0x660f6bdc); // packssdw, xmm3, xmm4 if (tail>0 && tail<8) { // last loop, mask extra weights put4(0x660f76ed); // pcmpeqd xmm5, xmm5 ; -1 put5(0x660f73dd, 16-tail*2); // psrldq xmm5, 16-tail*2 put4(0x660fdbcd); // pand xmm1, xmm5 } if (k==0) { // first loop, initialize sum in xmm0 put4(0xf30f6fc1); // movdqu xmm0, xmm1 put4(0x660ff5c3); // pmaddwd xmm0, xmm3 } else { // accumulate sum in xmm0 put4(0xf30f6fd1); // movdqu xmm2, xmm1 put4(0x660ff5d3); // pmaddwd xmm2, xmm3 put4(0x660ffec2); // paddd, xmm0, xmm2 } } // Add up the 4 elements of xmm0 = p[i] in the first element put4(0xf30f6fc8); // movdqu xmm1, xmm0 put5(0x660f73d9,0x08); // psrldq xmm1, 8 put4(0x660ffec1); // paddd xmm0, xmm1 put4(0xf30f6fc8); // movdqu xmm1, xmm0 put5(0x660f73d9,0x04); // psrldq xmm1, 4 put4(0x660ffec1); // paddd xmm0, xmm1 put4(0x660f7ec0); // movd eax, xmm0 ; p[i] put3(0xc1f808); // sar eax, 8 put1a(0xb9, 2047); // mov ecx, 2047 ; clamp2k put2(0x39c8); // cmp eax, ecx put3(0x0f4fc1); // cmovg eax, ecx put2(0xf7d1); // not ecx ; -2048 put2(0x39c8); // cmp eax, ecx put3(0x0f4cc1); // cmovl eax, ecx put2a(0x8987, off(p[i])); // mov [edi+&p[i]], eax break; case SSE: // sizebits j start limit // cr.cxt=(h[i]+c8)*32; // int pq=p[cp[2]]+992; // if (pq<0) pq=0; // if (pq>1983) pq=1983; // int wt=pq&63; // pq>>=6; // assert(pq>=0 && pq<=30); // cr.cxt+=pq; // p[i]=stretch(((cr.cm(cr.cxt)>>10)*(64-wt) // p0 // +(cr.cm(cr.cxt+1)>>10)*wt)>>13); // p1 // // p = p0*(64-wt)+p1*wt = (p1-p0)*wt + p0*64 // cr.cxt+=wt>>5; put2a(0x8b8f, off(h[i])); // mov ecx, [edi+&h[i]] put2(0x030f); // add ecx, [edi] ; c0 put2a(0x81e1, (1<>5 put2a(0x898f, offc(cxt)); // mov [edi+cxt], ecx ; cxt saved 
put3(0xc1e80a); // shr eax, 10 ; p0 = cm[cxt]>>10 put3(0xc1eb0a); // shr ebx, 10 ; p1 = cm[cxt+1]>>10 put2(0x29c3); // sub ebx, eax, ; p1-p0 put3(0x0fafda); // imul ebx, edx ; (p1-p0)*wt put3(0xc1e006); // shr eax, 6 put2(0x01d8); // add eax, ebx ; p in 0..2^28-1 put3(0xc1e80d); // shr eax, 13 ; p in 0..32767 put4a(0x0fbf8447, off(stretcht)); // movsx eax, word [edi+eax*2+...] put2a(0x8987, off(p[i])); // mov [edi+&p[i]], eax break; default: error("invalid ZPAQ component"); } } // return squash(p[n-1]) put2a(0x8b87, off(p[n-1])); // mov eax, [edi+...] put1a(0x05, 0x800); // add eax, 2048 put4a(0x0fbf8447, off(squasht[0])); // movsx eax, word [edi+eax*2+...] put1(0x5f); // pop edi put1(0x5e); // pop esi put1(0x5d); // pop ebp put1(0x5b); // pop ebx put1(0xc3); // ret // Initialize for update() Put predictor address in edi/rdi // and bit y=0..1 in ebp int save_o=o; o=5; put1a(0xe9, save_o-10); // jmp update o=save_o; put1(0x53); // push ebx/rbx put1(0x55); // push ebp/rbp put1(0x56); // push esi/rsi put1(0x57); // push edi/rdi if (S==4) { put4(0x8b7c2414); // mov edi,[esp+0x14] ; (1st arg = pr) put4(0x8b6c2418); // mov ebp,[esp+0x18] ; (2nd arg = y) } else { #ifndef _WIN32 put3(0x4889f5); // mov rbp, rsi (2nd arg in Linux-64) #else put3(0x4889cf); // mov rdi, rcx (1st arg in Win64) put3(0x4889d5); // mov rbp, rdx (2nd arg) #endif } // Code update() for each component cp=hcomp+7; for (int i=0; i=1 && cp[0]<=9); assert(compsize[cp[0]]>0 && compsize[cp[0]]<8); switch (cp[0]) { case CONS: // c break; case SSE: // sizebits j start limit case CM: // sizebits limit // train(cr, y); // // reduce prediction error in cr.cm // void train(Component& cr, int y) { // assert(y==0 || y==1); // U32& pn=cr.cm(cr.cxt); // U32 count=pn&0x3ff; // int error=y*32767-(cr.cm(cr.cxt)>>17); // pn+=(error*dt[count]&-1024)+(countrsi) put2a(0x8bb7, offc(cm)); // mov esi,[edi+cm] ; cm put2a(0x8b87, offc(cxt)); // mov eax,[edi+cxt] ; cxt put1a(0x25, pr.comp[i].cm.size()-1); // and eax, size-1 if 
(S==8) put1(0x48); // rex.w put3(0x8d3486); // lea esi,[esi+eax*4] ; &cm[cxt] put2(0x8b06); // mov eax,[esi] ; cm[cxt] put2(0x89c2); // mov edx, eax ; cm[cxt] put3(0xc1e811); // shr eax, 17 ; cm[cxt]>>17 put2(0x89e9); // mov ecx, ebp ; y put3(0xc1e10f); // shl ecx, 15 ; y*32768 put2(0x29e9); // sub ecx, ebp ; y*32767 put2(0x29c1); // sub ecx, eax ; error put2a(0x81e2, 0x3ff); // and edx, 1023 ; count put3a(0x8b8497, off(dt)); // mov eax,[edi+edx*4+dt] ; dt[count] put3(0x0fafc8); // imul ecx, eax ; error*dt[count] put2a(0x81e1, 0xfffffc00); // and ecx, -1024 put2a(0x81fa, cp[2+2*(cp[0]==SSE)]*4); // cmp edx, limit*4 put2(0x110e); // adc [esi], ecx ; pn+=... break; case ICM: // sizebits: cxt=bh, ht[c][0..15]=bh row // cr.ht[cr.c+(hmap4&15)]=st.next(cr.ht[cr.c+(hmap4&15)], y); // U32& pn=cr.cm(cr.cxt); // pn+=int(y*32767-(pn>>8))>>2; case ISSE: // sizebits j -- c=hi, cxt=bh // assert(cr.cxt==cr.ht[cr.c+(hmap4&15)]); // int err=y*32767-squash(p[i]); // int *wt=(int*)&cr.cm[cr.cxt*2]; // wt[0]=clamp512k(wt[0]+((err*p[cp[2]]+(1<<12))>>13)); // wt[1]=clamp512k(wt[1]+((err+16)>>5)); // cr.ht[cr.c+(hmap4&15)]=st.next(cr.cxt, y); // update bit history bh to next(bh,y=ebp) in ht[c+(hmap4&15)] put3(0x8b4700+off(hmap4)); // mov eax, [edi+&hmap4] put3(0x83e00f); // and eax, 15 put2a(0x0387, offc(c)); // add eax [edi+&c] ; cxt if (S==8) put1(0x48); // rex.w put2a(0x8bb7, offc(ht)); // mov esi, [edi+&ht] put4(0x0fb61406); // movzx edx, byte [esi+eax] ; bh put4(0x8d5c9500); // lea ebx, [ebp+edx*4] ; index to st put4a(0x0fb69c1f, off(st)); // movzx ebx,byte[edi+ebx+st]; next bh put3(0x881c06); // mov [esi+eax], bl ; save next bh if (S==8) put1(0x48); // rex.w put2a(0x8bb7, offc(cm)); // mov esi, [edi+&cm] // ICM: update cm[cxt=edx=bit history] to reduce prediction error // esi = &cm if (cp[0]==ICM) { if (S==8) put1(0x48); // rex.w put3(0x8d3496); // lea esi, [esi+edx*4] ; &cm[bh] put2(0x8b06); // mov eax, [esi] ; pn put3(0xc1e808); // shr eax, 8 ; pn>>8 put2(0x89e9); // mov ecx, ebp 
; y put3(0xc1e10f); // shl ecx, 15 put2(0x29e9); // sub ecx, ebp ; y*32767 put2(0x29c1); // sub ecx, eax put3(0xc1f902); // sar ecx, 2 put2(0x010e); // add [esi], ecx } // ISSE: update weights. edx=cxt=bit history (0..255), esi=cm[512] else { put2a(0x8b87, off(p[i])); // mov eax, [edi+&p[i]] put1a(0x05, 2048); // add eax, 2048 put4a(0x0fb78447, off(squasht)); // movzx eax, word [edi+eax*2+..] put2(0x89e9); // mov ecx, ebp ; y put3(0xc1e10f); // shl ecx, 15 put2(0x29e9); // sub ecx, ebp ; y*32767 put2(0x29c1); // sub ecx, eax ; err put2a(0x8b87, off(p[cp[2]]));// mov eax, [edi+&p[j]] put3(0x0fafc1); // imul eax, ecx put1a(0x05, (1<<12)); // add eax, 4096 put3(0xc1f80d); // sar eax, 13 put3(0x0304d6); // add eax, [esi+edx*8] ; wt[0] put1a(0xbb, (1<<19)-1); // mov ebx, 524287 put2(0x39d8); // cmp eax, ebx put3(0x0f4fc3); // cmovg eax, ebx put2(0xf7d3); // not ebx ; -524288 put2(0x39d8); // cmp eax, ebx put3(0x0f4cc3); // cmovl eax, ebx put3(0x8904d6); // mov [esi+edx*8], eax put3(0x83c110); // add ecx, 16 ; err put3(0xc1f905); // sar ecx, 5 put4(0x034cd604); // add ecx, [esi+edx*8+4] ; wt[1] put1a(0xb8, (1<<19)-1); // mov eax, 524287 put2(0x39c1); // cmp ecx, eax put3(0x0f4fc8); // cmovg ecx, eax put2(0xf7d0); // not eax ; -524288 put2(0x39c1); // cmp ecx, eax put3(0x0f4cc8); // cmovl ecx, eax put4(0x894cd604); // mov [esi+edx*8+4], ecx } break; case MATCH: // sizebits bufbits: // a=len, b=offset, c=bit, cm=index, cxt=bitpos // ht=buf, limit=pos // assert(cr.a<=255); // assert(cr.c==0 || cr.c==1); // assert(cr.cxt<8); // assert(cr.cm.size()==(size_t(1)<>5; // int w=cr.a16[cr.cxt]; // w+=(err*(p[cp[2]]-p[cp[3]])+(1<<12))>>13; // if (w<0) w=0; // if (w>65535) w=65535; // cr.a16[cr.cxt]=w; // set ecx=err put2a(0x8b87, off(p[i])); // mov eax, [edi+&p[i]] put1a(0x05, 2048); // add eax, 2048 put4a(0x0fb78447, off(squasht));//movzx eax, word [edi+eax*2+&squasht] put2(0x89e9); // mov ecx, ebp ; y put3(0xc1e10f); // shl ecx, 15 put2(0x29e9); // sub ecx, ebp ; y*32767 
put2(0x29c1); // sub ecx, eax put2a(0x69c9, cp[4]); // imul ecx, rate put3(0xc1f905); // sar ecx, 5 ; err // Update w put2a(0x8b87, offc(cxt)); // mov eax, [edi+&cxt] if (S==8) put1(0x48); // rex.w put2a(0x8bb7, offc(a16)); // mov esi, [edi+&a16] if (S==8) put1(0x48); // rex.w put3(0x8d3446); // lea esi, [esi+eax*2] ; &w put2a(0x8b87, off(p[cp[2]])); // mov eax, [edi+&p[j]] put2a(0x2b87, off(p[cp[3]])); // sub eax, [edi+&p[k]] ; p[j]-p[k] put3(0x0fafc1); // imul eax, ecx ; * err put1a(0x05, 1<<12); // add eax, 4096 put3(0xc1f80d); // sar eax, 13 put3(0x0fb716); // movzx edx, word [esi] ; w put2(0x01d0); // add eax, edx put1a(0xba, 0xffff); // mov edx, 65535 put2(0x39d0); // cmp eax, edx put3(0x0f4fc2); // cmovg eax, edx put2(0x31d2); // xor edx, edx put2(0x39d0); // cmp eax, edx put3(0x0f4cc2); // cmovl eax, edx put3(0x668906); // mov word [esi], ax break; case MIX: // sizebits j m rate mask // cm=wt[size][m], cxt=input // int m=cp[3]; // assert(m>0 && m<=i); // assert(cr.cm.size()==m*cr.c); // assert(cr.cxt+m<=cr.cm.size()); // int err=(y*32767-squash(p[i]))*cp[4]>>4; // int* wt=(int*)&cr.cm[cr.cxt]; // for (int j=0; j>13)); // set ecx=err put2a(0x8b87, off(p[i])); // mov eax, [edi+&p[i]] put1a(0x05, 2048); // add eax, 2048 put4a(0x0fb78447, off(squasht));//movzx eax, word [edi+eax*2+&squasht] put2(0x89e9); // mov ecx, ebp ; y put3(0xc1e10f); // shl ecx, 15 put2(0x29e9); // sub ecx, ebp ; y*32767 put2(0x29c1); // sub ecx, eax put2a(0x69c9, cp[4]); // imul ecx, rate put3(0xc1f904); // sar ecx, 4 ; err // set esi=wt put2a(0x8b87, offc(cxt)); // mov eax, [edi+&cxt] ; cxt if (S==8) put1(0x48); // rex.w put2a(0x8bb7, offc(cm)); // mov esi, [edi+&cm] if (S==8) put1(0x48); // rex.w put3(0x8d3486); // lea esi, [esi+eax*4] ; wt for (int k=0; k=256) { z.run(c8-256); hmap4=1; c8=1; for (int i=0; i=16 && c8<32) hmap4=(hmap4&0xf)<<5|y<<4|1; else hmap4=(hmap4&0x1f0)|(((hmap4&0xf)*2+y)&0xf); #endif } // Execute the ZPAQL code with input byte or -1 for EOF. 
// Use JIT code at rcode if available, or else create it. void ZPAQL::run(U32 input) { #ifdef NOJIT run0(input); #else if (!rcode) { int n=assemble(); allocx(rcode, rcode_size, n); if (!rcode || n<10 || rcode_size<10 || n!=assemble()) error("run JIT failed"); } a=input; if (!((int(*)())(&rcode[0]))()) libzpaq::error("Bad ZPAQL opcode"); #endif } } // end namespace libzpaq lrzip-0.651/libzpaq/libzpaq.h000066400000000000000000000414121421175057200161500ustar00rootroot00000000000000/* libzpaq.h - LIBZPAQ Version 5.00. Copyright (C) 2011, Dell Inc. Written by Matt Mahoney. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so without restriction. This Software is provided "as is" without warranty. LIBZPAQ is a C++ library for compression and decompression of data conforming to the ZPAQ level 2 standard. See http://mattmahoney.net/zpaq/ By default, LIBZPAQ uses JIT (just in time) acceleration. This only works on x86-32 and x86-64 processors that support the SSE2 instruction set. To disable JIT, compile with -DNOJIT. To enable run time checks, compile with -DDEBUG. Both options will decrease speed. The decompression code, when compiled with -DDEBUG and -DNOJIT, comprises the reference decoder for the ZPAQ level 2 standard. 
*/ #ifndef LIBZPAQ_H #define LIBZPAQ_H #ifndef DEBUG #define NDEBUG 1 #endif #include #include #include #include namespace libzpaq { // 1, 2, 4, 8 byte unsigned integers typedef uint8_t U8; typedef uint16_t U16; typedef uint32_t U32; typedef uint64_t U64; // Standard library prototypes redirected to libzpaq.cpp void* calloc(size_t, size_t); void free(void*); // Callback for error handling extern void error(const char* msg); // Virtual base classes for input and output // get() and put() must be overridden to read or write 1 byte. // read() and write() may be overridden to read or write n bytes more // efficiently than calling get() or put() n times. class Reader { public: virtual int get() = 0; // should return 0..255, or -1 at EOF virtual int read(char* buf, int n); // read to buf[n], return no. read virtual ~Reader() {} }; class Writer { public: virtual void put(int c) = 0; // should output low 8 bits of c virtual void write(const char* buf, int n); // write buf[n] virtual ~Writer() {} }; // Read 16 bit little-endian number int toU16(const char* p); // An Array of T is cleared and aligned on a 64 byte address // with no constructors called. No copy or assignment. // Array a(n, ex=0); - creates n< class Array { T *data; // user location of [0] on a 64 byte boundary size_t n; // user size int offset; // distance back in bytes to start of actual allocation void operator=(const Array&); // no assignment Array(const Array&); // no copy public: Array(size_t sz=0, int ex=0): data(0), n(0), offset(0) { resize(sz, ex);} // [0..sz-1] = 0 void resize(size_t sz, int ex=0); // change size, erase content to zeros ~Array() {resize(0);} // free memory size_t size() const {return n;} // get size int isize() const {return int(n);} // get size as an int T& operator[](size_t i) {assert(n>0 && i0 && (n&(n-1))==0); return data[i&(n-1)];} }; // Change size to sz< void Array::resize(size_t sz, int ex) { assert(size_t(-1)>0); // unsigned type? 
while (ex>0) { if (sz>sz*2) error("Array too big"); sz*=2, --ex; } if (n>0) { assert(offset>0 && offset<=64); assert((char*)data-offset); free((char*)data-offset); } n=0; if (sz==0) return; n=sz; const size_t nb=128+n*sizeof(T); // test for overflow if (nb<=128 || (nb-128)/sizeof(T)!=n) error("Array too big"); data=(T*)calloc(nb, 1); if (!data) error("Out of memory"); offset=64-(((char*)data-(char*)0)&63); assert(offset>0 && offset<=64); data=(T*)((char*)data+offset); } //////////////////////////// SHA1 //////////////////////////// // For computing SHA-1 checksums class SHA1 { public: void put(int c) { // hash 1 byte U32& r=w[len0>>5&15]; r=(r<<8)|(c&255); if (!(len0+=8)) ++len1; if ((len0&511)==0) process(); } double size() const {return len0/8+len1*536870912.0;} // size in bytes uint64_t usize() const {return len0/8+(U64(len1)<<29);} // size in bytes const char* result(); // get hash and reset SHA1() {init();} private: void init(); // reset, but don't clear hbuf U32 len0, len1; // length in bits (low, high) U32 h[5]; // hash state U32 w[80]; // input buffer char hbuf[20]; // result void process(); // hash 1 block }; //////////////////////////// ZPAQL /////////////////////////// // Symbolic constants, instruction size, and names typedef enum {NONE,CONS,CM,ICM,MATCH,AVG,MIX2,MIX,ISSE,SSE} CompType; extern const int compsize[256]; // A ZPAQL machine COMP+HCOMP or PCOMP. 
class ZPAQL { public: ZPAQL(); ~ZPAQL(); void clear(); // Free memory, erase program, reset machine state void inith(); // Initialize as HCOMP to run void initp(); // Initialize as PCOMP to run double memory(); // Return memory requirement in bytes void run(U32 input); // Execute with input int read(Reader* in2); // Read header bool write(Writer* out2, bool pp); // If pp write PCOMP else HCOMP header int step(U32 input, int mode); // Trace execution (defined externally) Writer* output; // Destination for OUT instruction, or 0 to suppress SHA1* sha1; // Points to checksum computer U32 H(int i) {return h(i);} // get element of h void flush(); // write outbuf[0..bufptr-1] to output and sha1 void outc(int c) { // output byte c (0..255) or -1 at EOS if (c<0 || (outbuf[bufptr]=c, ++bufptr==outbuf.isize())) flush(); } // ZPAQ1 block header Array header; // hsize[2] hh hm ph pm n COMP (guard) HCOMP (guard) int cend; // COMP in header[7...cend-1] int hbegin, hend; // HCOMP/PCOMP in header[hbegin...hend-1] private: // Machine state for executing HCOMP Array m; // memory array M for HCOMP Array h; // hash array H for HCOMP Array r; // 256 element register array Array outbuf; // output buffer int bufptr; // number of bytes in outbuf U32 a, b, c, d; // machine registers int f; // condition flag int pc; // program counter int rcode_size; // length of rcode U8* rcode; // JIT code for run() // Support code int assemble(); // put JIT code in rcode void init(int hbits, int mbits); // initialize H and M sizes int execute(); // execute 1 instruction, return 0 after HALT, else 1 void run0(U32 input); // default run() when select==0 void div(U32 x) {if (x) a/=x; else a=0;} void mod(U32 x) {if (x) a%=x; else a=0;} void swap(U32& x) {a^=x; x^=a; a^=x;} void swap(U8& x) {a^=x; x^=a; a^=x;} void err(); // exit with run time error }; ///////////////////////// Component ////////////////////////// // A Component is a context model, indirect context model, match model, // fixed weight mixer, 
adaptive 2 input mixer without or with current // partial byte as context, adaptive m input mixer (without or with), // or SSE (without or with). struct Component { size_t limit; // max count for cm size_t cxt; // saved context size_t a, b, c; // multi-purpose variables Array cm; // cm[cxt] -> p in bits 31..10, n in 9..0; MATCH index Array ht; // ICM/ISSE hash table[0..size1][0..15] and MATCH buf Array a16; // MIX weights void init(); // initialize to all 0 Component() {init();} }; ////////////////////////// StateTable //////////////////////// // Next state table generator class StateTable { enum {N=64}; // sizes of b, t int num_states(int n0, int n1); // compute t[n0][n1][1] void discount(int& n0); // set new value of n0 after 1 or n1 after 0 void next_state(int& n0, int& n1, int y); // new (n0,n1) after bit y public: U8 ns[1024]; // state*4 -> next state if 0, if 1, n0, n1 int next(int state, int y) { // next state for bit y assert(state>=0 && state<256); assert(y>=0 && y<4); return ns[state*4+y]; } int cminit(int state) { // initial probability of 1 * 2^23 assert(state>=0 && state<256); return ((ns[state*4+3]*2+1)<<22)/(ns[state*4+2]+ns[state*4+3]+1); } StateTable(); }; ///////////////////////// Predictor ////////////////////////// // A predictor guesses the next bit class Predictor { public: Predictor(ZPAQL&); ~Predictor(); void init(); // build model int predict(); // probability that next bit is a 1 (0..4095) void update(int y); // train on bit y (0..1) int stat(int); // Defined externally bool isModeled() { // n>0 components? assert(z.header.isize()>6); return z.header[6]!=0; } private: // Predictor state int c8; // last 0...7 bits. 
int hmap4; // c8 split into nibbles int p[256]; // predictions U32 h[256]; // unrolled copy of z.h ZPAQL& z; // VM to compute context hashes, includes H, n Component comp[256]; // the model, includes P // Modeling support functions int predict0(); // default void update0(int y); // default int dt2k[256]; // division table for match: dt2k[i] = 2^12/i int dt[1024]; // division table for cm: dt[i] = 2^16/(i+1.5) U16 squasht[4096]; // squash() lookup table short stretcht[32768];// stretch() lookup table StateTable st; // next, cminit functions U8* pcode; // JIT code for predict() and update() int pcode_size; // length of pcode // reduce prediction error in cr.cm void train(Component& cr, int y) { assert(y==0 || y==1); U32& pn=cr.cm(cr.cxt); U32 count=pn&0x3ff; int error=y*32767-(cr.cm(cr.cxt)>>17); pn+=(error*dt[count]&-1024)+(count floor(32768/(1+exp(-x/64))) int squash(int x) { assert(x>=-2048 && x<=2047); return squasht[x+2048]; } // x -> round(64*log((x+0.5)/(32767.5-x))), approx inverse of squash int stretch(int x) { assert(x>=0 && x<=32767); return stretcht[x]; } // bound x to a 12 bit signed int int clamp2k(int x) { if (x<-2048) return -2048; else if (x>2047) return 2047; else return x; } // bound x to a 20 bit signed int int clamp512k(int x) { if (x<-(1<<19)) return -(1<<19); else if (x>=(1<<19)) return (1<<19)-1; else return x; } // Get cxt in ht, creating a new row if needed size_t find(Array& ht, int sizebits, U32 cxt); // Put JIT code in pcode int assemble_p(); }; //////////////////////////// Decoder ///////////////////////// // Decoder decompresses using an arithmetic code class Decoder { public: Reader* in; // destination Decoder(ZPAQL& z); int decompress(); // return a byte or EOF int skip(); // skip to the end of the segment, return next byte void init(); // initialize at start of block int stat(int x) {return pr.stat(x);} private: U32 low, high; // range U32 curr; // last 4 bytes of archive Predictor pr; // to get p enum {BUFSIZE=1<<16}; Array buf; // 
input buffer of size BUFSIZE bytes // of unmodeled data. buf[low..high-1] is input with curr // remaining in sub-block. int decode(int p); // return decoded bit (0..1) with prob. p (0..65535) void loadbuf(); // read unmodeled data into buf to EOS }; /////////////////////////// PostProcessor //////////////////// class PostProcessor { int state; // input parse state: 0=INIT, 1=PASS, 2..4=loading, 5=POST int hsize; // header size int ph, pm; // sizes of H and M in z public: ZPAQL z; // holds PCOMP PostProcessor(): state(0), hsize(0), ph(0), pm(0) {} void init(int h, int m); // ph, pm sizes of H and M int write(int c); // Input a byte, return state int getState() const {return state;} void setOutput(Writer* out) {z.output=out;} void setSHA1(SHA1* sha1ptr) {z.sha1=sha1ptr;} }; //////////////////////// Decompresser //////////////////////// // For decompression and listing archive contents class Decompresser { public: Decompresser(): z(), dec(z), pp(), state(BLOCK), decode_state(FIRSTSEG) {} void setInput(Reader* in) {dec.in=in;} bool findBlock(double* memptr = 0); void hcomp(Writer* out2) {z.write(out2, false);} bool findFilename(Writer* = 0); void readComment(Writer* = 0); void setOutput(Writer* out) {pp.setOutput(out);} void setSHA1(SHA1* sha1ptr) {pp.setSHA1(sha1ptr);} bool decompress(int n = -1); // n bytes, -1=all, return true until done bool pcomp(Writer* out2) {return pp.z.write(out2, true);} void readSegmentEnd(char* sha1string = 0); int stat(int x) {return dec.stat(x);} private: ZPAQL z; Decoder dec; PostProcessor pp; enum {BLOCK, FILENAME, COMMENT, DATA, SEGEND} state; // expected next enum {FIRSTSEG, SEG, SKIP} decode_state; // which segment in block? }; /////////////////////////// decompress() ///////////////////// void decompress(Reader* in, Writer* out); ////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////// // Code following this point is not a part of the ZPAQ level 2 standard. 
//////////////////////////// Encoder ///////////////////////// // Encoder compresses using an arithmetic code class Encoder { public: Encoder(ZPAQL& z): out(0), low(1), high(0xFFFFFFFF), pr(z) {} void init(); void compress(int c); // c is 0..255 or EOF int stat(int x) {return pr.stat(x);} Writer* out; // destination private: U32 low, high; // range Predictor pr; // to get p Array buf; // unmodeled input void encode(int y, int p); // encode bit y (0..1) with prob. p (0..65535) }; //////////////////////// Compressor ////////////////////////// class Compressor { public: Compressor(): enc(z), in(0), state(INIT) {} void setOutput(Writer* out) {enc.out=out;} void writeTag(); void startBlock(int level); // level=1,2,3 void startBlock(const char* hcomp); void startSegment(const char* filename = 0, const char* comment = 0); void setInput(Reader* i) {in=i;} void postProcess(const char* pcomp = 0, int len = 0); bool compress(int n = -1); // n bytes, -1=all, return true until done void endSegment(const char* sha1string = 0); void endBlock(); int stat(int x) {return enc.stat(x);} private: ZPAQL z; Encoder enc; Reader* in; enum {INIT, BLOCK1, SEG1, BLOCK2, SEG2} state; }; /////////////////////////// compress() /////////////////////// void compress(Reader* in, Writer* out, int level); } // namespace libzpaq /////////////////////////// lrzip functions ////////////////// #include #ifndef uchar #define uchar unsigned char #endif #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) #define __maybe_unused __attribute__((unused)) typedef int64_t i64; struct bufRead: public libzpaq::Reader { uchar *s_buf; i64 *s_len; i64 total_len; int *last_pct; bool progress; long thread; FILE *msgout; bufRead(uchar *buf_, i64 *n_, i64 total_len_, int *last_pct_, bool progress_, long thread_, FILE *msgout_): s_buf(buf_), s_len(n_), total_len(total_len_), last_pct(last_pct_), progress(progress_), thread(thread_), msgout(msgout_) {} int get() { if (progress && 
!(*s_len % 128)) { int pct = (total_len > 0) ? (total_len - *s_len) * 100 / total_len : 100; if (pct / 10 != *last_pct / 10) { int i; fprintf(msgout, "\r\t\t\tZPAQ\t"); for (i = 0; i < thread; i++) fprintf(msgout, "\t"); fprintf(msgout, "%ld:%i%% \r", thread + 1, pct); fflush(msgout); *last_pct = pct; } } if (likely(*s_len > 0)) { (*s_len)--; return ((int)(uchar)*s_buf++); } return -1; } // read and return byte 0..255, or -1 at EOF int read(char *buf, int n) { if (unlikely(n > *s_len)) n = *s_len; if (likely(n > 0)) { *s_len -= n; memcpy(buf, s_buf, n); } return n; } }; struct bufWrite: public libzpaq::Writer { uchar *c_buf; i64 *c_len; bufWrite(uchar *buf_, i64 *n_): c_buf(buf_), c_len(n_) {} void put(int c) { c_buf[(*c_len)++] = (uchar)c; } void write(const char *buf, int n) { memcpy(c_buf + *c_len, buf, n); *c_len += n; } }; extern "C" void zpaq_compress(uchar *c_buf, i64 *c_len, uchar *s_buf, i64 s_len, int level, FILE *msgout, bool progress, long thread) { i64 total_len = s_len; int last_pct = 100; bufRead bufR(s_buf, &s_len, total_len, &last_pct, progress, thread, msgout); bufWrite bufW(c_buf, c_len); compress (&bufR, &bufW, level); } extern "C" void zpaq_decompress(uchar *s_buf, i64 *d_len, uchar *c_buf, i64 c_len, FILE *msgout, bool progress, long thread) { i64 total_len = c_len; int last_pct = 100; bufRead bufR(c_buf, &c_len, total_len, &last_pct, progress, thread, msgout); bufWrite bufW(s_buf, d_len); decompress(&bufR, &bufW); } #endif // LIBZPAQ_H lrzip-0.651/lrzip.c000066400000000000000000001261321421175057200142020ustar00rootroot00000000000000/* Copyright (C) 2006-2016,2018,2021-2022 Con Kolivas Copyright (C) 2011 Peter Hyman Copyright (C) 1998-2003 Andrew Tridgell This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include #ifdef HAVE_SYS_STAT_H # include #endif #include #include #ifdef HAVE_UNISTD_H # include #endif #include #ifdef HAVE_ERRNO_H #include #endif #include #include #include #ifdef HAVE_ENDIAN_H # include #elif HAVE_SYS_ENDIAN_H # include #endif #ifdef HAVE_ARPA_INET_H # include #endif #include #include #include #include "md5.h" #include "rzip.h" #include "runzip.h" #include "util.h" #include "stream.h" #define MAGIC_LEN (24) static void release_hashes(rzip_control *control); static i64 fdout_seekto(rzip_control *control, i64 pos) { if (TMP_OUTBUF) { pos -= control->out_relofs; control->out_ofs = pos; if (unlikely(pos > control->out_len || pos < 0)) { print_err("Trying to seek to %lld outside tmp outbuf in fdout_seekto\n", pos); return -1; } return 0; } return lseek(control->fd_out, pos, SEEK_SET); } #ifdef __APPLE__ # include i64 get_ram(rzip_control *control) { int mib[2]; size_t len; i64 *p, ramsize; mib[0] = CTL_HW; mib[1] = HW_MEMSIZE; sysctl(mib, 2, NULL, &len, NULL, 0); p = malloc(len); sysctl(mib, 2, p, &len, NULL, 0); ramsize = *p; return ramsize; } #else /* __APPLE__ */ i64 get_ram(rzip_control *control) { i64 ramsize; FILE *meminfo; char aux[256]; ramsize = (i64)sysconf(_SC_PHYS_PAGES) * PAGE_SIZE; if (ramsize > 0) return ramsize; /* Workaround for uclibc which doesn't properly support sysconf */ if(!(meminfo = fopen("/proc/meminfo", "r"))) fatal_return(("fopen\n"), -1); while(!feof(meminfo) && !fscanf(meminfo, "MemTotal: %"PRId64" kB", &ramsize)) { if (unlikely(fgets(aux, sizeof(aux), meminfo) == NULL)) { fclose(meminfo); fatal_return(("Failed to fgets in get_ram\n"), 
-1); } } if (fclose(meminfo) == -1) fatal_return(("fclose"), -1); ramsize *= 1000; return ramsize; } #endif i64 nloops(i64 seconds, uchar *b1, uchar *b2) { i64 nloops; int nbits; nloops = ARBITRARY_AT_EPOCH * pow(MOORE_TIMES_PER_SECOND, seconds); if (nloops < ARBITRARY) nloops = ARBITRARY; for (nbits = 0; nloops > 255; nbits ++) nloops = nloops >> 1; *b1 = nbits; *b2 = nloops; return nloops << nbits; } bool write_magic(rzip_control *control) { char magic[MAGIC_LEN] = { 'L', 'R', 'Z', 'I', LRZIP_MAJOR_VERSION, LRZIP_MINOR_VERSION }; /* File size is stored as zero for streaming STDOUT blocks when the * file size is unknown. In encrypted files, the size is left unknown * and instead the salt is stored here to preserve space. */ if (ENCRYPT) memcpy(&magic[6], &control->salt, 8); else if (!STDIN || !STDOUT || control->eof) { i64 esize = htole64(control->st_size); memcpy(&magic[6], &esize, 8); } /* save LZMA compression flags */ if (LZMA_COMPRESS) { int i; for (i = 0; i < 5; i++) magic[i + 16] = (char)control->lzma_properties[i]; } /* This is a flag that the archive contains an md5 sum at the end * which can be used as an integrity check instead of crc check. * crc is still stored for compatibility with 0.5 versions. 
*/
	if (!NO_MD5)
		magic[21] = 1;
	if (ENCRYPT)
		magic[22] = 1;

	/* The header always lives at offset 0 of the output */
	if (unlikely(fdout_seekto(control, 0)))
		fatal_return(("Failed to seek to BOF to write Magic Header\n"), false);
	if (unlikely(put_fdout(control, magic, MAGIC_LEN) != MAGIC_LEN))
		fatal_return(("Failed to write magic header\n"), false);
	control->magic_written = 1;
	return true;
}

/* Rebuild a loop count from its two stored bytes: b2 shifted left by b1.
 * NOTE(review): b1 is used unchecked as a shift count; when it comes from
 * an archive header a value of 64 or more would be undefined behaviour -
 * confirm whether callers need to validate it. */
static inline i64 enc_loops(uchar b1, uchar b2)
{
	return (i64)b2 << (i64)b1;
}

/* Parse a magic header already read into `magic` and record the version,
 * expected size, lzma properties and md5/encryption flags in control.
 * Returns false when the "LRZI" signature is missing or the encryption
 * byte holds an unknown value. */
static bool get_magic(rzip_control *control, char *magic)
{
	int encrypted, md5, i;
	i64 expected_size;
	uint32_t v;

	if (unlikely(strncmp(magic, "LRZI", 4)))
		failure_return(("Not an lrzip file\n"), false);

	memcpy(&control->major_version, &magic[4], 1);
	memcpy(&control->minor_version, &magic[5], 1);

	print_verbose("Detected lrzip version %d.%d file.\n", control->major_version, control->minor_version);
	/* Newer archives are only warned about, not rejected */
	if (control->major_version > LRZIP_MAJOR_VERSION ||
	    (control->major_version == LRZIP_MAJOR_VERSION && control->minor_version > LRZIP_MINOR_VERSION))
		print_output("Attempting to work with file produced by newer lrzip version %d.%d file.\n", control->major_version, control->minor_version);

	/* Support the convoluted way we described size in versions < 0.40 */
	if (control->major_version == 0 && control->minor_version < 4) {
		memcpy(&v, &magic[6], 4);
		expected_size = ntohl(v);
		memcpy(&v, &magic[10], 4);
		expected_size |= ((i64)ntohl(v)) << 32;
	} else {
		memcpy(&expected_size, &magic[6], 8);
		expected_size = le64toh(expected_size);
	}
	control->st_size = expected_size;
	if (control->major_version == 0 && control->minor_version < 6)
		control->eof = 1;

	/* restore LZMA compression flags only if stored */
	if ((int) magic[16]) {
		for (i = 0; i < 5; i++)
			control->lzma_properties[i] = magic[i + 16];
		/* Cludge to allow us to read possibly corrupted archives */
		if (!control->lzma_properties[0])
			control->lzma_properties[0] = 93;
	}

	/* Whether this archive contains md5 data at the end or not */
	md5 = magic[21];
	if (md5) {
		if (md5 == 1)
			control->flags |= FLAG_MD5;
		else
			print_verbose("Unknown
hash, falling back to CRC\n"); } encrypted = magic[22]; if (encrypted) { if (encrypted == 1) control->flags |= FLAG_ENCRYPT; else failure_return(("Unknown encryption\n"), false); /* In encrypted files, the size field is used to store the salt * instead and the size is unknown, just like a STDOUT chunked * file */ memcpy(&control->salt, &magic[6], 8); control->st_size = expected_size = 0; control->encloops = enc_loops(control->salt[0], control->salt[1]); print_maxverbose("Encryption hash loops %lld\n", control->encloops); } else if (ENCRYPT) { print_output("Asked to decrypt a non-encrypted archive. Bypassing decryption.\n"); control->flags &= ~FLAG_ENCRYPT; } return true; } bool read_magic(rzip_control *control, int fd_in, i64 *expected_size) { char magic[MAGIC_LEN]; memset(magic, 0, sizeof(magic)); /* Initially read only st_size; return true; } /* preserve ownership and permissions where possible */ static bool preserve_perms(rzip_control *control, int fd_in, int fd_out) { struct stat st; if (unlikely(fstat(fd_in, &st))) fatal_return(("Failed to fstat input file\n"), false); if (unlikely(fchmod(fd_out, (st.st_mode & 0666)))) print_verbose("Warning, unable to set permissions on %s\n", control->outfile); /* chown fail is not fatal_return(( */ if (unlikely(fchown(fd_out, st.st_uid, st.st_gid))) print_verbose("Warning, unable to set owner on %s\n", control->outfile); return true; } static bool preserve_times(rzip_control *control, int fd_in) { struct utimbuf times; struct stat st; if (unlikely(fstat(fd_in, &st))) fatal_return(("Failed to fstat input file\n"), false); times.actime = 0; times.modtime = st.st_mtime; if (unlikely(utime(control->outfile, ×))) print_verbose("Warning, unable to set time on %s\n", control->outfile); return true; } /* Open a temporary outputfile to emulate stdout */ int open_tmpoutfile(rzip_control *control) { int fd_out; if (STDOUT && !TEST_ONLY) print_verbose("Outputting to stdout.\n"); if (control->tmpdir) { control->outfile = realloc(NULL, 
strlen(control->tmpdir) + 16); if (unlikely(!control->outfile)) fatal_return(("Failed to allocate outfile name\n"), -1); strcpy(control->outfile, control->tmpdir); strcat(control->outfile, "lrzipout.XXXXXX"); } fd_out = mkstemp(control->outfile); if (fd_out == -1) { print_output("WARNING: Failed to create out tmpfile: %s, will fail if cannot perform %scompression entirely in ram\n", control->outfile, DECOMPRESS ? "de" : ""); } else register_outfile(control, control->outfile, TEST_ONLY || STDOUT || !KEEP_BROKEN); print_maxverbose("Created temporary outfile %s\n", control->outfile); return fd_out; } static bool fwrite_stdout(rzip_control *control, void *buf, i64 len) { uchar *offset_buf = buf; ssize_t ret; i64 total; total = 0; while (len > 0) { ssize_t wrote; if (BITS32) ret = MIN(len, one_g); else ret = len; wrote = fwrite(offset_buf, 1, ret, control->outFILE); if (unlikely(wrote != ret)) fatal_return(("Failed to fwrite in fwrite_stdout\n"), false); len -= ret; offset_buf += ret; total += ret; } fflush(control->outFILE); return true; } bool write_fdout(rzip_control *control, void *buf, i64 len) { uchar *offset_buf = buf; ssize_t ret; while (len > 0) { if (BITS32) ret = MIN(len, one_g); else ret = len; ret = write(control->fd_out, offset_buf, (size_t)ret); if (unlikely(ret <= 0)) fatal_return(("Failed to write to fd_out in write_fdout\n"), false); len -= ret; offset_buf += ret; } return true; } static bool flush_tmpoutbuf(rzip_control *control) { if (!TEST_ONLY) { print_maxverbose("Dumping buffer to physical file.\n"); if (STDOUT) { if (unlikely(!fwrite_stdout(control, control->tmp_outbuf, control->out_len))) return false; } else { if (unlikely(!write_fdout(control, control->tmp_outbuf, control->out_len))) return false; } } control->out_relofs += control->out_len; control->out_ofs = control->out_len = 0; return true; } /* Dump temporary outputfile to perform stdout */ static bool dump_tmpoutfile(rzip_control *control) { int tmpchar, fd_out = control->fd_out; FILE 
*tmpoutfp; if (unlikely(fd_out == -1)) fatal_return(("Failed: No temporary outfile created, unable to do in ram\n"), false); /* flush anything not yet in the temporary file */ fsync(fd_out); tmpoutfp = fdopen(fd_out, "r"); if (unlikely(tmpoutfp == NULL)) fatal_return(("Failed to fdopen out tmpfile\n"), false); rewind(tmpoutfp); if (!TEST_ONLY) { print_verbose("Dumping temporary file to control->outFILE.\n"); while ((tmpchar = fgetc(tmpoutfp)) != EOF) putchar(tmpchar); fflush(control->outFILE); rewind(tmpoutfp); } if (unlikely(ftruncate(fd_out, 0))) fatal_return(("Failed to ftruncate fd_out in dump_tmpoutfile\n"), false); return true; } bool flush_tmpout(rzip_control *control) { if (!STDOUT) return true; if (TMP_OUTBUF) return flush_tmpoutbuf(control); return dump_tmpoutfile(control); } /* Used if we're unable to read STDIN into the temporary buffer, shunts data * to temporary file */ bool write_fdin(rzip_control *control) { uchar *offset_buf = control->tmp_inbuf; i64 len = control->in_len; ssize_t ret; while (len > 0) { if (BITS32) ret = MIN(len, one_g); else ret = len; ret = write(control->fd_in, offset_buf, (size_t)ret); if (unlikely(ret <= 0)) fatal_return(("Failed to write to fd_in in write_fdin\n"), false); len -= ret; offset_buf += ret; } return true; } /* Open a temporary inputfile to perform stdin decompression */ int open_tmpinfile(rzip_control *control) { int fd_in = -1; /* Use temporary directory if there is one */ if (control->tmpdir) { control->infile = malloc(strlen(control->tmpdir) + 15); if (unlikely(!control->infile)) fatal_return(("Failed to allocate infile name\n"), -1); strcpy(control->infile, control->tmpdir); strcat(control->infile, "lrzipin.XXXXXX"); fd_in = mkstemp(control->infile); } /* Try the current directory */ if (fd_in == -1) { dealloc(control->infile); control->infile = malloc(16); if (unlikely(!control->infile)) fatal_return(("Failed to allocate infile name\n"), -1); strcpy(control->infile, "lrzipin.XXXXXX"); fd_in = 
mkstemp(control->infile); } /* Use /tmp if nothing is writeable so far */ if (fd_in == -1) { dealloc(control->infile); control->infile = malloc(20); if (unlikely(!control->infile)) fatal_return(("Failed to allocate infile name\n"), -1); strcpy(control->infile, "/tmp/lrzipin.XXXXXX"); fd_in = mkstemp(control->infile); } if (fd_in == -1) { print_output("WARNING: Failed to create in tmpfile: %s, will fail if cannot perform %scompression entirely in ram\n", control->infile, DECOMPRESS ? "de" : ""); } else { register_infile(control, control->infile, (DECOMPRESS || TEST_ONLY) && STDIN); /* Unlink temporary file immediately to minimise chance of files left * lying around in cases of failure_return((. */ if (unlikely(unlink(control->infile))) { fatal("Failed to unlink tmpfile: %s\n", control->infile); close(fd_in); return -1; } } return fd_in; } static bool read_tmpinmagic(rzip_control *control) { char magic[MAGIC_LEN]; int i, tmpchar; memset(magic, 0, sizeof(magic)); for (i = 0; i < 24; i++) { tmpchar = getchar(); if (unlikely(tmpchar == EOF)) failure_return(("Reached end of file on STDIN prematurely on v05 magic read\n"), false); magic[i] = (char)tmpchar; } return get_magic(control, magic); } /* Read data from stdin into temporary inputfile */ bool read_tmpinfile(rzip_control *control, int fd_in) { FILE *tmpinfp; int tmpchar; if (fd_in == -1) return false; if (control->flags & FLAG_SHOW_PROGRESS) fprintf(control->msgout, "Copying from stdin.\n"); tmpinfp = fdopen(fd_in, "w+"); if (unlikely(tmpinfp == NULL)) fatal_return(("Failed to fdopen in tmpfile\n"), false); while ((tmpchar = getchar()) != EOF) fputc(tmpchar, tmpinfp); fflush(tmpinfp); rewind(tmpinfp); return true; } /* To perform STDOUT, we allocate a proportion of ram that is then used as * a pseudo-temporary file */ static bool open_tmpoutbuf(rzip_control *control) { i64 maxlen = control->maxram; void *buf; while (42) { round_to_page(&maxlen); buf = malloc(maxlen); if (buf) { print_maxverbose("Malloced %"PRId64" 
for tmp_outbuf\n", maxlen); break; } maxlen = maxlen / 3 * 2; if (maxlen < 100000000) fatal_return(("Unable to even malloc 100MB for tmp_outbuf\n"), false); } control->flags |= FLAG_TMP_OUTBUF; /* Allocate slightly more so we can cope when the buffer overflows and * fall back to a real temporary file */ control->out_maxlen = maxlen - control->page_size; control->tmp_outbuf = buf; if (!DECOMPRESS && !TEST_ONLY) control->out_ofs = control->out_len = MAGIC_LEN;\ return true; } /* We've decided to use a temporary output file instead of trying to store * all the output buffer in ram so we can free up the ram and increase the * maximum sizes of ram we can allocate */ void close_tmpoutbuf(rzip_control *control) { control->flags &= ~FLAG_TMP_OUTBUF; dealloc(control->tmp_outbuf); if (!BITS32) control->usable_ram = control->maxram += control->ramsize / 18; } static bool open_tmpinbuf(rzip_control *control) { control->flags |= FLAG_TMP_INBUF; control->in_maxlen = control->maxram; control->tmp_inbuf = malloc(control->maxram + control->page_size); if (unlikely(!control->tmp_inbuf)) fatal_return(("Failed to malloc tmp_inbuf in open_tmpinbuf\n"), false); return true; } void clear_tmpinbuf(rzip_control *control) { control->in_len = control->in_ofs = 0; } bool clear_tmpinfile(rzip_control *control) { if (unlikely(lseek(control->fd_in, 0, SEEK_SET))) fatal_return(("Failed to lseek on fd_in in clear_tmpinfile\n"), false); if (unlikely(ftruncate(control->fd_in, 0))) fatal_return(("Failed to truncate fd_in in clear_tmpinfile\n"), false); return true; } /* As per temporary output file but for input file */ void close_tmpinbuf(rzip_control *control) { control->flags &= ~FLAG_TMP_INBUF; dealloc(control->tmp_inbuf); if (!BITS32) control->usable_ram = control->maxram += control->ramsize / 18; } static int get_pass(rzip_control *control, char *s) { int len; memset(s, 0, PASS_LEN - SALT_LEN); if (control->passphrase) strncpy(s, control->passphrase, PASS_LEN - SALT_LEN - 1); else if 
(unlikely(fgets(s, PASS_LEN - SALT_LEN, stdin) == NULL)) failure_return(("Failed to retrieve passphrase\n"), -1); len = strlen(s); if (len > 0 && ('\r' == s[len - 1] || '\n' == s[len - 1])) s[len - 1] = '\0'; if (len > 1 && ('\r' == s[len - 2] || '\n' == s[len - 2])) s[len - 2] = '\0'; len = strlen(s); if (unlikely(0 == len)) failure_return(("Empty passphrase\n"), -1); return len; } static bool get_hash(rzip_control *control, int make_hash) { char *passphrase, *testphrase; struct termios termios_p; int prompt = control->passphrase == NULL; passphrase = calloc(PASS_LEN, 1); testphrase = calloc(PASS_LEN, 1); control->salt_pass = calloc(PASS_LEN, 1); control->hash = calloc(HASH_LEN, 1); if (unlikely(!passphrase || !testphrase || !control->salt_pass || !control->hash)) { fatal("Failed to calloc encrypt buffers in compress_file\n"); dealloc(testphrase); dealloc(passphrase); return false; } mlock(passphrase, PASS_LEN); mlock(testphrase, PASS_LEN); mlock(control->salt_pass, PASS_LEN); mlock(control->hash, HASH_LEN); if (control->pass_cb) { control->pass_cb(control->pass_data, passphrase, PASS_LEN - SALT_LEN); if (!passphrase[0]) { fatal("Supplied password was null!"); munlock(passphrase, PASS_LEN); munlock(testphrase, PASS_LEN); dealloc(testphrase); dealloc(passphrase); release_hashes(control); return false; } control->salt_pass_len = strlen(passphrase) + SALT_LEN; } else { /* Disable stdin echo to screen */ tcgetattr(fileno(stdin), &termios_p); termios_p.c_lflag &= ~ECHO; tcsetattr(fileno(stdin), 0, &termios_p); retry_pass: if (prompt) print_output("Enter passphrase: "); control->salt_pass_len = get_pass(control, passphrase) + SALT_LEN; if (prompt) print_output("\n"); if (make_hash) { if (prompt) print_output("Re-enter passphrase: "); get_pass(control, testphrase); if (prompt) print_output("\n"); if (strcmp(passphrase, testphrase)) { print_output("Passwords do not match. 
Try again.\n"); goto retry_pass; } } termios_p.c_lflag |= ECHO; tcsetattr(fileno(stdin), 0, &termios_p); memset(testphrase, 0, PASS_LEN); } memcpy(control->salt_pass, control->salt, SALT_LEN); memcpy(control->salt_pass + SALT_LEN, passphrase, PASS_LEN - SALT_LEN); lrz_stretch(control); memset(passphrase, 0, PASS_LEN); munlock(passphrase, PASS_LEN); munlock(testphrase, PASS_LEN); dealloc(testphrase); dealloc(passphrase); return true; } static void release_hashes(rzip_control *control) { memset(control->salt_pass, 0, PASS_LEN); memset(control->hash, 0, SALT_LEN); munlock(control->salt_pass, PASS_LEN); munlock(control->hash, HASH_LEN); dealloc(control->salt_pass); dealloc(control->hash); } static void clear_rulist(rzip_control *control) { while (control->ruhead) { struct runzip_node *node = control->ruhead; struct stream_info *sinfo = node->sinfo; dealloc(sinfo->ucthreads); dealloc(node->pthreads); dealloc(sinfo->s); dealloc(sinfo); control->ruhead = node->prev; dealloc(node); } } /* decompress one file from the command line */ bool decompress_file(rzip_control *control) { char *tmp, *tmpoutfile, *infilecopy = NULL; int fd_in, fd_out = -1, fd_hist = -1; i64 expected_size = 0, free_space; struct statvfs fbuf; if (!STDIN && !IS_FROM_FILE) { struct stat fdin_stat; stat(control->infile, &fdin_stat); if (!S_ISREG(fdin_stat.st_mode) && (tmp = strrchr(control->infile, '.')) && strcmp(tmp,control->suffix)) { /* make sure infile has an extension. 
If not, add it * because manipulations may be made to input filename, set local ptr */ infilecopy = alloca(strlen(control->infile) + strlen(control->suffix) + 1); strcpy(infilecopy, control->infile); strcat(infilecopy, control->suffix); } else infilecopy = strdupa(control->infile); /* regardless, infilecopy has the input filename */ } if (!STDOUT && !TEST_ONLY) { /* if output name already set, use it */ if (control->outname) { control->outfile = strdup(control->outname); } else { /* default output name from infilecopy * test if outdir specified. If so, strip path from filename of * infilecopy, then remove suffix. */ if (control->outdir && (tmp = strrchr(infilecopy, '/'))) tmpoutfile = strdupa(tmp + 1); else tmpoutfile = strdupa(infilecopy); /* remove suffix to make outfile name */ if ((tmp = strrchr(tmpoutfile, '.')) && !strcmp(tmp, control->suffix)) *tmp='\0'; control->outfile = malloc((control->outdir == NULL? 0: strlen(control->outdir)) + strlen(tmpoutfile) + 1); if (unlikely(!control->outfile)) fatal_return(("Failed to allocate outfile name\n"), false); if (control->outdir) { /* prepend control->outdir */ strcpy(control->outfile, control->outdir); strcat(control->outfile, tmpoutfile); } else strcpy(control->outfile, tmpoutfile); } if (!STDOUT) print_output("Output filename is: %s\n", control->outfile); } if ( IS_FROM_FILE ) { fd_in = fileno(control->inFILE); } else if (STDIN) { fd_in = open_tmpinfile(control); read_tmpinmagic(control); if (ENCRYPT) failure_return(("Cannot decompress encrypted file from STDIN\n"), false); expected_size = control->st_size; if (unlikely(!open_tmpinbuf(control))) return false; } else { fd_in = open(infilecopy, O_RDONLY); if (unlikely(fd_in == -1)) { fatal_return(("Failed to open %s\n", infilecopy), false); } } control->fd_in = fd_in; if (!(TEST_ONLY | STDOUT)) { fd_out = open(control->outfile, O_WRONLY | O_CREAT | O_EXCL, 0666); if (FORCE_REPLACE && (-1 == fd_out) && (EEXIST == errno)) { if (unlikely(unlink(control->outfile))) 
fatal_return(("Failed to unlink an existing file: %s\n", control->outfile), false);
			fd_out = open(control->outfile, O_WRONLY | O_CREAT | O_EXCL, 0666);
		}
		if (unlikely(fd_out == -1)) {
			/* We must ensure we don't delete a file that already
			 * exists just because we tried to create a new one */
			control->flags |= FLAG_KEEP_BROKEN;
			fatal_return(("Failed to create %s\n", control->outfile), false);
		}
		/* Second, read-only descriptor on the same output file; it is
		 * later handed to runzip_fd() as fd_hist */
		fd_hist = open(control->outfile, O_RDONLY);
		if (unlikely(fd_hist == -1))
			fatal_return(("Failed to open history file %s\n", control->outfile), false);

		/* Can't copy permissions from STDIN */
		if (!STDIN)
			if (unlikely(!preserve_perms(control, fd_in, fd_out)))
				return false;
	} else {
		/* STDOUT or test mode: work through a temporary file. Failing
		 * to create it is not an error here; both descriptors simply
		 * stay at -1 */
		fd_out = open_tmpoutfile(control);
		if (fd_out == -1) {
			fd_hist = -1;
		} else {
			fd_hist = open(control->outfile, O_RDONLY);
			if (unlikely(fd_hist == -1))
				fatal_return(("Failed to open history file %s\n", control->outfile), false);
			/* Unlink temporary file as soon as possible */
			if (unlikely(unlink(control->outfile)))
				fatal_return(("Failed to unlink tmpfile: %s\n", control->outfile), false);
		}
	}

	if (STDOUT) {
		if (unlikely(!open_tmpoutbuf(control)))
			return false;
	}

	/* For STDIN the header was already consumed by read_tmpinmagic() */
	if (!STDIN) {
		if (unlikely(!read_magic(control, fd_in, &expected_size)))
			return false;
		if (unlikely(expected_size < 0))
			fatal_return(("Invalid expected size %lld\n", expected_size), false);
	}

	if (!STDOUT && !TEST_ONLY) {
		/* Check if there's enough free space on the device chosen to fit the
		 * decompressed file.
*/ if (unlikely(fstatvfs(fd_out, &fbuf))) fatal_return(("Failed to fstatvfs in decompress_file\n"), false); free_space = (i64)fbuf.f_bsize * (i64)fbuf.f_bavail; if (free_space < expected_size) { if (FORCE_REPLACE) print_err("Warning, inadequate free space detected, but attempting to decompress due to -f option being used.\n"); else failure_return(("Inadequate free space to decompress file, use -f to override.\n"), false); } } control->fd_out = fd_out; control->fd_hist = fd_hist; if (NO_MD5) print_verbose("Not performing MD5 hash check\n"); if (HAS_MD5) print_verbose("MD5 "); else print_verbose("CRC32 "); print_verbose("being used for integrity testing.\n"); if (ENCRYPT) if (unlikely(!get_hash(control, 0))) return false; print_output("Decompressing...\n"); if (unlikely(runzip_fd(control, fd_in, fd_hist, expected_size) < 0)) { clear_rulist(control); return false; } /* We can now safely delete sinfo and pthread data of all threads * created. */ clear_rulist(control); /* if we get here, no fatal_return(( errors during decompression */ print_progress("\r"); if (!(STDOUT | TEST_ONLY)) print_output("Output filename is: %s: ", control->outfile); if (!expected_size) expected_size = control->st_size; if (!ENCRYPT) print_output("[OK] - %lld bytes \n", expected_size); else print_output("[OK] \n"); if (TMP_OUTBUF) close_tmpoutbuf(control); if (fd_out > 0) { if (unlikely(close(fd_hist) || close(fd_out))) fatal_return(("Failed to close files\n"), false); } if (unlikely(!STDIN && !STDOUT && !TEST_ONLY && !preserve_times(control, fd_in))) return false; if ( ! 
IS_FROM_FILE ) { close(fd_in); } if (!KEEP_FILES && !STDIN) { if (unlikely(unlink(control->infile))) fatal_return(("Failed to unlink %s\n", infilecopy), false); } if (ENCRYPT) release_hashes(control); dealloc(control->outfile); return true; } bool get_header_info(rzip_control *control, int fd_in, uchar *ctype, i64 *c_len, i64 *u_len, i64 *last_head, int chunk_bytes) { if (unlikely(read(fd_in, ctype, 1) != 1)) fatal_return(("Failed to read in get_header_info\n"), false); *c_len = *u_len = *last_head = 0; if (control->major_version == 0 && control->minor_version < 4) { u32 c_len32, u_len32, last_head32; if (unlikely(read(fd_in, &c_len32, 4) != 4)) fatal_return(("Failed to read in get_header_info"), false); if (unlikely(read(fd_in, &u_len32, 4) != 4)) fatal_return(("Failed to read in get_header_info"), false); if (unlikely(read(fd_in, &last_head32, 4) != 4)) fatal_return(("Failed to read in get_header_info"), false); c_len32 = le32toh(c_len32); u_len32 = le32toh(u_len32); last_head32 = le32toh(last_head32); *c_len = c_len32; *u_len = u_len32; *last_head = last_head32; } else { int read_len; if (control->major_version == 0 && control->minor_version == 5) read_len = 8; else read_len = chunk_bytes; if (unlikely(read(fd_in, c_len, read_len) != read_len)) fatal_return(("Failed to read in get_header_info"), false); if (unlikely(read(fd_in, u_len, read_len) != read_len)) fatal_return(("Failed to read in get_header_info"), false); if (unlikely(read(fd_in, last_head, read_len) != read_len)) fatal_return(("Failed to read_i64 in get_header_info"), false); *c_len = le64toh(*c_len); *u_len = le64toh(*u_len); *last_head = le64toh(*last_head); } return true; } static double percentage(i64 num, i64 den) { double d_num, d_den; if (den < 100) { d_num = num * 100; d_den = den; if (!d_den) d_den = 1; } else { d_num = num; d_den = den / 100; } return d_num / d_den; } bool get_fileinfo(rzip_control *control) { i64 u_len, c_len, second_last, last_head, utotal = 0, ctotal = 0, ofs = 25, 
stream_head[2]; i64 expected_size, infile_size, chunk_size = 0, chunk_total = 0; int header_length, stream = 0, chunk = 0; char *tmp, *infilecopy = NULL; char chunk_byte = 0; long double cratio; uchar ctype = 0; uchar save_ctype = 255; struct stat st; int fd_in; if (!STDIN) { struct stat fdin_stat; stat(control->infile, &fdin_stat); if (!S_ISREG(fdin_stat.st_mode) && (tmp = strrchr(control->infile, '.')) && strcmp(tmp,control->suffix)) { infilecopy = alloca(strlen(control->infile) + strlen(control->suffix) + 1); strcpy(infilecopy, control->infile); strcat(infilecopy, control->suffix); } else infilecopy = strdupa(control->infile); } if ( IS_FROM_FILE ) fd_in = fileno(control->inFILE); else if (STDIN) fd_in = 0; else { fd_in = open(infilecopy, O_RDONLY); if (unlikely(fd_in == -1)) fatal_return(("Failed to open %s\n", infilecopy), false); } /* Get file size */ if (unlikely(fstat(fd_in, &st))) fatal_goto(("bad magic file descriptor!?\n"), error); infile_size = st.st_size; /* Get decompressed size */ if (unlikely(!read_magic(control, fd_in, &expected_size))) goto error; if (ENCRYPT) { print_output("Encrypted lrzip archive. 
No further information available\n"); if (!STDIN && !IS_FROM_FILE) close(fd_in); goto out; } if (control->major_version == 0 && control->minor_version > 4) { if (unlikely(read(fd_in, &chunk_byte, 1) != 1)) fatal_goto(("Failed to read chunk_byte in get_fileinfo\n"), error); if (unlikely(chunk_byte < 1 || chunk_byte > 8)) fatal_goto(("Invalid chunk bytes %d\n", chunk_byte), error); if (control->major_version == 0 && control->minor_version > 5) { if (unlikely(read(fd_in, &control->eof, 1) != 1)) fatal_goto(("Failed to read eof in get_fileinfo\n"), error); if (unlikely(read(fd_in, &chunk_size, chunk_byte) != chunk_byte)) fatal_goto(("Failed to read chunk_size in get_fileinfo\n"), error); chunk_size = le64toh(chunk_size); if (unlikely(chunk_size < 0)) fatal_goto(("Invalid chunk size %lld\n", chunk_size), error); } } if (control->major_version == 0 && control->minor_version < 4) { ofs = 24; header_length = 13; } else if (control->major_version == 0 && control->minor_version == 4) { ofs = 24; header_length = 25; } else if (control->major_version == 0 && control->minor_version == 5) { ofs = 25; header_length = 25; } else { ofs = 26 + chunk_byte; header_length = 1 + (chunk_byte * 3); } if (control->major_version == 0 && control->minor_version < 6 && !expected_size) goto done; next_chunk: stream = 0; stream_head[0] = 0; stream_head[1] = stream_head[0] + header_length; print_verbose("Rzip chunk: %d\n", ++chunk); if (chunk_byte) print_verbose("Chunk byte width: %d\n", chunk_byte); if (chunk_size) { chunk_total += chunk_size; print_verbose("Chunk size: %"PRId64"\n", chunk_size); } if (unlikely(chunk_byte && (chunk_byte > 8 || chunk_size < 0))) failure("Invalid chunk data\n"); while (stream < NUM_STREAMS) { int block = 1; second_last = 0; if (unlikely(lseek(fd_in, stream_head[stream] + ofs, SEEK_SET) == -1)) fatal_goto(("Failed to seek to header data in get_fileinfo\n"), error); if (unlikely(!get_header_info(control, fd_in, &ctype, &c_len, &u_len, &last_head, chunk_byte))) 
return false; print_verbose("Stream: %d\n", stream); print_maxverbose("Offset: %"PRId64"\n", stream_head[stream] + ofs); print_verbose("%s\t%s\t%s\t%16s / %14s", "Block","Comp","Percent","Comp Size", "UComp Size"); print_maxverbose("%18s : %14s", "Offset", "Head"); print_verbose("\n"); do { i64 head_off; if (unlikely(last_head && last_head < second_last)) failure_goto(("Invalid earlier last_head position, corrupt archive.\n"), error); second_last = last_head; if (unlikely(last_head + ofs > infile_size)) failure_goto(("Offset greater than archive size, likely corrupted/truncated archive.\n"), error); if (unlikely((head_off = lseek(fd_in, last_head + ofs, SEEK_SET)) == -1)) fatal_goto(("Failed to seek to header data in get_fileinfo\n"), error); if (unlikely(!get_header_info(control, fd_in, &ctype, &c_len, &u_len, &last_head, chunk_byte))) return false; if (unlikely(last_head < 0 || c_len < 0 || u_len < 0)) failure_goto(("Entry negative, likely corrupted archive.\n"), error); print_verbose("%d\t", block); if (ctype == CTYPE_NONE) print_verbose("none"); else if (ctype == CTYPE_BZIP2) print_verbose("bzip2"); else if (ctype == CTYPE_LZO) print_verbose("lzo"); else if (ctype == CTYPE_LZMA) print_verbose("lzma"); else if (ctype == CTYPE_GZIP) print_verbose("gzip"); else if (ctype == CTYPE_ZPAQ) print_verbose("zpaq"); else print_verbose("Dunno wtf"); if (save_ctype == 255) save_ctype = ctype; /* need this for lzma when some chunks could have no compression * and info will show rzip + none on info display if last chunk * is not compressed. 
Adjust for all types in case it's used in * the future */ utotal += u_len; ctotal += c_len; print_verbose("\t%5.1f%%\t%16"PRId64" / %14"PRId64"", percentage(c_len, u_len), c_len, u_len); print_maxverbose("%18"PRId64" : %14"PRId64"", head_off, last_head); print_verbose("\n"); block++; } while (last_head); ++stream; } if (unlikely((ofs = lseek(fd_in, c_len, SEEK_CUR)) == -1)) fatal_goto(("Failed to lseek c_len in get_fileinfo\n"), error); if (ofs >= infile_size - (HAS_MD5 ? MD5_DIGEST_SIZE : 0)) goto done; /* Chunk byte entry */ if (control->major_version == 0 && control->minor_version > 4) { if (unlikely(read(fd_in, &chunk_byte, 1) != 1)) fatal_goto(("Failed to read chunk_byte in get_fileinfo\n"), error); if (unlikely(chunk_byte < 1 || chunk_byte > 8)) fatal_goto(("Invalid chunk bytes %d\n", chunk_byte), error); ofs++; if (control->major_version == 0 && control->minor_version > 5) { if (unlikely(read(fd_in, &control->eof, 1) != 1)) fatal_goto(("Failed to read eof in get_fileinfo\n"), error); if (unlikely(read(fd_in, &chunk_size, chunk_byte) != chunk_byte)) fatal_goto(("Failed to read chunk_size in get_fileinfo\n"), error); chunk_size = le64toh(chunk_size); if (unlikely(chunk_size < 0)) fatal_goto(("Invalid chunk size %lld\n", chunk_size), error); ofs += 1 + chunk_byte; header_length = 1 + (chunk_byte * 3); } } goto next_chunk; done: cratio = (long double)expected_size / (long double)infile_size; if (unlikely(ofs > infile_size)) failure_goto(("Offset greater than archive size, likely corrupted/truncated archive.\n"), error); print_output("\nSummary\n=======\n"); print_output("File: %s\nlrzip version: %d.%d \n\n", infilecopy, control->major_version, control->minor_version); if (!expected_size) print_output("Due to %s, expected decompression size not available\n", "Compression to STDOUT"); print_verbose(" Stats Percent Compressed / Uncompressed\n -------------------------------------------------------\n"); /* If we can't show expected size, tailor output for it */ if 
(expected_size) { print_verbose(" Rzip: %5.1f%%\t%16"PRId64" / %14"PRId64"\n", percentage (utotal, expected_size), utotal, expected_size); print_verbose(" Back end: %5.1f%%\t%16"PRId64" / %14"PRId64"\n", percentage(ctotal, utotal), ctotal, utotal); print_verbose(" Overall: %5.1f%%\t%16"PRId64" / %14"PRId64"\n", percentage(ctotal, expected_size), ctotal, expected_size); } else { print_verbose(" Rzip: Unavailable\n"); print_verbose(" Back end: %5.1f%%\t%16"PRId64" / %14"PRId64"\n", percentage(ctotal, utotal), ctotal, utotal); print_verbose(" Overall: Unavailable\n"); } print_verbose("\n"); print_output(" Compression Method: "); if (save_ctype == CTYPE_NONE) print_output("rzip alone\n"); else if (save_ctype == CTYPE_BZIP2) print_output("rzip + bzip2\n"); else if (save_ctype == CTYPE_LZO) print_output("rzip + lzo\n"); else if (save_ctype == CTYPE_LZMA) print_output("rzip + lzma\n"); else if (save_ctype == CTYPE_GZIP) print_output("rzip + gzip\n"); else if (save_ctype == CTYPE_ZPAQ) print_output("rzip + zpaq\n"); else print_output("Dunno wtf\n"); print_output("\n"); if (expected_size) { print_output(" Decompressed file size: %14"PRIu64"\n", expected_size); print_output(" Compressed file size: %14"PRIu64"\n", infile_size); print_output(" Compression ratio: %14.3Lfx\n", cratio); } else { print_output(" Decompressed file size: Unavailable\n"); print_output(" Compressed file size: %14"PRIu64"\n", infile_size); print_output(" Compression ratio: Unavailable\n"); } if (HAS_MD5) { char md5_stored[MD5_DIGEST_SIZE]; int i; if (unlikely(lseek(fd_in, -MD5_DIGEST_SIZE, SEEK_END) == -1)) fatal_goto(("Failed to seek to md5 data in runzip_fd\n"), error); if (unlikely(read(fd_in, md5_stored, MD5_DIGEST_SIZE) != MD5_DIGEST_SIZE)) fatal_goto(("Failed to read md5 data in runzip_fd\n"), error); print_output("\n MD5 Checksum: "); for (i = 0; i < MD5_DIGEST_SIZE; i++) print_output("%02x", md5_stored[i] & 0xFF); print_output("\n"); } else print_output("\n CRC32 used for integrity testing\n"); 
if ( !IS_FROM_FILE ) if (unlikely(close(fd_in))) fatal_return(("Failed to close fd_in in get_fileinfo\n"), false); out: dealloc(control->outfile); return true; error: if (!STDIN && ! IS_FROM_FILE) close(fd_in); return false; } /* compress one file from the command line */ bool compress_file(rzip_control *control) { const char *tmp, *tmpinfile; /* we're just using this as a proxy for control->infile. * Spares a compiler warning */ int fd_in = -1, fd_out = -1; char header[MAGIC_LEN]; control->flags |= FLAG_MD5; if (ENCRYPT) if (unlikely(!get_hash(control, 1))) return false; memset(header, 0, sizeof(header)); if ( IS_FROM_FILE ) fd_in = fileno(control->inFILE); else if (!STDIN) { /* is extension at end of infile? */ if ((tmp = strrchr(control->infile, '.')) && !strcmp(tmp, control->suffix)) { print_err("%s: already has %s suffix. Skipping...\n", control->infile, control->suffix); return false; } fd_in = open(control->infile, O_RDONLY); if (unlikely(fd_in == -1)) fatal_return(("Failed to open %s\n", control->infile), false); } else fd_in = 0; if (!STDOUT) { if (control->outname) { /* check if outname has control->suffix */ if (*(control->suffix) == '\0') /* suffix is empty string */ control->outfile = strdup(control->outname); else if ((tmp=strrchr(control->outname, '.')) && strcmp(tmp, control->suffix)) { control->outfile = malloc(strlen(control->outname) + strlen(control->suffix) + 1); if (unlikely(!control->outfile)) fatal_goto(("Failed to allocate outfile name\n"), error); strcpy(control->outfile, control->outname); strcat(control->outfile, control->suffix); print_output("Suffix added to %s.\nFull pathname is: %s\n", control->outname, control->outfile); } else /* no, already has suffix */ control->outfile = strdup(control->outname); } else { /* default output name from control->infile * test if outdir specified. 
If so, strip path from filename of * control->infile */ if (control->outdir && (tmp = strrchr(control->infile, '/'))) tmpinfile = tmp + 1; else tmpinfile = control->infile; control->outfile = malloc((control->outdir == NULL? 0: strlen(control->outdir)) + strlen(tmpinfile) + strlen(control->suffix) + 1); if (unlikely(!control->outfile)) fatal_goto(("Failed to allocate outfile name\n"), error); if (control->outdir) { /* prepend control->outdir */ strcpy(control->outfile, control->outdir); strcat(control->outfile, tmpinfile); } else strcpy(control->outfile, tmpinfile); strcat(control->outfile, control->suffix); print_output("Output filename is: %s\n", control->outfile); } fd_out = open(control->outfile, O_RDWR | O_CREAT | O_EXCL, 0666); if (FORCE_REPLACE && (-1 == fd_out) && (EEXIST == errno)) { if (unlikely(unlink(control->outfile))) fatal_goto(("Failed to unlink an existing file: %s\n", control->outfile), error); fd_out = open(control->outfile, O_RDWR | O_CREAT | O_EXCL, 0666); } if (unlikely(fd_out == -1)) { /* We must ensure we don't delete a file that already * exists just because we tried to create a new one */ control->flags |= FLAG_KEEP_BROKEN; fatal_goto(("Failed to create %s\n", control->outfile), error); } control->fd_out = fd_out; if (!STDIN) { if (unlikely(!preserve_perms(control, fd_in, fd_out))) goto error; } } else { control->fd_out = fd_out = open_tmpoutfile(control); if (likely(fd_out != -1)) { /* Unlink temporary file as soon as possible */ if (unlikely(unlink(control->outfile))) fatal_return(("Failed to unlink tmpfile: %s\n", control->outfile), false); } if (unlikely(!open_tmpoutbuf(control))) goto error; } /* Write zeroes to header at beginning of file */ if (unlikely(!STDOUT && write(fd_out, header, sizeof(header)) != sizeof(header))) fatal_goto(("Cannot write file header\n"), error); rzip_fd(control, fd_in, fd_out); /* Write magic at end b/c lzma does not tell us properties until it is done */ if (!STDOUT) { if (unlikely(!write_magic(control))) 
goto error; } if (ENCRYPT) release_hashes(control); if (unlikely(!STDIN && !STDOUT && !preserve_times(control, fd_in))) { fatal("Failed to preserve times on output file\n"); goto error; } if (unlikely(close(fd_in))) { fatal("Failed to close fd_in\n"); fd_in = -1; goto error; } if (unlikely(!STDOUT && close(fd_out))) fatal_return(("Failed to close fd_out\n"), false); if (TMP_OUTBUF) close_tmpoutbuf(control); if (!KEEP_FILES && !STDIN) { if (unlikely(unlink(control->infile))) fatal_return(("Failed to unlink %s\n", control->infile), false); } dealloc(control->outfile); return true; error: if (! IS_FROM_FILE && STDIN && (fd_in > 0)) close(fd_in); if ((!STDOUT) && (fd_out > 0)) close(fd_out); return false; } bool initialise_control(rzip_control *control) { time_t now_t, tdiff; char localeptr[] = "./", *eptr; /* for environment */ size_t len; memset(control, 0, sizeof(rzip_control)); control->msgout = stderr; control->msgerr = stderr; register_outputfile(control, control->msgout); control->flags = FLAG_SHOW_PROGRESS | FLAG_KEEP_FILES | FLAG_THRESHOLD; control->suffix = ".lrz"; control->compression_level = 7; control->ramsize = get_ram(control); if (unlikely(control->ramsize == -1)) return false; /* for testing single CPU */ control->threads = PROCESSORS; /* get CPUs for LZMA */ control->page_size = PAGE_SIZE; control->nice_val = 19; /* The first 5 bytes of the salt is the time in seconds. * The next 2 bytes encode how many times to hash the password. * The last 9 bytes are random data, making 16 bytes of salt */ if (unlikely((now_t = time(NULL)) == ((time_t)-1))) fatal_return(("Failed to call time in main\n"), false); if (unlikely(now_t < T_ZERO)) { print_output("Warning your time reads before the year 2011, check your system clock\n"); now_t = T_ZERO; } /* Workaround for CPUs no longer keeping up with Moore's law! * This way we keep the magic header format unchanged. 
*/ tdiff = (now_t - T_ZERO) / 4; now_t = T_ZERO + tdiff; control->secs = now_t; control->encloops = nloops(control->secs, control->salt, control->salt + 1); if (unlikely(!get_rand(control, control->salt + 2, 6))) return false; /* Get Temp Dir. Try variations on canonical unix environment variable */ eptr = getenv("TMPDIR"); if (!eptr) eptr = getenv("TMP"); if (!eptr) eptr = getenv("TEMPDIR"); if (!eptr) eptr = getenv("TEMP"); if (!eptr) eptr = localeptr; len = strlen(eptr); control->tmpdir = malloc(len + 2); if (control->tmpdir == NULL) fatal_return(("Failed to allocate for tmpdir\n"), false); strcpy(control->tmpdir, eptr); if (control->tmpdir[len - 1] != '/') { control->tmpdir[len] = '/'; /* need a trailing slash */ control->tmpdir[len + 1] = '\0'; } return true; } lrzip-0.651/lrzip_core.h000066400000000000000000000042201421175057200152100ustar00rootroot00000000000000/* Copyright (C) 2006-2016,2022 Con Kolivas Copyright (C) 2011 Peter Hyman Copyright (C) 1998-2003 Andrew Tridgell This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/

/* Prototypes for the core lrzip routines; the types used here come from
 * lrzip_private.h. */
#ifndef LRZIP_CORE_H
#define LRZIP_CORE_H

#include "lrzip_private.h"

/* initialise_control() stores the result in control->ramsize and treats -1 as
 * failure. */
i64 get_ram(rzip_control *control);
/* initialise_control() passes control->secs and the first two salt bytes and
 * stores the result in control->encloops. */
i64 nloops(i64 seconds, uchar *b1, uchar *b2);
/* The bool-returning functions below are treated by their visible callers as
 * "false means failure". */
bool write_magic(rzip_control *control);
bool read_magic(rzip_control *control, int fd_in, i64 *expected_size);
bool preserve_perms(rzip_control *control, int fd_in, int fd_out);
/* compress_file() uses the return value as a file descriptor and checks it
 * against -1. */
int open_tmpoutfile(rzip_control *control);
bool flush_tmpout(rzip_control *control);
int open_tmpinfile(rzip_control *control);
bool read_tmpinfile(rzip_control *control, int fd_in);
bool decompress_file(rzip_control *control);
/* NOTE(review): the calls in get_fileinfo() pass a seventh argument
 * (chunk_byte) after last_head, which this six-parameter prototype does not
 * declare -- confirm against the definition and update or drop this
 * declaration. */
bool get_header_info(rzip_control *control, int fd_in, uchar *ctype, i64 *c_len, i64 *u_len, i64 *last_head);
bool get_fileinfo(rzip_control *control);
bool compress_file(rzip_control *control);
bool write_fdout(rzip_control *control, void *buf, i64 len);
bool write_fdin(rzip_control *control);
void close_tmpoutbuf(rzip_control *control);
void clear_tmpinbuf(rzip_control *control);
bool clear_tmpinfile(rzip_control *control);
void close_tmpinbuf(rzip_control *control);
bool initialise_control(rzip_control *control);
/* American-spelling alias for initialise_control() */
#define initialize_control(_control) initialise_control(_control)

/* zpaq back end entry points, defined outside this header */
extern void zpaq_compress(uchar *c_buf, i64 *c_len, uchar *s_buf, i64 s_len, int level, FILE *msgout, bool progress, long thread);
extern void zpaq_decompress(uchar *s_buf, i64 *d_len, uchar *c_buf, i64 c_len, FILE *msgout, bool progress, long thread);

#endif
lrzip-0.651/lrzip_private.h000066400000000000000000000343061421175057200157420ustar00rootroot00000000000000/* Copyright (C) 2006-2016,2018,2021-2022 Con Kolivas Copyright (C) 2011 Peter Hyman Copyright (C) 1998-2003 Andrew Tridgell This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef LRZIP_PRIV_H #define LRZIP_PRIV_H #include "config.h" #define NUM_STREAMS 2 #define STREAM_BUFSIZE (1024 * 1024 * 10) #include #include #include #include #include #include #ifdef HAVE_PTHREAD_H # include #endif #ifdef HAVE_STRING_H # include #endif #ifdef HAVE_MALLOC_H # include #endif #ifdef HAVE_ALLOCA_H # include #elif defined __GNUC__ # define alloca __builtin_alloca #elif defined _AIX # define alloca __alloca #elif defined _MSC_VER # include # define alloca _alloca #else # include # ifdef __cplusplus extern "C" # endif void *alloca (size_t); #endif #ifdef HAVE_ENDIAN_H # include #elif HAVE_SYS_ENDIAN_H # include #endif #ifndef __BYTE_ORDER # ifndef __BIG_ENDIAN # define __BIG_ENDIAN 4321 # define __LITTLE_ENDIAN 1234 # endif # ifdef WORDS_BIGENDIAN # define __BYTE_ORDER __BIG_ENDIAN # else # define __BYTE_ORDER __LITTLE_ENDIAN # endif #endif #ifndef MD5_DIGEST_SIZE # define MD5_DIGEST_SIZE 16 #endif #define free(X) do { free((X)); (X) = NULL; } while (0) #ifndef strdupa # define strdupa(str) strcpy(alloca(strlen(str) + 1), str) #endif #ifndef strndupa # define strndupa(str, len) strncpy(alloca(len + 1), str, len) #endif #ifndef uchar #define uchar unsigned char #endif #ifndef int32 #if (SIZEOF_INT == 4) #define int32 int #elif (SIZEOF_LONG == 4) #define int32 long #elif (SIZEOF_SHORT == 4) #define int32 short #endif #endif #ifndef int16 #if (SIZEOF_INT == 2) #define int16 int #elif (SIZEOF_SHORT == 2) #define int16 short #endif #endif #ifndef uint32 #define uint32 unsigned int32 #endif #ifndef uint16 #define uint16 unsigned int16 #endif #ifndef MIN #define MIN(a, b) ((a) < (b)? 
(a): (b)) #endif #ifndef MAX #define MAX(a, b) ((a) > (b)? (a): (b)) #endif #if !HAVE_STRERROR extern char *sys_errlist[]; #define strerror(i) sys_errlist[i] #endif #ifndef HAVE_ERRNO_H extern int errno; #endif #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) #define __maybe_unused __attribute__((unused)) #if defined(__MINGW32__) || defined(__CYGWIN__) || defined(__ANDROID__) || defined(__APPLE__) # define ffsll __builtin_ffsll #endif typedef int64_t i64; typedef uint32_t u32; typedef struct rzip_control rzip_control; typedef struct md5_ctx md5_ctx; /* ck specific unnamed semaphore implementations to cope with osx not * implementing them. */ #ifdef __APPLE__ struct cksem { int pipefd[2]; }; typedef struct cksem cksem_t; #else typedef sem_t cksem_t; #endif #if !defined(__linux) #define mremap fake_mremap #endif #define bswap_32(x) \ ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \ (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24)) # define bswap_64(x) \ ((((x) & 0xff00000000000000ull) >> 56) \ | (((x) & 0x00ff000000000000ull) >> 40) \ | (((x) & 0x0000ff0000000000ull) >> 24) \ | (((x) & 0x000000ff00000000ull) >> 8) \ | (((x) & 0x00000000ff000000ull) << 8) \ | (((x) & 0x0000000000ff0000ull) << 24) \ | (((x) & 0x000000000000ff00ull) << 40) \ | (((x) & 0x00000000000000ffull) << 56)) #ifdef leto32h # define le32toh(x) leto32h(x) # define le64toh(x) leto64h(x) #endif #ifndef le32toh # if __BYTE_ORDER == __LITTLE_ENDIAN # define htole32(x) (x) # define le32toh(x) (x) # define htole64(x) (x) # define le64toh(x) (x) # elif __BYTE_ORDER == __BIG_ENDIAN # define htole32(x) bswap_32 (x) # define le32toh(x) bswap_32 (x) # define htole64(x) bswap_64 (x) # define le64toh(x) bswap_64 (x) #else #error UNKNOWN BYTE ORDER #endif #endif #define FLAG_SHOW_PROGRESS (1 << 0) #define FLAG_KEEP_FILES (1 << 1) #define FLAG_TEST_ONLY (1 << 2) #define FLAG_FORCE_REPLACE (1 << 3) #define FLAG_DECOMPRESS (1 << 4) #define FLAG_NO_COMPRESS 
(1 << 5) #define FLAG_LZO_COMPRESS (1 << 6) #define FLAG_BZIP2_COMPRESS (1 << 7) #define FLAG_ZLIB_COMPRESS (1 << 8) #define FLAG_ZPAQ_COMPRESS (1 << 9) #define FLAG_VERBOSITY (1 << 10) #define FLAG_VERBOSITY_MAX (1 << 11) #define FLAG_STDIN (1 << 12) #define FLAG_STDOUT (1 << 13) #define FLAG_INFO (1 << 14) #define FLAG_UNLIMITED (1 << 15) #define FLAG_HASH (1 << 16) #define FLAG_MD5 (1 << 17) #define FLAG_CHECK (1 << 18) #define FLAG_KEEP_BROKEN (1 << 19) #define FLAG_THRESHOLD (1 << 20) #define FLAG_TMP_OUTBUF (1 << 21) #define FLAG_TMP_INBUF (1 << 22) #define FLAG_ENCRYPT (1 << 23) #define FLAG_OUTPUT (1 << 24) #define NO_MD5 (!(HASH_CHECK) && !(HAS_MD5)) #define BITS32 (sizeof(long) == 4) #define CTYPE_NONE 3 #define CTYPE_BZIP2 4 #define CTYPE_LZO 5 #define CTYPE_LZMA 6 #define CTYPE_GZIP 7 #define CTYPE_ZPAQ 8 #define PASS_LEN 512 #define HASH_LEN 64 #define SALT_LEN 8 #define CBC_LEN 16 #define one_g (1000 * 1024 * 1024) #if defined(NOTHREAD) || !defined(_SC_NPROCESSORS_ONLN) # define PROCESSORS (1) #else # define PROCESSORS (sysconf(_SC_NPROCESSORS_ONLN)) #endif #ifndef PAGE_SIZE # ifdef _SC_PAGE_SIZE # define PAGE_SIZE (sysconf(_SC_PAGE_SIZE)) # else # define PAGE_SIZE (4096) # endif #endif #define dealloc(ptr) do { \ free(ptr); \ ptr = NULL; \ } while (0) /* Determine how many times to hash the password when encrypting, based on * the date such that we increase the number of loops according to Moore's * law relative to when the data is encrypted. 
It is then stored as a two * byte value in the header */ #define MOORE 1.835 // world constant [TIMES per YEAR] #define ARBITRARY 1000000 // number of sha2 calls per one second in 2011 #define T_ZERO 1293840000 // seconds since epoch in 2011 #define SECONDS_IN_A_YEAR (365*86400) #define MOORE_TIMES_PER_SECOND pow (MOORE, 1.0 / SECONDS_IN_A_YEAR) #define ARBITRARY_AT_EPOCH (ARBITRARY * pow (MOORE_TIMES_PER_SECOND, -T_ZERO)) #define FLAG_VERBOSE (FLAG_VERBOSITY | FLAG_VERBOSITY_MAX) #define FLAG_NOT_LZMA (FLAG_NO_COMPRESS | FLAG_LZO_COMPRESS | FLAG_BZIP2_COMPRESS | FLAG_ZLIB_COMPRESS | FLAG_ZPAQ_COMPRESS) #define LZMA_COMPRESS (!(control->flags & FLAG_NOT_LZMA)) #define SHOW_PROGRESS (control->flags & FLAG_SHOW_PROGRESS) #define KEEP_FILES (control->flags & FLAG_KEEP_FILES) #define TEST_ONLY (control->flags & FLAG_TEST_ONLY) #define FORCE_REPLACE (control->flags & FLAG_FORCE_REPLACE) #define DECOMPRESS (control->flags & FLAG_DECOMPRESS) #define NO_COMPRESS (control->flags & FLAG_NO_COMPRESS) #define LZO_COMPRESS (control->flags & FLAG_LZO_COMPRESS) #define BZIP2_COMPRESS (control->flags & FLAG_BZIP2_COMPRESS) #define ZLIB_COMPRESS (control->flags & FLAG_ZLIB_COMPRESS) #define ZPAQ_COMPRESS (control->flags & FLAG_ZPAQ_COMPRESS) #define VERBOSE (control->flags & FLAG_VERBOSE) #define VERBOSITY (control->flags & FLAG_VERBOSITY) #define MAX_VERBOSE (control->flags & FLAG_VERBOSITY_MAX) #define STDIN (control->flags & FLAG_STDIN) #define STDOUT (control->flags & FLAG_STDOUT) #define INFO (control->flags & FLAG_INFO) #define UNLIMITED (control->flags & FLAG_UNLIMITED) #define HASH_CHECK (control->flags & FLAG_HASH) #define HAS_MD5 (control->flags & FLAG_MD5) #define CHECK_FILE (control->flags & FLAG_CHECK) #define KEEP_BROKEN (control->flags & FLAG_KEEP_BROKEN) #define LZ4_TEST (control->flags & FLAG_THRESHOLD) #define TMP_OUTBUF (control->flags & FLAG_TMP_OUTBUF) #define TMP_INBUF (control->flags & FLAG_TMP_INBUF) #define ENCRYPT (control->flags & FLAG_ENCRYPT) #define 
SHOW_OUTPUT (control->flags & FLAG_OUTPUT) #define IS_FROM_FILE ( !!(control->inFILE) && !STDIN ) /* Structure to save state of computation between the single steps. */ struct md5_ctx { uint32_t A; uint32_t B; uint32_t C; uint32_t D; uint32_t total[2]; uint32_t buflen; uint32_t buffer[32]; }; struct sliding_buffer { uchar *buf_low; /* The low window buffer */ uchar *buf_high;/* "" high "" */ i64 orig_offset;/* Where the original buffer started */ i64 offset_low; /* What the current offset the low buffer has */ i64 offset_high;/* "" high buffer "" */ i64 offset_search;/* Where the search is up to */ i64 orig_size; /* How big the full buffer would be */ i64 size_low; /* How big the low buffer is */ i64 size_high; /* "" high "" */ i64 high_length;/* How big the high buffer should be */ int fd; /* The fd of the mmap */ }; struct checksum { uint32_t *cksum; uchar *buf; i64 len; }; typedef i64 tag; struct node { void *data; struct node *prev; }; struct runzip_node { struct stream_info *sinfo; pthread_t *pthreads; struct runzip_node *prev; }; struct rzip_state { void *ss; struct node *sslist; struct node *head; struct level *level; tag hash_index[256]; struct hash_entry *hash_table; char hash_bits; i64 hash_count; i64 hash_limit; tag minimum_tag_mask; i64 tag_clean_ptr; i64 last_match; i64 chunk_size; i64 mmap_size; char chunk_bytes; uint32_t cksum; int fd_in, fd_out; char stdin_eof; struct { i64 inserts; i64 literals; i64 literal_bytes; i64 matches; i64 match_bytes; i64 tag_hits; i64 tag_misses; } stats; }; struct rzip_control { char *infile; FILE *inFILE; // if a FILE is being read from char *outname; char *outfile; FILE *outFILE; // if a FILE is being written to char *outdir; char *tmpdir; // when stdin, stdout, or test used uchar *tmp_outbuf; // Temporary file storage for stdout i64 out_ofs; // Output offset when tmp_outbuf in use i64 hist_ofs; // History offset i64 out_len; // Total length of tmp_outbuf i64 out_maxlen; // The largest the tmp_outbuf can be used i64 
out_relofs; // Relative tmp_outbuf offset when stdout has been flushed uchar *tmp_inbuf; i64 in_ofs; i64 in_len; i64 in_maxlen; FILE *msgout; //stream for output messages FILE *msgerr; //stream for output errors char *suffix; uchar compression_level; i64 overhead; // compressor overhead i64 usable_ram; // the most ram we'll try to use on one activity i64 maxram; // the largest chunk of ram to allocate unsigned char lzma_properties[5]; // lzma properties, encoded i64 window; unsigned long flags; i64 ramsize; i64 max_chunk; i64 max_mmap; int threads; char nice_val; // added for consistency int current_priority; char major_version; char minor_version; i64 st_size; long page_size; int fd_in; int fd_out; int fd_hist; i64 encloops; i64 secs; void (*pass_cb)(void *, char *, size_t); /* callback to get password in lib */ void *pass_data; uchar salt[SALT_LEN]; uchar *salt_pass; int salt_pass_len; uchar *hash; char *passphrase; pthread_mutex_t control_lock; unsigned char eof; unsigned char magic_written; bool lzma_prop_set; cksem_t cksumsem; md5_ctx ctx; uchar md5_resblock[MD5_DIGEST_SIZE]; i64 md5_read; // How far into the file the md5 has done so far struct checksum checksum; const char *util_infile; char delete_infile; const char *util_outfile; char delete_outfile; FILE *outputfile; char library_mode; int log_level; void (*info_cb)(void *data, int pct, int chunk_pct); void *info_data; void (*log_cb)(void *data, unsigned int level, unsigned int line, const char *file, const char *func, const char *format, va_list args); void *log_data; char chunk_bytes; struct sliding_buffer sb; void (*do_mcpy)(rzip_control *, unsigned char *, i64, i64); void (*next_tag)(rzip_control *, struct rzip_state *, i64, tag *); tag (*full_tag)(rzip_control *, struct rzip_state *, i64); i64 (*match_len)(rzip_control *, struct rzip_state *, i64, i64, i64, i64 *); pthread_t *pthreads; struct runzip_node *ruhead; }; struct uncomp_thread { uchar *s_buf; i64 u_len, c_len; i64 last_head; uchar c_type; 
int busy; int streamno; }; struct stream { i64 last_head; uchar *buf; i64 buflen; i64 bufp; uchar eos; long uthread_no; long unext_thread; long base_thread; int total_threads; i64 last_headofs; }; struct stream_info { struct stream *s; uchar num_streams; int fd; i64 bufsize; i64 cur_pos; i64 initial_pos; i64 total_read; i64 ram_alloced; i64 size; struct uncomp_thread *ucthreads; long thread_no; long next_thread; int chunks; char chunk_bytes; }; static inline void print_stuff(const rzip_control *control, int level, unsigned int line, const char *file, const char *func, const char *format, ...) { va_list ap; if (control->library_mode && control->log_cb && (control->log_level >= level)) { va_start(ap, format); control->log_cb(control->log_data, level, line, file, func, format, ap); va_end(ap); } else if (control->msgout) { va_start(ap, format); vfprintf(control->msgout, format, ap); va_end(ap); fflush(control->msgout); } } static inline void print_err(const rzip_control *control, unsigned int line, const char *file, const char *func, const char *format, ...) { va_list ap; if (control->library_mode && control->log_cb && (control->log_level >= 0)) { va_start(ap, format); control->log_cb(control->log_data, 0, line, file, func, format, ap); va_end(ap); } else if (control->msgerr) { va_start(ap, format); vfprintf(control->msgerr, format, ap); va_end(ap); fflush(control->msgerr); } } #define print_stuff(level, ...) do {\ print_stuff(control, level, __LINE__, __FILE__, __func__, __VA_ARGS__); \ } while (0) #define print_output(...) do {\ if (SHOW_OUTPUT) \ print_stuff(1, __VA_ARGS__); \ } while (0) #define print_progress(...) do {\ if (SHOW_PROGRESS) \ print_stuff(2, __VA_ARGS__); \ } while (0) #define print_verbose(...) do {\ if (VERBOSE) \ print_stuff(3, __VA_ARGS__); \ } while (0) #define print_maxverbose(...) do {\ if (MAX_VERBOSE) \ print_stuff(4, __VA_ARGS__); \ } while (0) #define print_err(...) 
do {\ print_err(control, __LINE__, __FILE__, __func__, __VA_ARGS__); \ } while (0) #endif lrzip-0.651/lrztar000077500000000000000000000124411421175057200141370ustar00rootroot00000000000000#!/bin/bash # Copyright (C) George Makrydakis 2009-2011,2013 # Copyright (C) Con Kolivas 2011-2012,2016,2018,2021 # A bash wrapper for Con Kolivas' excellent lrzip utility. For the time # being, lrzip does not like pipes, so we had to do this. It is kind of # self - documenting, spawned out of a test tube bash shell script. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 2 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . function lrztar_local() { local hv="\ lrztar GNU/bash wrapper script for lrzip and tar input/output over directories. Copyright (C) George Makrydakis 2009-2011,2013 Copyright (C) Con Kolivas 2011-2012,2016,2018,2021 Usage : lrztar [lrzip options] Result: a lrzip tarball is produced. Extras: when an lrzip tarball is used with -d, -O, it gets extracted: -h: will display this message. -d: will decompress a lrzip tarball to current directory. -O: will decompress a -d specified lrzip tarball to path. -f: will force overwrites. Notice: - The input argument is always last, all options and their arguments precede. - The -O flag is an option flag, goes before: (-O ). - You can use the remaining options of lrzip as they were. - lrzuntar is equivalent to lrztar [options] -d . - This script exists because of how lrzip behaves. - Beware the -f flag, it stands for what it says... 
" [[ $1 == "" ]] && { printf "lrztar: no arguments given\n"; return 1; } local p=("${@:1:$(($#-1))}") s="${!#}" vopt=("lrz") \ v_w=0 v_S=0 v_D=0 v_p=0 v_q=0 v_L=0 \ v_n=0 v_l=0 v_b=0 v_g=0 v_z=0 v_U=0 \ v_T=0 v_N=0 v_v=0 v_f=0 v_d=0 v_h=0 \ v_H=0 v_c=0 v_k=0 v_o=0 v_O=0 v_m=0 x= i="$(pwd)" tar --version &> /dev/null \ || { printf "lrztar: no tar in your path\n"; return 1; } lrzip --version &> /dev/null \ || { printf "lrztar: no lrzip in your path\n"; return 1; } lrzcat --version &> /dev/null \ || { printf "lrztar: no lrzcat in your path\n"; return 1; } while getopts w:O:S:DqL:nlbgzUm:TN:p:vfo:d:tVhHck x; do [[ $x == [tV] ]] && { printf "lrztar: invalid option for lrztar: %s\n" "$x"; return 1; } ((v_$x=${#vopt[@]})) vopt[${#vopt[@]}]="$OPTARG" done [[ $(basename "$0") == lrzuntar ]] \ && { ((v_d=${#vopt[@]})); vopt[${#vopt[@]}]="$s"; } { ! (($#)) || ((v_h)); } && { printf "%s\n" "$hv" return } ((v_d)) && { [[ -e ${vopt[v_d]} ]] || { printf "lrztar: file does not exist: %s\n" \ "${vopt[v_d]}" return 1 } i+="/${vopt[v_d]##*/}" i="${i%.tar.*}" if ((v_O)); then for x in ${!p[@]};do [[ ${p[x]} == "-O" ]] && { p[x]= p[$((x+1))]= break; } done i="${vopt[v_O]%/}" x="${s##*/}" if [[ -d "$i/${x%.tar.*}" ]] && ! ((v_f)); then printf "lrztar: %s exists, use -f.\n" \ "$i/${x%.tar.*}" return 1 fi if ! [[ -d $i ]]; then printf "lrztar: %s output path does not exist.\n" \ "$i" return 1 fi else i="./" fi [ ! -z "$s" ] && { lrzcat ${p[@]// /\\ } "$s" | tar x -C "$i" x=$? } || { lrzcat ${p[@]// /\\ } | tar x -C "$i" x=$? } } || { if ((v_o)); then ! ((v_f)) && [[ -e ${vopt[$v_o]} ]] && { printf "lrztar: %s exists, use -f to overwrite.\n" \ "${vopt[$v_o]}" return 1 } else if ((v_O)); then if ! [[ -d ${vopt[v_O]} ]]; then printf "lrztar: %s output path does not exist.\n" \ "${vopt[v_O]}" return 1 fi for x in ${!p[@]};do [[ ${p[x]} == "-O" ]] && { p[x]= i="${p[$((x+1))]%/}" p[$((x+1))]= s="${!#}" break; } done fi s="${s%/}" p+=(-o "$i/${s##*/}.tar.${vopt[v_S]}"); fi if ! ((v_o)); then ! 
((v_f)) && [[ -e $i/${s##*/}.tar.${vopt[v_S]} ]] && { printf "lrztar: %s exists, use -f to overwrite\n" \ "$i/${s##*/}.tar.${vopt[v_S]}" return 1 } fi tar c "$s" | lrzip "${p[@]}" x=$? } return $x } lrztar_local "${@}" lrzip-0.651/lzma/000077500000000000000000000000001421175057200136345ustar00rootroot00000000000000lrzip-0.651/lzma/7zC.txt000066400000000000000000000125311421175057200150420ustar00rootroot000000000000007z ANSI-C Decoder 4.62 ---------------------- 7z ANSI-C provides 7z/LZMA decoding. 7z ANSI-C version is simplified version ported from C++ code. LZMA is default and general compression method of 7z format in 7-Zip compression program (www.7-zip.org). LZMA provides high compression ratio and very fast decompression. LICENSE ------- 7z ANSI-C Decoder is part of the LZMA SDK. LZMA SDK is written and placed in the public domain by Igor Pavlov. Files --------------------- 7zDecode.* - Low level 7z decoding 7zExtract.* - High level 7z decoding 7zHeader.* - .7z format constants 7zIn.* - .7z archive opening 7zItem.* - .7z structures 7zMain.c - Test application How To Use ---------- You must download 7-Zip program from www.7-zip.org. You can create .7z archive with 7z.exe or 7za.exe: 7za.exe a archive.7z *.htm -r -mx -m0fb=255 If you have big number of files in archive, and you need fast extracting, you can use partly-solid archives: 7za.exe a archive.7z *.htm -ms=512K -r -mx -m0fb=255 -m0d=512K In that example 7-Zip will use 512KB solid blocks. So it needs to decompress only 512KB for extracting one file from such archive. Limitations of current version of 7z ANSI-C Decoder --------------------------------------------------- - It reads only "FileName", "Size", "LastWriteTime" and "CRC" information for each file in archive. - It supports only LZMA and Copy (no compression) methods with BCJ or BCJ2 filters. - It converts original UTF-16 Unicode file names to UTF-8 Unicode file names. These limitations will be fixed in future versions. 
Using 7z ANSI-C Decoder Test application: ----------------------------------------- Usage: 7zDec : e: Extract files from archive l: List contents of archive t: Test integrity of archive Example: 7zDec l archive.7z lists contents of archive.7z 7zDec e archive.7z extracts files from archive.7z to current folder. How to use .7z Decoder ---------------------- Memory allocation ~~~~~~~~~~~~~~~~~ 7z Decoder uses two memory pools: 1) Temporary pool 2) Main pool Such scheme can allow you to avoid fragmentation of allocated blocks. Steps for using 7z decoder -------------------------- Use code at 7zMain.c as example. 1) Declare variables: inStream /* implements ILookInStream interface */ CSzArEx db; /* 7z archive database structure */ ISzAlloc allocImp; /* memory functions for main pool */ ISzAlloc allocTempImp; /* memory functions for temporary pool */ 2) call CrcGenerateTable(); function to initialize CRC structures. 3) call SzArEx_Init(&db); function to initialize db structures. 4) call SzArEx_Open(&db, inStream, &allocMain, &allocTemp) to open archive This function opens archive "inStream" and reads headers to "db". All items in "db" will be allocated with "allocMain" functions. SzArEx_Open function allocates and frees temporary structures by "allocTemp" functions. 
5) List items or Extract items Listing code: ~~~~~~~~~~~~~ { UInt32 i; for (i = 0; i < db.db.NumFiles; i++) { CFileItem *f = db.db.Files + i; printf("%10d %s\n", (int)f->Size, f->Name); } } Extracting code: ~~~~~~~~~~~~~~~~ SZ_RESULT SzAr_Extract( CArchiveDatabaseEx *db, ILookInStream *inStream, UInt32 fileIndex, /* index of file */ UInt32 *blockIndex, /* index of solid block */ Byte **outBuffer, /* pointer to pointer to output buffer (allocated with allocMain) */ size_t *outBufferSize, /* buffer size for output buffer */ size_t *offset, /* offset of stream for required file in *outBuffer */ size_t *outSizeProcessed, /* size of file in *outBuffer */ ISzAlloc *allocMain, ISzAlloc *allocTemp); If you need to decompress more than one file, you can send these values from previous call: blockIndex, outBuffer, outBufferSize. You can consider "outBuffer" as cache of solid block. If your archive is solid, it will increase decompression speed. After decompressing you must free "outBuffer": allocImp.Free(outBuffer); 6) call SzArEx_Free(&db, allocImp.Free) to free allocated items in "db". Memory requirements for .7z decoding ------------------------------------ Memory usage for Archive opening: - Temporary pool: - Memory for uncompressed .7z headers - some other temporary blocks - Main pool: - Memory for database: Estimated size of one file structures in solid archive: - Size (4 or 8 Bytes) - CRC32 (4 bytes) - LastWriteTime (8 bytes) - Some file information (4 bytes) - File Name (variable length) + pointer + allocation structures Memory usage for archive Decompressing: - Temporary pool: - Memory for LZMA decompressing structures - Main pool: - Memory for decompressed solid block - Memory for temporary buffers, if BCJ2 filter is used. Usually these temporary buffers can be about 15% of solid block size. 7z Decoder doesn't allocate memory for compressed blocks. Instead of this, you must allocate buffer with desired size before calling 7z Decoder. Use 7zMain.c as example.
Defines ------- _SZ_ALLOC_DEBUG - define it if you want to debug alloc/free operations to stderr. --- http://www.7-zip.org http://www.7-zip.org/sdk.html http://www.7-zip.org/support.html lrzip-0.651/lzma/7zFormat.txt000066400000000000000000000161641421175057200161160ustar00rootroot000000000000007z Format description (2.30 Beta 25) ----------------------------------- This file contains description of 7z archive format. 7z archive can contain files compressed with any method. See "Methods.txt" for description for defined compressing methods. Format structure Overview ------------------------- Some fields can be optional. Archive structure ~~~~~~~~~~~~~~~~~ SignatureHeader [PackedStreams] [PackedStreamsForHeaders] [ Header or { Packed Header HeaderInfo } ] Header structure ~~~~~~~~~~~~~~~~ { ArchiveProperties AdditionalStreams { PackInfo { PackPos NumPackStreams Sizes[NumPackStreams] CRCs[NumPackStreams] } CodersInfo { NumFolders Folders[NumFolders] { NumCoders CodersInfo[NumCoders] { ID NumInStreams; NumOutStreams; PropertiesSize Properties[PropertiesSize] } NumBindPairs BindPairsInfo[NumBindPairs] { InIndex; OutIndex; } PackedIndices } UnPackSize[Folders][Folders.NumOutstreams] CRCs[NumFolders] } SubStreamsInfo { NumUnPackStreamsInFolders[NumFolders]; UnPackSizes[] CRCs[] } } MainStreamsInfo { (Same as in AdditionalStreams) } FilesInfo { NumFiles Properties[] { ID Size Data } } } HeaderInfo structure ~~~~~~~~~~~~~~~~~~~~ { (Same as in AdditionalStreams) } Notes about Notation and encoding --------------------------------- 7z uses little endian encoding. 7z archive format has optional headers that are marked as [] Header [] REAL_UINT64 means real UINT64. UINT64 means real UINT64 encoded with the following scheme: Size of encoding sequence depends from first byte: First_Byte Extra_Bytes Value (binary) 0xxxxxxx : ( xxxxxxx ) 10xxxxxx BYTE y[1] : ( xxxxxx << (8 * 1)) + y 110xxxxx BYTE y[2] : ( xxxxx << (8 * 2)) + y ... 
1111110x BYTE y[6] : ( x << (8 * 6)) + y 11111110 BYTE y[7] : y 11111111 BYTE y[8] : y Property IDs ------------ 0x00 = kEnd, 0x01 = kHeader, 0x02 = kArchiveProperties, 0x03 = kAdditionalStreamsInfo, 0x04 = kMainStreamsInfo, 0x05 = kFilesInfo, 0x06 = kPackInfo, 0x07 = kUnPackInfo, 0x08 = kSubStreamsInfo, 0x09 = kSize, 0x0A = kCRC, 0x0B = kFolder, 0x0C = kCodersUnPackSize, 0x0D = kNumUnPackStream, 0x0E = kEmptyStream, 0x0F = kEmptyFile, 0x10 = kAnti, 0x11 = kName, 0x12 = kCreationTime, 0x13 = kLastAccessTime, 0x14 = kLastWriteTime, 0x15 = kWinAttributes, 0x16 = kComment, 0x17 = kEncodedHeader, 7z format headers ----------------- SignatureHeader ~~~~~~~~~~~~~~~ BYTE kSignature[6] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C}; ArchiveVersion { BYTE Major; // now = 0 BYTE Minor; // now = 2 }; UINT32 StartHeaderCRC; StartHeader { REAL_UINT64 NextHeaderOffset REAL_UINT64 NextHeaderSize UINT32 NextHeaderCRC } ........................... ArchiveProperties ~~~~~~~~~~~~~~~~~ BYTE NID::kArchiveProperties (0x02) for (;;) { BYTE PropertyType; if (aType == 0) break; UINT64 PropertySize; BYTE PropertyData[PropertySize]; } Digests (NumStreams) ~~~~~~~~~~~~~~~~~~~~~ BYTE AllAreDefined if (AllAreDefined == 0) { for(NumStreams) BIT Defined } UINT32 CRCs[NumDefined] PackInfo ~~~~~~~~~~~~ BYTE NID::kPackInfo (0x06) UINT64 PackPos UINT64 NumPackStreams [] BYTE NID::kSize (0x09) UINT64 PackSizes[NumPackStreams] [] [] BYTE NID::kCRC (0x0A) PackStreamDigests[NumPackStreams] [] BYTE NID::kEnd Folder ~~~~~~ UINT64 NumCoders; for (NumCoders) { BYTE { 0:3 DecompressionMethod.IDSize 4: 0 - IsSimple 1 - Is not simple 5: 0 - No Attributes 1 - There Are Attributes 7: 0 - Last Method in Alternative_Method_List 1 - There are more alternative methods } BYTE DecompressionMethod.ID[DecompressionMethod.IDSize] if (!IsSimple) { UINT64 NumInStreams; UINT64 NumOutStreams; } if (DecompressionMethod[0] != 0) { UINT64 PropertiesSize BYTE Properties[PropertiesSize] } } NumBindPairs = NumOutStreamsTotal - 1; for 
(NumBindPairs) { UINT64 InIndex; UINT64 OutIndex; } NumPackedStreams = NumInStreamsTotal - NumBindPairs; if (NumPackedStreams > 1) for(NumPackedStreams) { UINT64 Index; }; Coders Info ~~~~~~~~~~~ BYTE NID::kUnPackInfo (0x07) BYTE NID::kFolder (0x0B) UINT64 NumFolders BYTE External switch(External) { case 0: Folders[NumFolders] case 1: UINT64 DataStreamIndex } BYTE ID::kCodersUnPackSize (0x0C) for(Folders) for(Folder.NumOutStreams) UINT64 UnPackSize; [] BYTE NID::kCRC (0x0A) UnPackDigests[NumFolders] [] BYTE NID::kEnd SubStreams Info ~~~~~~~~~~~~~~ BYTE NID::kSubStreamsInfo; (0x08) [] BYTE NID::kNumUnPackStream; (0x0D) UINT64 NumUnPackStreamsInFolders[NumFolders]; [] [] BYTE NID::kSize (0x09) UINT64 UnPackSizes[] [] [] BYTE NID::kCRC (0x0A) Digests[Number of streams with unknown CRC] [] BYTE NID::kEnd Streams Info ~~~~~~~~~~~~ [] PackInfo [] [] CodersInfo [] [] SubStreamsInfo [] BYTE NID::kEnd FilesInfo ~~~~~~~~~ BYTE NID::kFilesInfo; (0x05) UINT64 NumFiles for (;;) { BYTE PropertyType; if (aType == 0) break; UINT64 Size; switch(PropertyType) { kEmptyStream: (0x0E) for(NumFiles) BIT IsEmptyStream kEmptyFile: (0x0F) for(EmptyStreams) BIT IsEmptyFile kAnti: (0x10) for(EmptyStreams) BIT IsAntiFile case kCreationTime: (0x12) case kLastAccessTime: (0x13) case kLastWriteTime: (0x14) BYTE AllAreDefined if (AllAreDefined == 0) { for(NumFiles) BIT TimeDefined } BYTE External; if(External != 0) UINT64 DataIndex [] for(Definded Items) UINT32 Time [] kNames: (0x11) BYTE External; if(External != 0) UINT64 DataIndex [] for(Files) { wchar_t Names[NameSize]; wchar_t 0; } [] kAttributes: (0x15) BYTE AllAreDefined if (AllAreDefined == 0) { for(NumFiles) BIT AttributesAreDefined } BYTE External; if(External != 0) UINT64 DataIndex [] for(Definded Attributes) UINT32 Attributes [] } } Header ~~~~~~ BYTE NID::kHeader (0x01) [] ArchiveProperties [] [] BYTE NID::kAdditionalStreamsInfo; (0x03) StreamsInfo [] [] BYTE NID::kMainStreamsInfo; (0x04) StreamsInfo [] [] FilesInfo [] BYTE NID::kEnd 
HeaderInfo ~~~~~~~~~~ [] BYTE NID::kEncodedHeader; (0x17) StreamsInfo for Encoded Header [] --- End of document lrzip-0.651/lzma/ASM/000077500000000000000000000000001421175057200142545ustar00rootroot00000000000000lrzip-0.651/lzma/ASM/x86/000077500000000000000000000000001421175057200147015ustar00rootroot00000000000000lrzip-0.651/lzma/ASM/x86/7zAsm.asm000066400000000000000000000026761421175057200164170ustar00rootroot00000000000000; 7zAsm.asm -- ASM macros ; 2009-12-12 : Igor Pavlov : Public domain ; 2011-10-12 : P7ZIP : Public domain %define NOT ~ %macro MY_ASM_START 0 SECTION .text %endmacro %macro MY_PROC 2 ; macro name:req, numParams:req align 16 %define proc_numParams %2 ; numParams global %1 global _%1 %1: _%1: %endmacro %macro MY_ENDP 0 %ifdef x64 ret ; proc_name ENDP %else ret ; (proc_numParams - 2) * 4 %endif %endmacro %ifdef x64 REG_SIZE equ 8 %else REG_SIZE equ 4 %endif %define x0 EAX %define x1 ECX %define x2 EDX %define x3 EBX %define x4 ESP %define x5 EBP %define x6 ESI %define x7 EDI %define x0_L AL %define x1_L CL %define x2_L DL %define x3_L BL %define x0_H AH %define x1_H CH %define x2_H DH %define x3_H BH %ifdef x64 %define r0 RAX %define r1 RCX %define r2 RDX %define r3 RBX %define r4 RSP %define r5 RBP %define r6 RSI %define r7 RDI %else %define r0 x0 %define r1 x1 %define r2 x2 %define r3 x3 %define r4 x4 %define r5 x5 %define r6 x6 %define r7 x7 %endif %macro MY_PUSH_4_REGS 0 push r3 push r5 %ifdef x64 %ifdef CYGWIN64 push r6 push r7 %endif %else push r6 push r7 %endif %endmacro %macro MY_POP_4_REGS 0 %ifdef x64 %ifdef CYGWIN64 pop r7 pop r6 %endif %else pop r7 pop r6 %endif pop r5 pop r3 %endmacro lrzip-0.651/lzma/ASM/x86/7zCrcOpt_asm.asm000066400000000000000000000055461421175057200177300ustar00rootroot00000000000000; 7zCrcOpt.asm -- CRC32 calculation : optimized version ; 2009-12-12 : Igor Pavlov : Public domain %include "7zAsm.asm" MY_ASM_START %define rD r2 %define rN r7 %ifdef x64 %define num_VAR r8 %define table_VAR r9 %else data_size equ 
(REG_SIZE * 7) crc_table equ (REG_SIZE + data_size) %define num_VAR [r4 + data_size] %define table_VAR [r4 + crc_table] %endif %define SRCDAT rN + rD + 4 * %macro CRC 4 ;CRC macro op:req, dest:req, src:req, t:req %1 %2, DWORD [r5 + %3 * 4 + 0400h * %4] ; op dest, DWORD [r5 + src * 4 + 0400h * t] %endmacro %macro CRC_XOR 3 ; CRC_XOR macro dest:req, src:req, t:req CRC xor, %1, %2, %3 %endmacro %macro CRC_MOV 3 ; CRC_MOV macro dest:req, src:req, t:req CRC mov, %1, %2, %3 ; CRC mov, dest, src, t %endmacro %macro CRC1b 0 movzx x6, BYTE [rD] inc rD movzx x3, x0_L xor x6, x3 shr x0, 8 CRC xor, x0, r6, 0 dec rN %endmacro %macro MY_PROLOG 1 ; MY_PROLOG macro crc_end:req MY_PUSH_4_REGS %ifdef x64 %ifdef CYGWIN64 ;ECX=CRC, RDX=buf, R8=size R9=table ; already in R8 : mov num_VAR,R8 ; LEN ; already in RDX : mov rD, RDX ; BUF ; already in R9 : mov table_VAR,R9; table mov x0, ECX ; CRC %else ;EDI=CRC, RSI=buf, RDX=size RCX=table mov num_VAR,RDX ; LEN mov rD, RSI ; BUF mov table_VAR,RCX; table mov x0, EDI ; CRC %endif %else mov x0, [r4 + 20] ; CRC mov rD, [r4 + 24] ; buf %endif mov rN, num_VAR mov r5, table_VAR test rN, rN jz near %1 ; crc_end %%sl: test rD, 7 jz %%sl_end CRC1b jnz %%sl %%sl_end: cmp rN, 16 jb near %1; crc_end add rN, rD mov num_VAR, rN sub rN, 8 and rN, NOT 7 sub rD, rN xor x0, [SRCDAT 0] %endmacro %macro MY_EPILOG 1 ; MY_EPILOG macro crc_end:req xor x0, [SRCDAT 0] mov rD, rN mov rN, num_VAR sub rN, rD %1: ; crc_end: test rN, rN jz %%end ; @F CRC1b jmp %1 ; crc_end %%end: MY_POP_4_REGS %endmacro MY_PROC CrcUpdateT8, 4 MY_PROLOG crc_end_8 mov x1, [SRCDAT 1] align 16 main_loop_8: mov x6, [SRCDAT 2] movzx x3, x1_L CRC_XOR x6, r3, 3 movzx x3, x1_H CRC_XOR x6, r3, 2 shr x1, 16 movzx x3, x1_L movzx x1, x1_H CRC_XOR x6, r3, 1 movzx x3, x0_L CRC_XOR x6, r1, 0 mov x1, [SRCDAT 3] CRC_XOR x6, r3, 7 movzx x3, x0_H shr x0, 16 CRC_XOR x6, r3, 6 movzx x3, x0_L CRC_XOR x6, r3, 5 movzx x3, x0_H CRC_MOV x0, r3, 4 xor x0, x6 add rD, 8 jnz main_loop_8 MY_EPILOG crc_end_8 MY_ENDP ; 
T4 CRC deleted ; end %ifidn __OUTPUT_FORMAT__,elf section .note.GNU-stack noalloc noexec nowrite progbits %endif lrzip-0.651/lzma/ASM/x86/Makefile.am000066400000000000000000000002011421175057200167260ustar00rootroot00000000000000MAINTAINERCLEANFILES = Makefile.in noinst_LTLIBRARIES = liblzmaasm.la liblzmaasm_la_SOURCES = \ 7zAsm.asm \ 7zCrcOpt_asm.asm lrzip-0.651/lzma/C/000077500000000000000000000000001421175057200140165ustar00rootroot00000000000000lrzip-0.651/lzma/C/7zCrc.c000066400000000000000000000012531421175057200151530ustar00rootroot00000000000000/* 7zCrc.c -- CRC32 calculation 2008-08-05 Igor Pavlov Public domain */ #include "7zCrc.h" #define kCrcPoly 0xEDB88320 UInt32 g_CrcTable[256]; void MY_FAST_CALL CrcGenerateTable(void) { UInt32 i; for (i = 0; i < 256; i++) { UInt32 r = i; int j; for (j = 0; j < 8; j++) r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); g_CrcTable[i] = r; } } UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void *data, size_t size) { const Byte *p = (const Byte *)data; for (; size > 0 ; size--, p++) v = CRC_UPDATE_BYTE(v, *p); return v; } UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size) { return CrcUpdate(CRC_INIT_VAL, data, size) ^ 0xFFFFFFFF; } lrzip-0.651/lzma/C/7zCrc.h000066400000000000000000000011341421175057200151560ustar00rootroot00000000000000/* 7zCrc.h -- CRC32 calculation 2009-02-07 : Igor Pavlov : Public domain */ #ifndef __7Z_CRC_H #define __7Z_CRC_H #include #include "Types.h" #ifdef __cplusplus extern "C" { #endif extern UInt32 g_CrcTable[]; void MY_FAST_CALL CrcGenerateTable(void); #define CRC_INIT_VAL 0xFFFFFFFF #define CRC_GET_DIGEST(crc) ((crc) ^ 0xFFFFFFFF) #define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void *data, size_t size); UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size); #ifdef __cplusplus } #endif #endif lrzip-0.651/lzma/C/7zCrcT8.c000066400000000000000000000016711421175057200153730ustar00rootroot00000000000000/* 
/* Allocate `size` bytes with malloc().
   Returns 0 for a zero-byte request (malloc is not called) or when
   malloc fails.  With _SZ_ALLOC_DEBUG defined, every non-zero request is
   traced to stderr and counted in g_allocCount. */
void *MyAlloc(size_t size)
{
  if (size == 0)
    return 0;
  #ifdef _SZ_ALLOC_DEBUG
  {
    void *p = malloc(size);
    /* size_t needs %zu and pointers need %p: the previous %d / %X with an
       (unsigned) cast mismatched the argument types and truncated 64-bit
       values. */
    fprintf(stderr, "\nAlloc %10zu bytes, count = %10d, addr = %p", size, g_allocCount++, p);
    return p;
  }
  #else
  return malloc(size);
  #endif
}

/* Release memory obtained from MyAlloc().  A null address is accepted
   (free() ignores it); in debug builds only non-null frees are traced. */
void MyFree(void *address)
{
  #ifdef _SZ_ALLOC_DEBUG
  if (address != 0)
    fprintf(stderr, "\nFree; count = %10d, addr = %p", --g_allocCount, address);
  #endif
  free(address);
}

#ifdef _WIN32

/* Allocate `size` bytes of committed read/write memory with
   VirtualAlloc().  Returns 0 for a zero-byte request. */
void *MidAlloc(size_t size)
{
  if (size == 0)
    return 0;
  #ifdef _SZ_ALLOC_DEBUG
  /* %zu: size is a size_t, not an int */
  fprintf(stderr, "\nAlloc_Mid %10zu bytes; count = %10d", size, g_allocCountMid++);
  #endif
  return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE);
}

/* Release memory obtained from MidAlloc().  A null address is ignored. */
void MidFree(void *address)
{
  #ifdef _SZ_ALLOC_DEBUG
  if (address != 0)
    fprintf(stderr, "\nFree_Mid; count = %10d", --g_allocCountMid);
  #endif
  if (address == 0)
    return;
  VirtualFree(address, 0, MEM_RELEASE);
}

/* Large pages can only be used when the SDK headers define MEM_LARGE_PAGES */
#ifndef MEM_LARGE_PAGES
#undef _7ZIP_LARGE_PAGES
#endif

#ifdef _7ZIP_LARGE_PAGES
SIZE_T g_LargePageSize = 0;
typedef SIZE_T (WINAPI *GetLargePageMinimumP)();
#endif

/* Look up GetLargePageMinimum in kernel32.dll at run time and, when it
   reports a non-zero power-of-two size, remember it in g_LargePageSize.
   Does nothing when _7ZIP_LARGE_PAGES is not defined. */
void SetLargePageSize()
{
  #ifdef _7ZIP_LARGE_PAGES
  SIZE_T size = 0;
  GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP)
        GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum");
  if (largePageMinimum == 0)
    return;
  size = largePageMinimum();
  /* reject 0 and anything that is not a power of two */
  if (size == 0 || (size & (size - 1)) != 0)
    return;
  g_LargePageSize = size;
  #endif
}

/* Allocate `size` bytes with VirtualAlloc().  When a large-page size of at
   most 1 GiB is known and the request is at least 256 KiB, a large-page
   allocation (size rounded up to a multiple of the page size) is tried
   first; on failure the normal allocation is used.  Returns 0 for a
   zero-byte request. */
void *BigAlloc(size_t size)
{
  if (size == 0)
    return 0;
  #ifdef _SZ_ALLOC_DEBUG
  /* %zu: size is a size_t, not an int */
  fprintf(stderr, "\nAlloc_Big %10zu bytes; count = %10d", size, g_allocCountBig++);
  #endif

  #ifdef _7ZIP_LARGE_PAGES
  if (g_LargePageSize != 0 && g_LargePageSize <= (1 << 30) && size >= (1 << 18))
  {
    void *res = VirtualAlloc(0, (size + g_LargePageSize - 1) & (~(g_LargePageSize - 1)),
        MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
    if (res != 0)
      return res;
  }
  #endif
  return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE);
}

/* Release memory obtained from BigAlloc().  A null address is ignored. */
void BigFree(void *address)
{
  #ifdef _SZ_ALLOC_DEBUG
  if (address != 0)
    fprintf(stderr, "\nFree_Big; count = %10d", --g_allocCountBig);
  #endif

  if (address == 0)
    return;
  VirtualFree(address, 0, MEM_RELEASE);
}

#endif
MyAlloc(size) #define MidFree(address) MyFree(address) #define BigAlloc(size) MyAlloc(size) #define BigFree(address) MyFree(address) #endif #ifdef __cplusplus } #endif #endif lrzip-0.651/lzma/C/LzFind.c000066400000000000000000000464001421175057200153540ustar00rootroot00000000000000/* LzFind.c -- Match finder for LZ algorithms 2009-04-22 : Igor Pavlov : Public domain */ #include #include "LzFind.h" #include "LzHash.h" #define kEmptyHashValue 0 #define kMaxValForNormalize ((UInt32)0xFFFFFFFF) #define kNormalizeStepMin (1 << 10) /* it must be power of 2 */ #define kNormalizeMask (~(kNormalizeStepMin - 1)) #define kMaxHistorySize ((UInt32)3 << 30) #define kStartMaxLen 3 static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc) { if (!p->directInput) { alloc->Free(alloc, p->bufferBase); p->bufferBase = 0; } } /* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc) { UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; if (p->directInput) { p->blockSize = blockSize; return 1; } if (p->bufferBase == 0 || p->blockSize != blockSize) { LzInWindow_Free(p, alloc); p->blockSize = blockSize; p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)blockSize); } return (p->bufferBase != 0); } Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } Byte MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index) { return p->buffer[index]; } UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue) { p->posLimit -= subValue; p->pos -= subValue; p->streamPos -= subValue; } static void MatchFinder_ReadBlock(CMatchFinder *p) { if (p->streamEndWasReached || p->result != SZ_OK) return; if (p->directInput) { UInt32 curSize = 0xFFFFFFFF - p->streamPos; if (curSize > p->directInputRem) curSize = (UInt32)p->directInputRem; p->directInputRem -= curSize; 
p->streamPos += curSize; if (p->directInputRem == 0) p->streamEndWasReached = 1; return; } for (;;) { Byte *dest = p->buffer + (p->streamPos - p->pos); size_t size = (p->bufferBase + p->blockSize - dest); if (size == 0) return; p->result = p->stream->Read(p->stream, dest, &size); if (p->result != SZ_OK) return; if (size == 0) { p->streamEndWasReached = 1; return; } p->streamPos += (UInt32)size; if (p->streamPos - p->pos > p->keepSizeAfter) return; } } void MatchFinder_MoveBlock(CMatchFinder *p) { memmove(p->bufferBase, p->buffer - p->keepSizeBefore, (size_t)(p->streamPos - p->pos + p->keepSizeBefore)); p->buffer = p->bufferBase + p->keepSizeBefore; } int MatchFinder_NeedMove(CMatchFinder *p) { if (p->directInput) return 0; /* if (p->streamEndWasReached) return 0; */ return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); } void MatchFinder_ReadIfRequired(CMatchFinder *p) { if (p->streamEndWasReached) return; if (p->keepSizeAfter >= p->streamPos - p->pos) MatchFinder_ReadBlock(p); } static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p) { if (MatchFinder_NeedMove(p)) MatchFinder_MoveBlock(p); MatchFinder_ReadBlock(p); } static void MatchFinder_SetDefaultSettings(CMatchFinder *p) { p->cutValue = 32; p->btMode = 1; p->numHashBytes = 4; p->bigHash = 0; } #define kCrcPoly 0xEDB88320 void MatchFinder_Construct(CMatchFinder *p) { UInt32 i; p->bufferBase = 0; p->directInput = 0; p->hash = 0; MatchFinder_SetDefaultSettings(p); for (i = 0; i < 256; i++) { UInt32 r = i; int j; for (j = 0; j < 8; j++) r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); p->crc[i] = r; } } static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc) { alloc->Free(alloc, p->hash); p->hash = 0; } void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc) { MatchFinder_FreeThisClassMemory(p, alloc); LzInWindow_Free(p, alloc); } static CLzRef* AllocRefs(UInt32 num, ISzAlloc *alloc) { size_t sizeInBytes = (size_t)num * sizeof(CLzRef); if (sizeInBytes / 
/* Configure the match finder for a dictionary of historySize bytes and
   allocate its window and hash/son memory.

   p                    match finder to set up
   historySize          dictionary size; values above kMaxHistorySize are rejected
   keepAddBufferBefore  extra bytes to keep before the current position
   matchMaxLen          maximum match length
   keepAddBufferAfter   extra bytes to keep after the current position
   alloc                allocator used for the window and for the hash/son array

   Returns 1 on success.  On any failure everything owned by the match
   finder is released through MatchFinder_Free and 0 is returned. */
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
    UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
    ISzAlloc *alloc)
{
  UInt32 sizeReserv;
  if (historySize > kMaxHistorySize)
  {
    MatchFinder_Free(p, alloc);
    return 0;
  }
  /* Reserve half of the history size, or a quarter once it exceeds 2 GiB,
     plus half of the keep/match lengths and a fixed 512 KiB. */
  sizeReserv = historySize >> 1;
  if (historySize > ((UInt32)2 << 30))
    sizeReserv = historySize >> 2;
  sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);

  p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
  p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
  /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
  if (LzInWindow_Create(p, sizeReserv, alloc))
  {
    UInt32 newCyclicBufferSize = historySize + 1;
    UInt32 hs;
    p->matchMaxLen = matchMaxLen;
    {
      /* Derive the main hash mask (hs) from the history size. */
      p->fixedHashSize = 0;
      if (p->numHashBytes == 2)
        hs = (1 << 16) - 1;
      else
      {
        /* Smear the high bits of (historySize - 1) downwards, halve the
           result and force at least 16 mask bits. */
        hs = historySize - 1;
        hs |= (hs >> 1);
        hs |= (hs >> 2);
        hs |= (hs >> 4);
        hs |= (hs >> 8);
        hs >>= 1;
        hs |= 0xFFFF; /* don't change it! It's required for Deflate */
        if (hs > (1 << 24))
        {
          /* Cap large masks: 24 bits for 3-byte hashing, otherwise halve. */
          if (p->numHashBytes == 3)
            hs = (1 << 24) - 1;
          else
            hs >>= 1;
        }
      }
      p->hashMask = hs;
      hs++; /* mask -> number of entries */
      /* Add the fixed-size tables used for the shorter hashes. */
      if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
      if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
      if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
      hs += p->fixedHashSize;
    }

    {
      UInt32 prevSize = p->hashSizeSum + p->numSons;
      UInt32 newSize;
      p->historySize = historySize;
      p->hashSizeSum = hs;
      p->cyclicBufferSize = newCyclicBufferSize;
      /* Binary-tree mode needs two son entries per position. */
      p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize);
      newSize = p->hashSizeSum + p->numSons;
      /* Reuse the existing array when the total entry count is unchanged. */
      if (p->hash != 0 && prevSize == newSize)
        return 1;
      MatchFinder_FreeThisClassMemory(p, alloc);
      p->hash = AllocRefs(newSize, alloc);
      if (p->hash != 0)
      {
        /* The son array lives in the same allocation, right after the
           hash tables. */
        p->son = p->hash + p->hashSizeSum;
        return 1;
      }
    }
  }
  /* Window or hash allocation failed. */
  MatchFinder_Free(p, alloc);
  return 0;
}
p->cyclicBufferSize) p->cyclicBufferPos = 0; MatchFinder_SetLimits(p); } static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, UInt32 *distances, UInt32 maxLen) { son[_cyclicBufferPos] = curMatch; for (;;) { UInt32 delta = pos - curMatch; if (cutValue-- == 0 || delta >= _cyclicBufferSize) return distances; { const Byte *pb = cur - delta; curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; if (pb[maxLen] == cur[maxLen] && *pb == *cur) { UInt32 len = 0; while (++len != lenLimit) if (pb[len] != cur[len]) break; if (maxLen < len) { *distances++ = maxLen = len; *distances++ = delta - 1; if (len == lenLimit) return distances; } } } } } UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, UInt32 *distances, UInt32 maxLen) { CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1; CLzRef *ptr1 = son + (_cyclicBufferPos << 1); UInt32 len0 = 0, len1 = 0; for (;;) { UInt32 delta = pos - curMatch; if (cutValue-- == 0 || delta >= _cyclicBufferSize) { *ptr0 = *ptr1 = kEmptyHashValue; return distances; } { CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; UInt32 len = (len0 < len1 ? 
len0 : len1); if (pb[len] == cur[len]) { if (++len != lenLimit && pb[len] == cur[len]) while (++len != lenLimit) if (pb[len] != cur[len]) break; if (maxLen < len) { *distances++ = maxLen = len; *distances++ = delta - 1; if (len == lenLimit) { *ptr1 = pair[0]; *ptr0 = pair[1]; return distances; } } } if (pb[len] < cur[len]) { *ptr1 = curMatch; ptr1 = pair + 1; curMatch = *ptr1; len1 = len; } else { *ptr0 = curMatch; ptr0 = pair; curMatch = *ptr0; len0 = len; } } } } static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) { CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1; CLzRef *ptr1 = son + (_cyclicBufferPos << 1); UInt32 len0 = 0, len1 = 0; for (;;) { UInt32 delta = pos - curMatch; if (cutValue-- == 0 || delta >= _cyclicBufferSize) { *ptr0 = *ptr1 = kEmptyHashValue; return; } { CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; UInt32 len = (len0 < len1 ? 
len0 : len1); if (pb[len] == cur[len]) { while (++len != lenLimit) if (pb[len] != cur[len]) break; { if (len == lenLimit) { *ptr1 = pair[0]; *ptr0 = pair[1]; return; } } } if (pb[len] < cur[len]) { *ptr1 = curMatch; ptr1 = pair + 1; curMatch = *ptr1; len1 = len; } else { *ptr0 = curMatch; ptr0 = pair; curMatch = *ptr0; len0 = len; } } } } #define MOVE_POS \ ++p->cyclicBufferPos; \ p->buffer++; \ if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p); #define MOVE_POS_RET MOVE_POS return offset; static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; } #define GET_MATCHES_HEADER2(minLen, ret_op) \ UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \ lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ cur = p->buffer; #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) #define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) #define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue #define GET_MATCHES_FOOTER(offset, maxLen) \ offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \ distances + offset, maxLen) - distances); MOVE_POS_RET; #define SKIP_FOOTER \ SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS; static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { UInt32 offset; GET_MATCHES_HEADER(2) HASH2_CALC; curMatch = p->hash[hashValue]; p->hash[hashValue] = p->pos; offset = 0; GET_MATCHES_FOOTER(offset, 1) } UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { UInt32 offset; GET_MATCHES_HEADER(3) HASH_ZIP_CALC; curMatch = p->hash[hashValue]; p->hash[hashValue] = p->pos; offset = 0; GET_MATCHES_FOOTER(offset, 2) } static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { UInt32 hash2Value, delta2, maxLen, offset; GET_MATCHES_HEADER(3) HASH3_CALC; delta2 = p->pos - p->hash[hash2Value]; curMatch = p->hash[kFix3HashSize + hashValue]; 
p->hash[hash2Value] = p->hash[kFix3HashSize + hashValue] = p->pos; maxLen = 2; offset = 0; if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) { for (; maxLen != lenLimit; maxLen++) if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) break; distances[0] = maxLen; distances[1] = delta2 - 1; offset = 2; if (maxLen == lenLimit) { SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS_RET; } } GET_MATCHES_FOOTER(offset, maxLen) } static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset; GET_MATCHES_HEADER(4) HASH4_CALC; delta2 = p->pos - p->hash[ hash2Value]; delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; curMatch = p->hash[kFix4HashSize + hashValue]; p->hash[ hash2Value] = p->hash[kFix3HashSize + hash3Value] = p->hash[kFix4HashSize + hashValue] = p->pos; maxLen = 1; offset = 0; if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) { distances[0] = maxLen = 2; distances[1] = delta2 - 1; offset = 2; } if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) { maxLen = 3; distances[offset + 1] = delta3 - 1; offset += 2; delta2 = delta3; } if (offset != 0) { for (; maxLen != lenLimit; maxLen++) if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) break; distances[offset - 2] = maxLen; if (maxLen == lenLimit) { SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS_RET; } } if (maxLen < 3) maxLen = 3; GET_MATCHES_FOOTER(offset, maxLen) } static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset; GET_MATCHES_HEADER(4) HASH4_CALC; delta2 = p->pos - p->hash[ hash2Value]; delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; curMatch = p->hash[kFix4HashSize + hashValue]; p->hash[ hash2Value] = p->hash[kFix3HashSize + hash3Value] = p->hash[kFix4HashSize + hashValue] = p->pos; maxLen = 1; offset = 0; if (delta2 < p->cyclicBufferSize && *(cur - delta2) == 
*cur) { distances[0] = maxLen = 2; distances[1] = delta2 - 1; offset = 2; } if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) { maxLen = 3; distances[offset + 1] = delta3 - 1; offset += 2; delta2 = delta3; } if (offset != 0) { for (; maxLen != lenLimit; maxLen++) if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) break; distances[offset - 2] = maxLen; if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; MOVE_POS_RET; } } if (maxLen < 3) maxLen = 3; offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), distances + offset, maxLen) - (distances)); MOVE_POS_RET } UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { UInt32 offset; GET_MATCHES_HEADER(3) HASH_ZIP_CALC; curMatch = p->hash[hashValue]; p->hash[hashValue] = p->pos; offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), distances, 2) - (distances)); MOVE_POS_RET } static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { do { SKIP_HEADER(2) HASH2_CALC; curMatch = p->hash[hashValue]; p->hash[hashValue] = p->pos; SKIP_FOOTER } while (--num != 0); } void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { do { SKIP_HEADER(3) HASH_ZIP_CALC; curMatch = p->hash[hashValue]; p->hash[hashValue] = p->pos; SKIP_FOOTER } while (--num != 0); } static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { do { UInt32 hash2Value; SKIP_HEADER(3) HASH3_CALC; curMatch = p->hash[kFix3HashSize + hashValue]; p->hash[hash2Value] = p->hash[kFix3HashSize + hashValue] = p->pos; SKIP_FOOTER } while (--num != 0); } static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { do { UInt32 hash2Value, hash3Value; SKIP_HEADER(4) HASH4_CALC; curMatch = p->hash[kFix4HashSize + hashValue]; p->hash[ hash2Value] = p->hash[kFix3HashSize + hash3Value] = p->pos; p->hash[kFix4HashSize + hashValue] = p->pos; SKIP_FOOTER } while (--num != 0); } static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { do { UInt32 hash2Value, hash3Value; 
SKIP_HEADER(4) HASH4_CALC; curMatch = p->hash[kFix4HashSize + hashValue]; p->hash[ hash2Value] = p->hash[kFix3HashSize + hash3Value] = p->hash[kFix4HashSize + hashValue] = p->pos; p->son[p->cyclicBufferPos] = curMatch; MOVE_POS } while (--num != 0); } void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { do { SKIP_HEADER(3) HASH_ZIP_CALC; curMatch = p->hash[hashValue]; p->hash[hashValue] = p->pos; p->son[p->cyclicBufferPos] = curMatch; MOVE_POS } while (--num != 0); } void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) { vTable->Init = (Mf_Init_Func)MatchFinder_Init; vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte; vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; if (!p->btMode) { vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; } else if (p->numHashBytes == 2) { vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; } else if (p->numHashBytes == 3) { vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; } else { vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; } } lrzip-0.651/lzma/C/LzFind.h000066400000000000000000000063321421175057200153610ustar00rootroot00000000000000/* LzFind.h -- Match finder for LZ algorithms 2009-04-22 : Igor Pavlov : Public domain */ #ifndef __LZ_FIND_H #define __LZ_FIND_H #include "Types.h" #ifdef __cplusplus extern "C" { #endif typedef UInt32 CLzRef; typedef struct _CMatchFinder { Byte *buffer; UInt32 pos; UInt32 posLimit; UInt32 streamPos; UInt32 lenLimit; UInt32 cyclicBufferPos; UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ UInt32 matchMaxLen; CLzRef *hash; CLzRef 
*son; UInt32 hashMask; UInt32 cutValue; Byte *bufferBase; ISeqInStream *stream; int streamEndWasReached; UInt32 blockSize; UInt32 keepSizeBefore; UInt32 keepSizeAfter; UInt32 numHashBytes; int directInput; size_t directInputRem; int btMode; int bigHash; UInt32 historySize; UInt32 fixedHashSize; UInt32 hashSizeSum; UInt32 numSons; SRes result; UInt32 crc[256]; } CMatchFinder; #define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) #define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)]) #define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) int MatchFinder_NeedMove(CMatchFinder *p); Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); void MatchFinder_MoveBlock(CMatchFinder *p); void MatchFinder_ReadIfRequired(CMatchFinder *p); void MatchFinder_Construct(CMatchFinder *p); /* Conditions: historySize <= 3 GB keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB */ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc *alloc); void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc); void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems); void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son, UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, UInt32 *distances, UInt32 maxLen); /* Conditions: Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. 
Mf_GetPointerToCurrentPos_Func's result must be used only before any other function */ typedef void (*Mf_Init_Func)(void *object); typedef Byte (*Mf_GetIndexByte_Func)(void *object, Int32 index); typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object); typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances); typedef void (*Mf_Skip_Func)(void *object, UInt32); typedef struct _IMatchFinder { Mf_Init_Func Init; Mf_GetIndexByte_Func GetIndexByte; Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; Mf_GetMatches_Func GetMatches; Mf_Skip_Func Skip; } IMatchFinder; void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable); void MatchFinder_Init(CMatchFinder *p); UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); #ifdef __cplusplus } #endif #endif lrzip-0.651/lzma/C/LzFindMt.c000066400000000000000000000533031421175057200156550ustar00rootroot00000000000000/* LzFindMt.c -- multithreaded Match finder for LZ algorithms 2009-09-20 : Igor Pavlov : Public domain */ #include "LzHash.h" #include "LzFindMt.h" #include "lrzip_core.h" void MtSync_Construct(CMtSync *p) { p->wasCreated = False; p->csWasInitialized = False; p->csWasEntered = False; Thread_Construct(&p->thread); Event_Construct(&p->canStart); Event_Construct(&p->wasStarted); Event_Construct(&p->wasStopped); Semaphore_Construct(&p->freeSemaphore); Semaphore_Construct(&p->filledSemaphore); } void MtSync_GetNextBlock(CMtSync *p) { if (p->needStart) { p->numProcessedBlocks = 1; p->needStart = False; p->stopWriting = False; p->exit = False; Event_Reset(&p->wasStarted); Event_Reset(&p->wasStopped); Event_Set(&p->canStart); Event_Wait(&p->wasStarted); } else { 
CriticalSection_Leave(&p->cs); p->csWasEntered = False; p->numProcessedBlocks++; Semaphore_Release1(&p->freeSemaphore); } Semaphore_Wait(&p->filledSemaphore); CriticalSection_Enter(&p->cs); p->csWasEntered = True; } /* MtSync_StopWriting must be called if Writing was started */ void MtSync_StopWriting(CMtSync *p) { UInt32 myNumBlocks = p->numProcessedBlocks; if (!Thread_WasCreated(&p->thread) || p->needStart) return; p->stopWriting = True; if (p->csWasEntered) { CriticalSection_Leave(&p->cs); p->csWasEntered = False; } Semaphore_Release1(&p->freeSemaphore); Event_Wait(&p->wasStopped); while (myNumBlocks++ != p->numProcessedBlocks) { Semaphore_Wait(&p->filledSemaphore); Semaphore_Release1(&p->freeSemaphore); } p->needStart = True; } void MtSync_Destruct(CMtSync *p) { if (Thread_WasCreated(&p->thread)) { MtSync_StopWriting(p); p->exit = True; if (p->needStart) Event_Set(&p->canStart); Thread_Wait(&p->thread); Thread_Close(&p->thread); } if (p->csWasInitialized) { CriticalSection_Delete(&p->cs); p->csWasInitialized = False; } Event_Close(&p->canStart); Event_Close(&p->wasStarted); Event_Close(&p->wasStopped); Semaphore_Close(&p->freeSemaphore); Semaphore_Close(&p->filledSemaphore); p->wasCreated = False; } #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } static SRes MtSync_Create2(CMtSync *p, unsigned (MY_STD_CALL *startAddress)(void *), void *obj, UInt32 numBlocks) { if (p->wasCreated) return SZ_OK; RINOK_THREAD(CriticalSection_Init(&p->cs)); p->csWasInitialized = True; RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)); RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted)); RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)); RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks)); RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks)); p->needStart = True; RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj)); p->wasCreated = True; return SZ_OK; } static SRes MtSync_Create(CMtSync *p, 
unsigned (MY_STD_CALL *startAddress)(void *), void *obj, UInt32 numBlocks) { SRes res = MtSync_Create2(p, startAddress, obj, numBlocks); if (res != SZ_OK) MtSync_Destruct(p); return res; } void MtSync_Init(CMtSync *p) { p->needStart = True; } #define kMtMaxValForNormalize 0xFFFFFFFF #define DEF_GetHeads2(name, v, action) \ static void GetHeads ## name(const Byte *p, UInt32 pos, \ UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) \ { action; for (; numHeads != 0; numHeads--) { \ const UInt32 value = (v); p++; *heads++ = pos - hash[value]; hash[value] = pos++; } } #define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;) DEF_GetHeads2(2, (p[0] | ((UInt32)p[1] << 8)), hashMask = hashMask; crc = crc; ) DEF_GetHeads(3, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask) DEF_GetHeads(4, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask) DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask) /* DEF_GetHeads(5, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) */ void HashThreadFunc(CMatchFinderMt *mt) { CMtSync *p = &mt->hashSync; for (;;) { UInt32 numProcessedBlocks = 0; Event_Wait(&p->canStart); Event_Set(&p->wasStarted); for (;;) { if (p->exit) return; if (p->stopWriting) { p->numProcessedBlocks = numProcessedBlocks; Event_Set(&p->wasStopped); break; } { CMatchFinder *mf = mt->MatchFinder; if (MatchFinder_NeedMove(mf)) { CriticalSection_Enter(&mt->btSync.cs); CriticalSection_Enter(&mt->hashSync.cs); { const Byte *beforePtr = MatchFinder_GetPointerToCurrentPos(mf); const Byte *afterPtr; MatchFinder_MoveBlock(mf); afterPtr = MatchFinder_GetPointerToCurrentPos(mf); mt->pointerToCurPos -= beforePtr - afterPtr; mt->buffer -= beforePtr - afterPtr; } CriticalSection_Leave(&mt->btSync.cs); CriticalSection_Leave(&mt->hashSync.cs); continue; } Semaphore_Wait(&p->freeSemaphore); MatchFinder_ReadIfRequired(mf); if (mf->pos > (kMtMaxValForNormalize - 
kMtHashBlockSize)) { UInt32 subValue = (mf->pos - mf->historySize - 1); MatchFinder_ReduceOffsets(mf, subValue); MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, mf->hashMask + 1); } { UInt32 *heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize; UInt32 num = mf->streamPos - mf->pos; heads[0] = 2; heads[1] = num; if (num >= mf->numHashBytes) { num = num - mf->numHashBytes + 1; if (num > kMtHashBlockSize - 2) num = kMtHashBlockSize - 2; mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); heads[0] += num; } mf->pos += num; mf->buffer += num; } } Semaphore_Release1(&p->filledSemaphore); } } } void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p) { MtSync_GetNextBlock(&p->hashSync); p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize; p->hashBufPosLimit += p->hashBuf[p->hashBufPos++]; p->hashNumAvail = p->hashBuf[p->hashBufPos++]; } #define kEmptyHashValue 0 /* #define MFMT_GM_INLINE */ #ifdef MFMT_GM_INLINE #define NO_INLINE MY_FAST_CALL Int32 NO_INLINE GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son, UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, UInt32 *_distances, UInt32 _maxLen, const UInt32 *hash, Int32 limit, UInt32 size, UInt32 *posRes) { do { UInt32 *distances = _distances + 1; UInt32 curMatch = pos - *hash++; CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1; CLzRef *ptr1 = son + (_cyclicBufferPos << 1); UInt32 len0 = 0, len1 = 0; UInt32 cutValue = _cutValue; UInt32 maxLen = _maxLen; for (;;) { UInt32 delta = pos - curMatch; if (cutValue-- == 0 || delta >= _cyclicBufferSize) { *ptr0 = *ptr1 = kEmptyHashValue; break; } { CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; UInt32 len = (len0 < len1 ? 
len0 : len1); if (pb[len] == cur[len]) { if (++len != lenLimit && pb[len] == cur[len]) while (++len != lenLimit) if (pb[len] != cur[len]) break; if (maxLen < len) { *distances++ = maxLen = len; *distances++ = delta - 1; if (len == lenLimit) { *ptr1 = pair[0]; *ptr0 = pair[1]; break; } } } if (pb[len] < cur[len]) { *ptr1 = curMatch; ptr1 = pair + 1; curMatch = *ptr1; len1 = len; } else { *ptr0 = curMatch; ptr0 = pair; curMatch = *ptr0; len0 = len; } } } pos++; _cyclicBufferPos++; cur++; { UInt32 num = (UInt32)(distances - _distances); *_distances = num - 1; _distances += num; limit -= num; } } while (limit > 0 && --size != 0); *posRes = pos; return limit; } #endif void BtGetMatches(CMatchFinderMt *p, UInt32 *distances) { UInt32 numProcessed = 0; UInt32 curPos = 2; UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); distances[1] = p->hashNumAvail; while (curPos < limit) { if (p->hashBufPos == p->hashBufPosLimit) { MatchFinderMt_GetNextBlock_Hash(p); distances[1] = numProcessed + p->hashNumAvail; if (p->hashNumAvail >= p->numHashBytes) continue; for (; p->hashNumAvail != 0; p->hashNumAvail--) distances[curPos++] = 0; break; } { UInt32 size = p->hashBufPosLimit - p->hashBufPos; UInt32 lenLimit = p->matchMaxLen; UInt32 pos = p->pos; UInt32 cyclicBufferPos = p->cyclicBufferPos; if (lenLimit >= p->hashNumAvail) lenLimit = p->hashNumAvail; { UInt32 size2 = p->hashNumAvail - lenLimit + 1; if (size2 < size) size = size2; size2 = p->cyclicBufferSize - cyclicBufferPos; if (size2 < size) size = size2; } #ifndef MFMT_GM_INLINE while (curPos < limit && size-- != 0) { UInt32 *startDistances = distances + curPos; UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++], pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, startDistances + 1, p->numHashBytes - 1) - startDistances); *startDistances = num - 1; curPos += num; cyclicBufferPos++; pos++; p->buffer++; } #else { UInt32 posRes; curPos = limit - GetMatchesSpecN(lenLimit, pos, 
p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, (Int32)(limit - curPos) , size, &posRes); p->hashBufPos += posRes - pos; cyclicBufferPos += posRes - pos; p->buffer += posRes - pos; pos = posRes; } #endif numProcessed += pos - p->pos; p->hashNumAvail -= pos - p->pos; p->pos = pos; if (cyclicBufferPos == p->cyclicBufferSize) cyclicBufferPos = 0; p->cyclicBufferPos = cyclicBufferPos; } } distances[0] = curPos; } void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) { CMtSync *sync = &p->hashSync; if (!sync->needStart) { CriticalSection_Enter(&sync->cs); sync->csWasEntered = True; } BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize); if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize) { UInt32 subValue = p->pos - p->cyclicBufferSize; MatchFinder_Normalize3(subValue, p->son, p->cyclicBufferSize * 2); p->pos -= subValue; } if (!sync->needStart) { CriticalSection_Leave(&sync->cs); sync->csWasEntered = False; } } void BtThreadFunc(CMatchFinderMt *mt) { CMtSync *p = &mt->btSync; for (;;) { UInt32 blockIndex = 0; Event_Wait(&p->canStart); Event_Set(&p->wasStarted); for (;;) { if (p->exit) return; if (p->stopWriting) { p->numProcessedBlocks = blockIndex; MtSync_StopWriting(&mt->hashSync); Event_Set(&p->wasStopped); break; } Semaphore_Wait(&p->freeSemaphore); BtFillBlock(mt, blockIndex++); Semaphore_Release1(&p->filledSemaphore); } } } void MatchFinderMt_Construct(CMatchFinderMt *p) { p->hashBuf = 0; MtSync_Construct(&p->hashSync); MtSync_Construct(&p->btSync); } void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAlloc *alloc) { alloc->Free(alloc, p->hashBuf); p->hashBuf = 0; } void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAlloc *alloc) { MtSync_Destruct(&p->hashSync); MtSync_Destruct(&p->btSync); MatchFinderMt_FreeMem(p, alloc); } #define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks) #define kBtBufferSize (kMtBtBlockSize * 
kMtBtNumBlocks) static unsigned MY_STD_CALL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; } static unsigned MY_STD_CALL BtThreadFunc2(void *p) { __maybe_unused Byte allocaDummy[0x180]; int i = 0; for (i = 0; i < 16; i++) allocaDummy[i] = (Byte)i; BtThreadFunc((CMatchFinderMt *)p); return 0; } SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc *alloc) { CMatchFinder *mf = p->MatchFinder; p->historySize = historySize; if (kMtBtBlockSize <= matchMaxLen * 4) return SZ_ERROR_PARAM; if (p->hashBuf == 0) { p->hashBuf = (UInt32 *)alloc->Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32)); if (p->hashBuf == 0) return SZ_ERROR_MEM; p->btBuf = p->hashBuf + kHashBufferSize; } keepAddBufferBefore += (kHashBufferSize + kBtBufferSize); keepAddBufferAfter += kMtHashBlockSize; if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) return SZ_ERROR_MEM; RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks)); RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks)); return SZ_OK; } /* Call it after ReleaseStream / SetStream */ void MatchFinderMt_Init(CMatchFinderMt *p) { CMatchFinder *mf = p->MatchFinder; p->btBufPos = p->btBufPosLimit = 0; p->hashBufPos = p->hashBufPosLimit = 0; MatchFinder_Init(mf); p->pointerToCurPos = MatchFinder_GetPointerToCurrentPos(mf); p->btNumAvailBytes = 0; p->lzPos = p->historySize + 1; p->hash = mf->hash; p->fixedHashSize = mf->fixedHashSize; p->crc = mf->crc; p->son = mf->son; p->matchMaxLen = mf->matchMaxLen; p->numHashBytes = mf->numHashBytes; p->pos = mf->pos; p->buffer = mf->buffer; p->cyclicBufferPos = mf->cyclicBufferPos; p->cyclicBufferSize = mf->cyclicBufferSize; p->cutValue = mf->cutValue; } /* ReleaseStream is required to finish multithreading */ void MatchFinderMt_ReleaseStream(CMatchFinderMt *p) { MtSync_StopWriting(&p->btSync); /* 
p->MatchFinder->ReleaseStream(); */ } void MatchFinderMt_Normalize(CMatchFinderMt *p) { MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize); p->lzPos = p->historySize + 1; } void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) { UInt32 blockIndex; MtSync_GetNextBlock(&p->btSync); blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask); p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize; p->btBufPosLimit += p->btBuf[p->btBufPos++]; p->btNumAvailBytes = p->btBuf[p->btBufPos++]; if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize) MatchFinderMt_Normalize(p); } const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p) { return p->pointerToCurPos; } #define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p); UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p) { GET_NEXT_BLOCK_IF_REQUIRED; return p->btNumAvailBytes; } Byte MatchFinderMt_GetIndexByte(CMatchFinderMt *p, Int32 index) { return p->pointerToCurPos[index]; } UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) { UInt32 hash2Value, curMatch2; UInt32 *hash = p->hash; const Byte *cur = p->pointerToCurPos; UInt32 lzPos = p->lzPos; MT_HASH2_CALC curMatch2 = hash[hash2Value]; hash[hash2Value] = lzPos; if (curMatch2 >= matchMinPos) if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) { *distances++ = 2; *distances++ = lzPos - curMatch2 - 1; } return distances; } UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) { UInt32 hash2Value, hash3Value, curMatch2, curMatch3; UInt32 *hash = p->hash; const Byte *cur = p->pointerToCurPos; UInt32 lzPos = p->lzPos; MT_HASH3_CALC curMatch2 = hash[ hash2Value]; curMatch3 = hash[kFix3HashSize + hash3Value]; hash[ hash2Value] = hash[kFix3HashSize + hash3Value] = lzPos; if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) { distances[1] = lzPos - curMatch2 - 1; if (cur[(ptrdiff_t)curMatch2 - 
lzPos + 2] == cur[2]) { distances[0] = 3; return distances + 2; } distances[0] = 2; distances += 2; } if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) { *distances++ = 3; *distances++ = lzPos - curMatch3 - 1; } return distances; } /* UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) { UInt32 hash2Value, hash3Value, hash4Value, curMatch2, curMatch3, curMatch4; UInt32 *hash = p->hash; const Byte *cur = p->pointerToCurPos; UInt32 lzPos = p->lzPos; MT_HASH4_CALC curMatch2 = hash[ hash2Value]; curMatch3 = hash[kFix3HashSize + hash3Value]; curMatch4 = hash[kFix4HashSize + hash4Value]; hash[ hash2Value] = hash[kFix3HashSize + hash3Value] = hash[kFix4HashSize + hash4Value] = lzPos; if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) { distances[1] = lzPos - curMatch2 - 1; if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) { distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3; return distances + 2; } distances[0] = 2; distances += 2; } if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) { distances[1] = lzPos - curMatch3 - 1; if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3]) { distances[0] = 4; return distances + 2; } distances[0] = 3; distances += 2; } if (curMatch4 >= matchMinPos) if ( cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] && cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3] ) { *distances++ = 4; *distances++ = lzPos - curMatch4 - 1; } return distances; } */ #define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++; UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances) { const UInt32 *btBuf = p->btBuf + p->btBufPos; UInt32 len = *btBuf++; p->btBufPos += 1 + len; p->btNumAvailBytes--; { UInt32 i; for (i = 0; i < len; i += 2) { *distances++ = *btBuf++; *distances++ = *btBuf++; } } INCREASE_LZ_POS return len; } UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances) { const UInt32 *btBuf = p->btBuf + 
p->btBufPos; UInt32 len = *btBuf++; p->btBufPos += 1 + len; if (len == 0) { if (p->btNumAvailBytes-- >= 4) len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances)); } else { /* Condition: there are matches in btBuf with length < p->numHashBytes */ UInt32 *distances2; p->btNumAvailBytes--; distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances); do { *distances2++ = *btBuf++; *distances2++ = *btBuf++; } while ((len -= 2) != 0); len = (UInt32)(distances2 - (distances)); } INCREASE_LZ_POS return len; } #define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED #define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash; #define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; } while (--num != 0); void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num) { SKIP_HEADER2_MT { p->btNumAvailBytes--; SKIP_FOOTER_MT } void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num) { SKIP_HEADER_MT(2) UInt32 hash2Value; MT_HASH2_CALC hash[hash2Value] = p->lzPos; SKIP_FOOTER_MT } void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num) { SKIP_HEADER_MT(3) UInt32 hash2Value, hash3Value; MT_HASH3_CALC hash[kFix3HashSize + hash3Value] = hash[ hash2Value] = p->lzPos; SKIP_FOOTER_MT } /* void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) { SKIP_HEADER_MT(4) UInt32 hash2Value, hash3Value, hash4Value; MT_HASH4_CALC hash[kFix4HashSize + hash4Value] = hash[kFix3HashSize + hash3Value] = hash[ hash2Value] = p->lzPos; SKIP_FOOTER_MT } */ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable) { vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinderMt_GetIndexByte; vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; vTable->GetMatches = 
(Mf_GetMatches_Func)MatchFinderMt_GetMatches; switch(p->MatchFinder->numHashBytes) { case 2: p->GetHeadsFunc = GetHeads2; p->MixMatchesFunc = (Mf_Mix_Matches)0; vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip; vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; break; case 3: p->GetHeadsFunc = GetHeads3; p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; break; default: /* case 4: */ p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4; /* p->GetHeadsFunc = GetHeads4; */ p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; break; /* default: p->GetHeadsFunc = GetHeads5; p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip; break; */ } } lrzip-0.651/lzma/C/LzFindMt.h000066400000000000000000000046111421175057200156600ustar00rootroot00000000000000/* LzFindMt.h -- multithreaded Match finder for LZ algorithms 2009-02-07 : Igor Pavlov : Public domain */ #ifndef __LZ_FIND_MT_H #define __LZ_FIND_MT_H #include "LzFind.h" #include "Threads.h" #ifdef __cplusplus extern "C" { #endif #define kMtHashBlockSize (1 << 13) #define kMtHashNumBlocks (1 << 3) #define kMtHashNumBlocksMask (kMtHashNumBlocks - 1) #define kMtBtBlockSize (1 << 14) #define kMtBtNumBlocks (1 << 6) #define kMtBtNumBlocksMask (kMtBtNumBlocks - 1) typedef struct _CMtSync { Bool wasCreated; Bool needStart; Bool exit; Bool stopWriting; CThread thread; CAutoResetEvent canStart; CAutoResetEvent wasStarted; CAutoResetEvent wasStopped; CSemaphore freeSemaphore; CSemaphore filledSemaphore; Bool csWasInitialized; Bool csWasEntered; CCriticalSection cs; UInt32 numProcessedBlocks; } CMtSync; typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances); /* kMtCacheLineDummy must be >= size_of_CPU_cache_line */ #define kMtCacheLineDummy 128 typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos, UInt32 *hash, UInt32 hashMask, 
UInt32 *heads, UInt32 numHeads, const UInt32 *crc); typedef struct _CMatchFinderMt { /* LZ */ const Byte *pointerToCurPos; UInt32 *btBuf; UInt32 btBufPos; UInt32 btBufPosLimit; UInt32 lzPos; UInt32 btNumAvailBytes; UInt32 *hash; UInt32 fixedHashSize; UInt32 historySize; const UInt32 *crc; Mf_Mix_Matches MixMatchesFunc; /* LZ + BT */ CMtSync btSync; Byte btDummy[kMtCacheLineDummy]; /* BT */ UInt32 *hashBuf; UInt32 hashBufPos; UInt32 hashBufPosLimit; UInt32 hashNumAvail; CLzRef *son; UInt32 matchMaxLen; UInt32 numHashBytes; UInt32 pos; Byte *buffer; UInt32 cyclicBufferPos; UInt32 cyclicBufferSize; /* it must be historySize + 1 */ UInt32 cutValue; /* BT + Hash */ CMtSync hashSync; /* Byte hashDummy[kMtCacheLineDummy]; */ /* Hash */ Mf_GetHeads GetHeadsFunc; CMatchFinder *MatchFinder; } CMatchFinderMt; void MatchFinderMt_Construct(CMatchFinderMt *p); void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAlloc *alloc); SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc *alloc); void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable); void MatchFinderMt_ReleaseStream(CMatchFinderMt *p); #ifdef __cplusplus } #endif #endif lrzip-0.651/lzma/C/LzHash.h000066400000000000000000000036541421175057200153700ustar00rootroot00000000000000/* LzHash.h -- HASH functions for LZ algorithms 2009-02-07 : Igor Pavlov : Public domain */ #ifndef __LZ_HASH_H #define __LZ_HASH_H #define kHash2Size (1 << 10) #define kHash3Size (1 << 16) #define kHash4Size (1 << 20) #define kFix3HashSize (kHash2Size) #define kFix4HashSize (kHash2Size + kHash3Size) #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) #define HASH2_CALC hashValue = cur[0] | ((UInt32)cur[1] << 8); #define HASH3_CALC { \ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ hash2Value = temp & (kHash2Size - 1); \ hashValue = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } #define HASH4_CALC { \ UInt32 temp = p->crc[cur[0]] ^ 
cur[1]; \ hash2Value = temp & (kHash2Size - 1); \ hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ hashValue = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & p->hashMask; } #define HASH5_CALC { \ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ hash2Value = temp & (kHash2Size - 1); \ hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)); \ hashValue = (hash4Value ^ (p->crc[cur[4]] << 3)) & p->hashMask; \ hash4Value &= (kHash4Size - 1); } /* #define HASH_ZIP_CALC hashValue = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ #define HASH_ZIP_CALC hashValue = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; #define MT_HASH2_CALC \ hash2Value = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); #define MT_HASH3_CALC { \ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ hash2Value = temp & (kHash2Size - 1); \ hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } #define MT_HASH4_CALC { \ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ hash2Value = temp & (kHash2Size - 1); \ hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); } #endif lrzip-0.651/lzma/C/LzmaDec.c000066400000000000000000000650351421175057200155120ustar00rootroot00000000000000/* LzmaDec.c -- LZMA Decoder 2009-09-20 : Igor Pavlov : Public domain */ #include "LzmaDec.h" #include #define kNumTopBits 24 #define kTopValue ((UInt32)1 << kNumTopBits) #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) #define kNumMoveBits 5 #define RC_INIT_SIZE 5 #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } #define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) #define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); #define UPDATE_1(p) 
range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); #define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ { UPDATE_0(p); i = (i + i); A0; } else \ { UPDATE_1(p); i = (i + i) + 1; A1; } #define GET_BIT(p, i) GET_BIT2(p, i, ; , ;) #define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); } #define TREE_DECODE(probs, limit, i) \ { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } /* #define _LZMA_SIZE_OPT */ #ifdef _LZMA_SIZE_OPT #define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) #else #define TREE_6_DECODE(probs, i) \ { i = 1; \ TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i); \ i -= 0x40; } #endif #define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } #define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) #define UPDATE_0_CHECK range = bound; #define UPDATE_1_CHECK range -= bound; code -= bound; #define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ { UPDATE_0_CHECK; i = (i + i); A0; } else \ { UPDATE_1_CHECK; i = (i + i) + 1; A1; } #define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) #define TREE_DECODE_CHECK(probs, limit, i) \ { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } #define kNumPosBitsMax 4 #define kNumPosStatesMax (1 << kNumPosBitsMax) #define kLenNumLowBits 3 #define kLenNumLowSymbols (1 << kLenNumLowBits) #define kLenNumMidBits 3 #define kLenNumMidSymbols (1 << kLenNumMidBits) #define kLenNumHighBits 8 #define kLenNumHighSymbols (1 << kLenNumHighBits) #define LenChoice 0 #define LenChoice2 (LenChoice + 1) #define LenLow (LenChoice2 + 1) #define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) #define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) #define kNumLenProbs (LenHigh + kLenNumHighSymbols) #define 
kNumStates 12 #define kNumLitStates 7 #define kStartPosModelIndex 4 #define kEndPosModelIndex 14 #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) #define kNumPosSlotBits 6 #define kNumLenToPosStates 4 #define kNumAlignBits 4 #define kAlignTableSize (1 << kNumAlignBits) #define kMatchMinLen 2 #define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) #define IsMatch 0 #define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) #define IsRepG0 (IsRep + kNumStates) #define IsRepG1 (IsRepG0 + kNumStates) #define IsRepG2 (IsRepG1 + kNumStates) #define IsRep0Long (IsRepG2 + kNumStates) #define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) #define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) #define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) #define LenCoder (Align + kAlignTableSize) #define RepLenCoder (LenCoder + kNumLenProbs) #define Literal (RepLenCoder + kNumLenProbs) #define LZMA_BASE_SIZE 1846 #define LZMA_LIT_SIZE 768 #define LzmaProps_GetNumProbs(p) ((UInt32)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) #if Literal != LZMA_BASE_SIZE StopCompilingDueBUG #endif #define LZMA_DIC_MIN (1 << 12) /* First LZMA-symbol is always decoded. 
And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization Out: Result: SZ_OK - OK SZ_ERROR_DATA - Error p->remainLen: < kMatchSpecLenStart : normal remain = kMatchSpecLenStart : finished = kMatchSpecLenStart + 1 : Flush marker = kMatchSpecLenStart + 2 : State Init Marker */ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte *bufLimit) { CLzmaProb *probs = p->probs; unsigned state = p->state; UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1; unsigned lc = p->prop.lc; Byte *dic = p->dic; SizeT dicBufSize = p->dicBufSize; SizeT dicPos = p->dicPos; UInt32 processedPos = p->processedPos; UInt32 checkDicSize = p->checkDicSize; unsigned len = 0; const Byte *buf = p->buf; UInt32 range = p->range; UInt32 code = p->code; do { CLzmaProb *prob; UInt32 bound; unsigned ttt; unsigned posState = processedPos & pbMask; prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; IF_BIT_0(prob) { unsigned symbol; UPDATE_0(prob); prob = probs + Literal; if (checkDicSize != 0 || processedPos != 0) prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); if (state < kNumLitStates) { state -= (state < 4) ? state : 3; symbol = 1; do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100); } else { unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; unsigned offs = 0x100; state -= (state < 10) ? 
3 : 6; symbol = 1; do { unsigned bit; CLzmaProb *probLit; matchByte <<= 1; bit = (matchByte & offs); probLit = prob + offs + bit + symbol; GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) } while (symbol < 0x100); } dic[dicPos++] = (Byte)symbol; processedPos++; continue; } else { UPDATE_1(prob); prob = probs + IsRep + state; IF_BIT_0(prob) { UPDATE_0(prob); state += kNumStates; prob = probs + LenCoder; } else { UPDATE_1(prob); if (checkDicSize == 0 && processedPos == 0) return SZ_ERROR_DATA; prob = probs + IsRepG0 + state; IF_BIT_0(prob) { UPDATE_0(prob); prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; IF_BIT_0(prob) { UPDATE_0(prob); dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; dicPos++; processedPos++; state = state < kNumLitStates ? 9 : 11; continue; } UPDATE_1(prob); } else { UInt32 distance; UPDATE_1(prob); prob = probs + IsRepG1 + state; IF_BIT_0(prob) { UPDATE_0(prob); distance = rep1; } else { UPDATE_1(prob); prob = probs + IsRepG2 + state; IF_BIT_0(prob) { UPDATE_0(prob); distance = rep2; } else { UPDATE_1(prob); distance = rep3; rep3 = rep2; } rep2 = rep1; } rep1 = rep0; rep0 = distance; } state = state < kNumLitStates ? 8 : 11; prob = probs + RepLenCoder; } { unsigned limit, offset; CLzmaProb *probLen = prob + LenChoice; IF_BIT_0(probLen) { UPDATE_0(probLen); probLen = prob + LenLow + (posState << kLenNumLowBits); offset = 0; limit = (1 << kLenNumLowBits); } else { UPDATE_1(probLen); probLen = prob + LenChoice2; IF_BIT_0(probLen) { UPDATE_0(probLen); probLen = prob + LenMid + (posState << kLenNumMidBits); offset = kLenNumLowSymbols; limit = (1 << kLenNumMidBits); } else { UPDATE_1(probLen); probLen = prob + LenHigh; offset = kLenNumLowSymbols + kLenNumMidSymbols; limit = (1 << kLenNumHighBits); } } TREE_DECODE(probLen, limit, len); len += offset; } if (state >= kNumStates) { UInt32 distance; prob = probs + PosSlot + ((len < kNumLenToPosStates ? 
len : kNumLenToPosStates - 1) << kNumPosSlotBits); TREE_6_DECODE(prob, distance); if (distance >= kStartPosModelIndex) { unsigned posSlot = (unsigned)distance; int numDirectBits = (int)(((distance >> 1) - 1)); distance = (2 | (distance & 1)); if (posSlot < kEndPosModelIndex) { distance <<= numDirectBits; prob = probs + SpecPos + distance - posSlot - 1; { UInt32 mask = 1; unsigned i = 1; do { GET_BIT2(prob + i, i, ; , distance |= mask); mask <<= 1; } while (--numDirectBits != 0); } } else { numDirectBits -= kNumAlignBits; do { NORMALIZE range >>= 1; { UInt32 t; code -= range; t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ distance = (distance << 1) + (t + 1); code += range & t; } /* distance <<= 1; if (code >= range) { code -= range; distance |= 1; } */ } while (--numDirectBits != 0); prob = probs + Align; distance <<= kNumAlignBits; { unsigned i = 1; GET_BIT2(prob + i, i, ; , distance |= 1); GET_BIT2(prob + i, i, ; , distance |= 2); GET_BIT2(prob + i, i, ; , distance |= 4); GET_BIT2(prob + i, i, ; , distance |= 8); } if (distance == (UInt32)0xFFFFFFFF) { len += kMatchSpecLenStart; state -= kNumStates; break; } } } rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance + 1; if (checkDicSize == 0) { if (distance >= processedPos) return SZ_ERROR_DATA; } else if (distance >= checkDicSize) return SZ_ERROR_DATA; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; } len += kMatchMinLen; if (limit == dicPos) return SZ_ERROR_DATA; { SizeT rem = limit - dicPos; unsigned curLen = ((rem < len) ? (unsigned)rem : len); SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? 
dicBufSize : 0); processedPos += curLen; len -= curLen; if (pos + curLen <= dicBufSize) { Byte *dest = dic + dicPos; ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; const Byte *lim = dest + curLen; dicPos += curLen; do *(dest) = (Byte)*(dest + src); while (++dest != lim); } else { do { dic[dicPos++] = dic[pos]; if (++pos == dicBufSize) pos = 0; } while (--curLen != 0); } } } } while (dicPos < limit && buf < bufLimit); NORMALIZE; p->buf = buf; p->range = range; p->code = code; p->remainLen = len; p->dicPos = dicPos; p->processedPos = processedPos; p->reps[0] = rep0; p->reps[1] = rep1; p->reps[2] = rep2; p->reps[3] = rep3; p->state = state; return SZ_OK; } static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) { if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) { Byte *dic = p->dic; SizeT dicPos = p->dicPos; SizeT dicBufSize = p->dicBufSize; unsigned len = p->remainLen; UInt32 rep0 = p->reps[0]; if (limit - dicPos < len) len = (unsigned)(limit - dicPos); if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) p->checkDicSize = p->prop.dicSize; p->processedPos += len; p->remainLen -= len; while (len-- != 0) { dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? 
dicBufSize : 0)]; dicPos++; } p->dicPos = dicPos; } } static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) { do { SizeT limit2 = limit; if (p->checkDicSize == 0) { UInt32 rem = p->prop.dicSize - p->processedPos; if (limit - p->dicPos > rem) limit2 = p->dicPos + rem; } RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit)); if (p->processedPos >= p->prop.dicSize) p->checkDicSize = p->prop.dicSize; LzmaDec_WriteRem(p, limit); } while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); if (p->remainLen > kMatchSpecLenStart) { p->remainLen = kMatchSpecLenStart; } return 0; } typedef enum { DUMMY_ERROR, /* unexpected end of input stream */ DUMMY_LIT, DUMMY_MATCH, DUMMY_REP } ELzmaDummy; static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize) { UInt32 range = p->range; UInt32 code = p->code; const Byte *bufLimit = buf + inSize; CLzmaProb *probs = p->probs; unsigned state = p->state; ELzmaDummy res; { CLzmaProb *prob; UInt32 bound; unsigned ttt; unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1); prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ prob = probs + Literal; if (p->checkDicSize != 0 || p->processedPos != 0) prob += (LZMA_LIT_SIZE * ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); if (state < kNumLitStates) { unsigned symbol = 1; do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); } else { unsigned matchByte = p->dic[p->dicPos - p->reps[0] + ((p->dicPos < p->reps[0]) ? 
p->dicBufSize : 0)]; unsigned offs = 0x100; unsigned symbol = 1; do { unsigned bit; CLzmaProb *probLit; matchByte <<= 1; bit = (matchByte & offs); probLit = prob + offs + bit + symbol; GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) } while (symbol < 0x100); } res = DUMMY_LIT; } else { unsigned len; UPDATE_1_CHECK; prob = probs + IsRep + state; IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK; state = 0; prob = probs + LenCoder; res = DUMMY_MATCH; } else { UPDATE_1_CHECK; res = DUMMY_REP; prob = probs + IsRepG0 + state; IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK; prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK; NORMALIZE_CHECK; return DUMMY_REP; } else { UPDATE_1_CHECK; } } else { UPDATE_1_CHECK; prob = probs + IsRepG1 + state; IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK; } else { UPDATE_1_CHECK; prob = probs + IsRepG2 + state; IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK; } else { UPDATE_1_CHECK; } } } state = kNumStates; prob = probs + RepLenCoder; } { unsigned limit, offset; CLzmaProb *probLen = prob + LenChoice; IF_BIT_0_CHECK(probLen) { UPDATE_0_CHECK; probLen = prob + LenLow + (posState << kLenNumLowBits); offset = 0; limit = 1 << kLenNumLowBits; } else { UPDATE_1_CHECK; probLen = prob + LenChoice2; IF_BIT_0_CHECK(probLen) { UPDATE_0_CHECK; probLen = prob + LenMid + (posState << kLenNumMidBits); offset = kLenNumLowSymbols; limit = 1 << kLenNumMidBits; } else { UPDATE_1_CHECK; probLen = prob + LenHigh; offset = kLenNumLowSymbols + kLenNumMidSymbols; limit = 1 << kLenNumHighBits; } } TREE_DECODE_CHECK(probLen, limit, len); len += offset; } if (state < 4) { unsigned posSlot; prob = probs + PosSlot + ((len < kNumLenToPosStates ? 
len : kNumLenToPosStates - 1) << kNumPosSlotBits); TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); if (posSlot >= kStartPosModelIndex) { int numDirectBits = ((posSlot >> 1) - 1); /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ if (posSlot < kEndPosModelIndex) { prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1; } else { numDirectBits -= kNumAlignBits; do { NORMALIZE_CHECK range >>= 1; code -= range & (((code - range) >> 31) - 1); /* if (code >= range) code -= range; */ } while (--numDirectBits != 0); prob = probs + Align; numDirectBits = kNumAlignBits; } { unsigned i = 1; do { GET_BIT_CHECK(prob + i, i); } while (--numDirectBits != 0); } } } } } NORMALIZE_CHECK; return res; } static void LzmaDec_InitRc(CLzmaDec *p, const Byte *data) { p->code = ((UInt32)data[1] << 24) | ((UInt32)data[2] << 16) | ((UInt32)data[3] << 8) | ((UInt32)data[4]); p->range = 0xFFFFFFFF; p->needFlush = 0; } void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState) { p->needFlush = 1; p->remainLen = 0; p->tempBufSize = 0; if (initDic) { p->processedPos = 0; p->checkDicSize = 0; p->needInitState = 1; } if (initState) p->needInitState = 1; } void LzmaDec_Init(CLzmaDec *p) { p->dicPos = 0; LzmaDec_InitDicAndState(p, True, True); } static void LzmaDec_InitStateReal(CLzmaDec *p) { UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp)); UInt32 i; CLzmaProb *probs = p->probs; for (i = 0; i < numProbs; i++) probs[i] = kBitModelTotal >> 1; p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; p->state = 0; p->needInitState = 0; } SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) { SizeT inSize = *srcLen; (*srcLen) = 0; LzmaDec_WriteRem(p, dicLimit); *status = LZMA_STATUS_NOT_SPECIFIED; while (p->remainLen != kMatchSpecLenStart) { int checkEndMarkNow; if (p->needFlush != 0) { for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, 
inSize--) p->tempBuf[p->tempBufSize++] = *src++; if (p->tempBufSize < RC_INIT_SIZE) { *status = LZMA_STATUS_NEEDS_MORE_INPUT; return SZ_OK; } if (p->tempBuf[0] != 0) return SZ_ERROR_DATA; LzmaDec_InitRc(p, p->tempBuf); p->tempBufSize = 0; } checkEndMarkNow = 0; if (p->dicPos >= dicLimit) { if (p->remainLen == 0 && p->code == 0) { *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; return SZ_OK; } if (finishMode == LZMA_FINISH_ANY) { *status = LZMA_STATUS_NOT_FINISHED; return SZ_OK; } if (p->remainLen != 0) { *status = LZMA_STATUS_NOT_FINISHED; return SZ_ERROR_DATA; } checkEndMarkNow = 1; } if (p->needInitState) LzmaDec_InitStateReal(p); if (p->tempBufSize == 0) { SizeT processed; const Byte *bufLimit; if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { int dummyRes = LzmaDec_TryDummy(p, src, inSize); if (dummyRes == DUMMY_ERROR) { memcpy(p->tempBuf, src, inSize); p->tempBufSize = (unsigned)inSize; (*srcLen) += inSize; *status = LZMA_STATUS_NEEDS_MORE_INPUT; return SZ_OK; } if (checkEndMarkNow && dummyRes != DUMMY_MATCH) { *status = LZMA_STATUS_NOT_FINISHED; return SZ_ERROR_DATA; } bufLimit = src; } else bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; p->buf = src; if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) return SZ_ERROR_DATA; processed = (SizeT)(p->buf - src); (*srcLen) += processed; src += processed; inSize -= processed; } else { unsigned rem = p->tempBufSize, lookAhead = 0; while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) p->tempBuf[rem++] = src[lookAhead++]; p->tempBufSize = rem; if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem); if (dummyRes == DUMMY_ERROR) { (*srcLen) += lookAhead; *status = LZMA_STATUS_NEEDS_MORE_INPUT; return SZ_OK; } if (checkEndMarkNow && dummyRes != DUMMY_MATCH) { *status = LZMA_STATUS_NOT_FINISHED; return SZ_ERROR_DATA; } } p->buf = p->tempBuf; if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) return SZ_ERROR_DATA; lookAhead -= (rem - 
(unsigned)(p->buf - p->tempBuf)); (*srcLen) += lookAhead; src += lookAhead; inSize -= lookAhead; p->tempBufSize = 0; } } if (p->code == 0) *status = LZMA_STATUS_FINISHED_WITH_MARK; return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA; } SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) { SizeT outSize = *destLen; SizeT inSize = *srcLen; *srcLen = *destLen = 0; for (;;) { SizeT inSizeCur = inSize, outSizeCur, dicPos; ELzmaFinishMode curFinishMode; SRes res; if (p->dicPos == p->dicBufSize) p->dicPos = 0; dicPos = p->dicPos; if (outSize > p->dicBufSize - dicPos) { outSizeCur = p->dicBufSize; curFinishMode = LZMA_FINISH_ANY; } else { outSizeCur = dicPos + outSize; curFinishMode = finishMode; } res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); src += inSizeCur; inSize -= inSizeCur; *srcLen += inSizeCur; outSizeCur = p->dicPos - dicPos; memcpy(dest, p->dic + dicPos, outSizeCur); dest += outSizeCur; outSize -= outSizeCur; *destLen += outSizeCur; if (res != 0) return res; if (outSizeCur == 0 || outSize == 0) return SZ_OK; } } void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc) { alloc->Free(alloc, p->probs); p->probs = 0; } static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc) { alloc->Free(alloc, p->dic); p->dic = 0; } void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc) { LzmaDec_FreeProbs(p, alloc); LzmaDec_FreeDict(p, alloc); } SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) { UInt32 dicSize; Byte d; if (size < LZMA_PROPS_SIZE) return SZ_ERROR_UNSUPPORTED; else dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); if (dicSize < LZMA_DIC_MIN) dicSize = LZMA_DIC_MIN; p->dicSize = dicSize; d = data[0]; if (d >= (9 * 5 * 5)) return SZ_ERROR_UNSUPPORTED; p->lc = d % 9; d /= 9; p->pb = d / 5; p->lp = d % 5; return SZ_OK; } static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps 
*propNew, ISzAlloc *alloc) { UInt32 numProbs = LzmaProps_GetNumProbs(propNew); if (p->probs == 0 || numProbs != p->numProbs) { LzmaDec_FreeProbs(p, alloc); p->probs = (CLzmaProb *)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb)); p->numProbs = numProbs; if (p->probs == 0) return SZ_ERROR_MEM; } return SZ_OK; } SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc) { CLzmaProps propNew; RINOK(LzmaProps_Decode(&propNew, props, propsSize)); RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); p->prop = propNew; return SZ_OK; } SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc) { CLzmaProps propNew; SizeT dicBufSize; RINOK(LzmaProps_Decode(&propNew, props, propsSize)); RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); dicBufSize = propNew.dicSize; if (p->dic == 0 || dicBufSize != p->dicBufSize) { LzmaDec_FreeDict(p, alloc); p->dic = (Byte *)alloc->Alloc(alloc, dicBufSize); if (p->dic == 0) { LzmaDec_FreeProbs(p, alloc); return SZ_ERROR_MEM; } } p->dicBufSize = dicBufSize; p->prop = propNew; return SZ_OK; } SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAlloc *alloc) { CLzmaDec p; SRes res; SizeT inSize = *srcLen; SizeT outSize = *destLen; *srcLen = *destLen = 0; if (inSize < RC_INIT_SIZE) return SZ_ERROR_INPUT_EOF; LzmaDec_Construct(&p); res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc); if (res != 0) return res; p.dic = dest; p.dicBufSize = outSize; LzmaDec_Init(&p); *srcLen = inSize; res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) res = SZ_ERROR_INPUT_EOF; (*destLen) = p.dicPos; LzmaDec_FreeProbs(&p, alloc); return res; } lrzip-0.651/lzma/C/LzmaDec.h000066400000000000000000000153211421175057200155100ustar00rootroot00000000000000/* LzmaDec.h -- LZMA Decoder 2009-02-07 : Igor 
Pavlov : Public domain */ #ifndef __LZMA_DEC_H #define __LZMA_DEC_H #include "Types.h" #ifdef __cplusplus extern "C" { #endif /* #define _LZMA_PROB32 */ /* _LZMA_PROB32 can increase the speed on some CPUs, but memory usage for CLzmaDec::probs will be doubled in that case */ #ifdef _LZMA_PROB32 #define CLzmaProb UInt32 #else #define CLzmaProb UInt16 #endif /* ---------- LZMA Properties ---------- */ #define LZMA_PROPS_SIZE 5 typedef struct _CLzmaProps { unsigned lc, lp, pb; UInt32 dicSize; } CLzmaProps; /* LzmaProps_Decode - decodes properties Returns: SZ_OK SZ_ERROR_UNSUPPORTED - Unsupported properties */ SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); /* ---------- LZMA Decoder state ---------- */ /* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ #define LZMA_REQUIRED_INPUT_MAX 20 typedef struct { CLzmaProps prop; CLzmaProb *probs; Byte *dic; const Byte *buf; UInt32 range, code; SizeT dicPos; SizeT dicBufSize; UInt32 processedPos; UInt32 checkDicSize; unsigned state; UInt32 reps[4]; unsigned remainLen; int needFlush; int needInitState; UInt32 numProbs; unsigned tempBufSize; Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; } CLzmaDec; #define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; } void LzmaDec_Init(CLzmaDec *p); /* There are two types of LZMA streams: 0) Stream with end mark. That end mark adds about 6 bytes to compressed size. 1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ typedef enum { LZMA_FINISH_ANY, /* finish at any point */ LZMA_FINISH_END /* block must be finished at the end */ } ELzmaFinishMode; /* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! You must use LZMA_FINISH_END, when you know that current output buffer covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. 
If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
and output value of destLen will be less than output buffer size limit.
You can check status result also.

You can use multiple checks to test data integrity after full decompression:
  1) Check Result and "status" variable.
  2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
  3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
     You must use correct finish mode in that case. */

typedef enum
{
  LZMA_STATUS_NOT_SPECIFIED,               /* use main error code instead */
  LZMA_STATUS_FINISHED_WITH_MARK,          /* stream was finished with end mark. */
  LZMA_STATUS_NOT_FINISHED,                /* stream was not finished */
  LZMA_STATUS_NEEDS_MORE_INPUT,            /* you must provide more input bytes */
  LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK  /* the stream may have finished without an end mark */
} ELzmaStatus;

/* ELzmaStatus is used only as output value for function call */


/* ---------- Interfaces ---------- */

/* There are 3 levels of interfaces:
     1) Dictionary Interface
     2) Buffer Interface
     3) One Call Interface
   You can select any of these interfaces, but don't mix functions from different
   groups for same object. */


/* There are two variants to allocate state for Dictionary Interface:
     1) LzmaDec_Allocate / LzmaDec_Free
     2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
   You can use variant 2, if you set dictionary buffer manually.
   For Buffer Interface you must always use variant 1.

LzmaDec_Allocate* can return:
  SZ_OK
  SZ_ERROR_MEM         - Memory allocation error
  SZ_ERROR_UNSUPPORTED - Unsupported properties
*/

SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc);
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc);

SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc);
void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc);

/* ---------- Dictionary Interface ---------- */

/* You can use it, if you want to eliminate the overhead for data copying from
   dictionary to some other external buffer.
   You must work with CLzmaDec variables directly in this interface.

   STEPS:
     LzmaDec_Construct()
     LzmaDec_Allocate()
     for (each new stream)
     {
       LzmaDec_Init()
       while (it needs more decompression)
       {
         LzmaDec_DecodeToDic()
         use data from CLzmaDec::dic and update CLzmaDec::dicPos
       }
     }
     LzmaDec_Free()
*/

/* LzmaDec_DecodeToDic

   The decoding to internal dictionary buffer (CLzmaDec::dic).
   You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!

finishMode:
  It has meaning only if the decoding reaches output limit (dicLimit).
  LZMA_FINISH_ANY - Decode just dicLimit bytes.
  LZMA_FINISH_END - Stream must be finished after dicLimit.

Returns:
  SZ_OK
    status:
      LZMA_STATUS_FINISHED_WITH_MARK
      LZMA_STATUS_NOT_FINISHED
      LZMA_STATUS_NEEDS_MORE_INPUT
      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
  SZ_ERROR_DATA - Data error
*/

SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);


/* ---------- Buffer Interface ---------- */

/* It's a zlib-like interface.
   See LzmaDec_DecodeToDic description for information about STEPS and return results,
   but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic
   and you don't need to work with CLzmaDec variables manually.

finishMode:
  It has meaning only if the decoding reaches output limit (*destLen).
  LZMA_FINISH_ANY - Decode just destLen bytes.
LZMA_FINISH_END - Stream must be finished after (*destLen). */ SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); /* ---------- One Call Interface ---------- */ /* LzmaDecode finishMode: It has meaning only if the decoding reaches output limit (*destLen). LZMA_FINISH_ANY - Decode just destLen bytes. LZMA_FINISH_END - Stream must be finished after (*destLen). Returns: SZ_OK status: LZMA_STATUS_FINISHED_WITH_MARK LZMA_STATUS_NOT_FINISHED LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK SZ_ERROR_DATA - Data error SZ_ERROR_MEM - Memory allocation error SZ_ERROR_UNSUPPORTED - Unsupported properties SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). */ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAlloc *alloc); #ifdef __cplusplus } #endif #endif lrzip-0.651/lzma/C/LzmaEnc.c000066400000000000000000001721011421175057200155150ustar00rootroot00000000000000/* LzmaEnc.c -- LZMA Encoder 2010-04-16 : Igor Pavlov : Public domain */ #include /* #define SHOW_STAT */ /* #define SHOW_STAT2 */ #if defined(SHOW_STAT) || defined(SHOW_STAT2) #include #endif #include "LzmaEnc.h" #include "LzFind.h" #ifndef _7ZIP_ST #include "LzFindMt.h" #endif #include "lrzip_core.h" #ifdef SHOW_STAT static int ttt = 0; #endif #define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1) #define kBlockSize (9 << 10) #define kUnpackBlockSize (1 << 18) #define kMatchArraySize (1 << 21) #define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX) #define kNumMaxDirectBits (31) #define kNumTopBits 24 #define kTopValue ((UInt32)1 << kNumTopBits) #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) #define kNumMoveBits 5 #define kProbInitValue (kBitModelTotal >> 1) #define kNumMoveReducingBits 4 #define kNumBitPriceShiftBits 4 #define 
kBitPrice (1 << kNumBitPriceShiftBits) void LzmaEncProps_Init(CLzmaEncProps *p) { p->level = 5; p->dictSize = p->mc = 0; p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; p->writeEndMark = 0; } void LzmaEncProps_Normalize(CLzmaEncProps *p) { int level = p->level; if (level < 0) level = 5; p->level = level; if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26))); if (p->lc < 0) p->lc = 3; if (p->lp < 0) p->lp = 0; if (p->pb < 0) p->pb = 2; if (p->algo < 0) p->algo = (level < 5 ? 0 : 1); if (p->fb < 0) p->fb = (level < 7 ? 32 : 64); if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); if (p->numHashBytes < 0) p->numHashBytes = 4; if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1); if (p->numThreads < 0) p->numThreads = #ifndef _7ZIP_ST ((p->btMode && p->algo) ? 2 : 1); #else 1; #endif } UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) { CLzmaEncProps props = *props2; LzmaEncProps_Normalize(&props); return props.dictSize; } /* #define LZMA_LOG_BSR */ /* Define it for Intel's CPU */ #ifdef LZMA_LOG_BSR #define kDicLogSizeMaxCompress 30 #define BSR2_RET(pos, res) { unsigned long i; _BitScanReverse(&i, (pos)); res = (i + i) + ((pos >> (i - 1)) & 1); } UInt32 GetPosSlot1(UInt32 pos) { UInt32 res; BSR2_RET(pos, res); return res; } #define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } #define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } #else #define kNumLogBits (9 + (int)sizeof(size_t) / 2) #define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) void LzmaEnc_FastPosInit(Byte *g_FastPos) { int c = 2, slotFast; g_FastPos[0] = 0; g_FastPos[1] = 1; for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++) { UInt32 k = (1 << ((slotFast >> 1) - 1)); UInt32 j; for (j = 0; j < k; j++, c++) g_FastPos[c] = (Byte)slotFast; } } #define BSR2_RET(pos, res) { UInt32 i = 6 + ((kNumLogBits - 1) & \ (0 - (((((UInt32)1 << 
(kNumLogBits + 6)) - 1) - pos) >> 31))); \ res = p->g_FastPos[pos >> i] + (i * 2); } /* #define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \ p->g_FastPos[pos >> 6] + 12 : \ p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; } */ #define GetPosSlot1(pos) p->g_FastPos[pos] #define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } #define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos]; else BSR2_RET(pos, res); } #endif #define LZMA_NUM_REPS 4 typedef unsigned CState; typedef struct { UInt32 price; CState state; int prev1IsChar; int prev2; UInt32 posPrev2; UInt32 backPrev2; UInt32 posPrev; UInt32 backPrev; UInt32 backs[LZMA_NUM_REPS]; } COptimal; #define kNumOpts (1 << 12) #define kNumLenToPosStates 4 #define kNumPosSlotBits 6 #define kDicLogSizeMin 0 #define kDicLogSizeMax 32 #define kDistTableSizeMax (kDicLogSizeMax * 2) #define kNumAlignBits 4 #define kAlignTableSize (1 << kNumAlignBits) #define kAlignMask (kAlignTableSize - 1) #define kStartPosModelIndex 4 #define kEndPosModelIndex 14 #define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex) #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) #ifdef _LZMA_PROB32 #define CLzmaProb UInt32 #else #define CLzmaProb UInt16 #endif #define LZMA_PB_MAX 4 #define LZMA_LC_MAX 8 #define LZMA_LP_MAX 4 #define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) #define kLenNumLowBits 3 #define kLenNumLowSymbols (1 << kLenNumLowBits) #define kLenNumMidBits 3 #define kLenNumMidSymbols (1 << kLenNumMidBits) #define kLenNumHighBits 8 #define kLenNumHighSymbols (1 << kLenNumHighBits) #define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) #define LZMA_MATCH_LEN_MIN 2 #define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) #define kNumStates 12 typedef struct { CLzmaProb choice; CLzmaProb choice2; CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits]; CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits]; 
CLzmaProb high[kLenNumHighSymbols];
} CLenEnc;

/* Length coder (CLenEnc) plus a cached price table per position state. */
typedef struct
{
  CLenEnc p;
  UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];
  UInt32 tableSize;                          /* number of symbols priced by LenPriceEnc_UpdateTable */
  UInt32 counters[LZMA_NUM_PB_STATES_MAX];   /* countdown to the next price refresh, see LenEnc_Encode2 */
} CLenPriceEnc;

/* Range coder state together with its output buffer. */
typedef struct
{
  UInt32 range;
  Byte cache;               /* byte held back until a possible carry is resolved (RangeEnc_ShiftLow) */
  UInt64 low;               /* bit 32 is treated as the carry by RangeEnc_ShiftLow */
  UInt64 cacheSize;         /* pending output bytes: the cached one plus any 0xFF run */
  Byte *buf;                /* write cursor inside [bufBase, bufLim) */
  Byte *bufLim;
  Byte *bufBase;            /* RC_BUF_SIZE bytes obtained in RangeEnc_Alloc */
  ISeqOutStream *outStream;
  UInt64 processed;         /* bytes already handed to outStream */
  SRes res;                 /* once not SZ_OK, RangeEnc_FlushStream stops writing */
} CRangeEnc;

/* Snapshot of the adaptive model, filled by LzmaEnc_SaveState and read back
   by LzmaEnc_RestoreState. */
typedef struct
{
  CLzmaProb *litProbs;

  CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
  CLzmaProb isRep[kNumStates];
  CLzmaProb isRepG0[kNumStates];
  CLzmaProb isRepG1[kNumStates];
  CLzmaProb isRepG2[kNumStates];
  CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];

  CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
  CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
  CLzmaProb posAlignEncoder[1 << kNumAlignBits];

  CLenPriceEnc lenEnc;
  CLenPriceEnc repLenEnc;

  UInt32 reps[LZMA_NUM_REPS];
  UInt32 state;
} CSaveState;

/* Complete encoder object; CLzmaEncHandle values are cast to this type. */
typedef struct
{
  IMatchFinder matchFinder;   /* function table, always called with matchFinderObj as first argument */
  void *matchFinderObj;

  #ifndef _7ZIP_ST
  Bool mtMode;
  CMatchFinderMt matchFinderMt;
  #endif

  CMatchFinder matchFinderBase;

  #ifndef _7ZIP_ST
  Byte pad[128];
  #endif

  UInt32 optimumEndIndex;       /* set by Backward(); end of the chain being replayed */
  UInt32 optimumCurrentIndex;   /* next node of opt[] that GetOptimum hands out */

  UInt32 longestMatchLength;    /* stored by GetOptimum for its next call */
  UInt32 numPairs;              /* stored together with longestMatchLength */
  UInt32 numAvail;              /* refreshed by ReadMatchDistances */
  COptimal opt[kNumOpts];

  #ifndef LZMA_LOG_BSR
  Byte g_FastPos[1 << kNumLogBits];
  #endif

  UInt32 ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
  UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];   /* (length, distance) pairs from matchFinder.GetMatches */
  UInt32 numFastBytes;
  UInt32 additionalOffset;      /* incremented by ReadMatchDistances and MovePos */
  UInt32 reps[LZMA_NUM_REPS];
  UInt32 state;

  UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
  UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
  UInt32 alignPrices[kAlignTableSize];
  UInt32 alignPriceCount;

  UInt32 distTableSize;

  unsigned lc, lp, pb;
  unsigned lpMask, pbMask;

  CLzmaProb *litProbs;          /* (0x300 << lclp) entries are copied by Save/RestoreState */

  CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
  CLzmaProb isRep[kNumStates];
  CLzmaProb isRepG0[kNumStates];
  CLzmaProb isRepG1[kNumStates];
  CLzmaProb isRepG2[kNumStates];
  CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];

  CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
  CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
  CLzmaProb posAlignEncoder[1 << kNumAlignBits];

  CLenPriceEnc lenEnc;
  CLenPriceEnc repLenEnc;

  unsigned lclp;                /* presumably lc + lp; it is assigned outside this excerpt */

  Bool fastMode;
  
  CRangeEnc rc;

  Bool writeEndMark;
  UInt64 nowPos64;
  UInt32 matchPriceCount;
  Bool finished;
  Bool multiThread;

  SRes result;
  UInt32 dictSize;
  UInt32 matchFinderCycles;

  int needInit;

  CSaveState saveState;
} CLzmaEnc;

/*
  LzmaEnc_SaveState
  Copies the adaptive model of the encoder behind pp (length coders, state,
  every probability array, the rep distances and the literal probabilities)
  into its embedded saveState member.
  The literal copy writes (0x300 << lclp) entries through saveState.litProbs;
  that buffer is assumed to be allocated elsewhere (not visible in this excerpt).
*/
void LzmaEnc_SaveState(CLzmaEncHandle pp)
{
  CLzmaEnc *p = (CLzmaEnc *)pp;
  CSaveState *dest = &p->saveState;
  int i;
  dest->lenEnc = p->lenEnc;
  dest->repLenEnc = p->repLenEnc;
  dest->state = p->state;

  /* two-dimensional tables are copied one row at a time */
  for (i = 0; i < kNumStates; i++)
  {
    memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
    memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
  }
  for (i = 0; i < kNumLenToPosStates; i++)
    memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
  memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
  memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
  memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1));
  memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
  memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
  memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
  memcpy(dest->reps, p->reps, sizeof(p->reps));
  memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb));
}

/*
  LzmaEnc_RestoreState
  Inverse of LzmaEnc_SaveState: copies the snapshot held in saveState back
  into the live encoder fields. Note the swapped naming: here "dest" is the
  encoder and "p" is the snapshot.
*/
void LzmaEnc_RestoreState(CLzmaEncHandle pp)
{
  CLzmaEnc *dest = (CLzmaEnc *)pp;
  const CSaveState *p = &dest->saveState;
  int i;
  dest->lenEnc = p->lenEnc;
  dest->repLenEnc = p->repLenEnc;
  dest->state = p->state;

  for (i = 0; i < kNumStates; i++)
  {
    memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
    memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
  }
  for (i = 0; i < kNumLenToPosStates; i++)
    memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i],
sizeof(p->posSlotEncoder[i])); memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); memcpy(dest->reps, p->reps, sizeof(p->reps)); memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb)); } SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) { CLzmaEnc *p = (CLzmaEnc *)pp; CLzmaEncProps props = *props2; LzmaEncProps_Normalize(&props); if (props.lc > LZMA_LC_MAX || props.lp > LZMA_LP_MAX || props.pb > LZMA_PB_MAX || props.dictSize > ((UInt32)1 << kDicLogSizeMaxCompress) || props.dictSize > ((UInt32)1 << 30)) return SZ_ERROR_PARAM; p->dictSize = props.dictSize; p->matchFinderCycles = props.mc; { unsigned fb = props.fb; if (fb < 5) fb = 5; if (fb > LZMA_MATCH_LEN_MAX) fb = LZMA_MATCH_LEN_MAX; p->numFastBytes = fb; } p->lc = props.lc; p->lp = props.lp; p->pb = props.pb; p->fastMode = (props.algo == 0); p->matchFinderBase.btMode = props.btMode; { UInt32 numHashBytes = 4; if (props.btMode) { if (props.numHashBytes < 2) numHashBytes = 2; else if (props.numHashBytes < 4) numHashBytes = props.numHashBytes; } p->matchFinderBase.numHashBytes = numHashBytes; } p->matchFinderBase.cutValue = props.mc; p->writeEndMark = props.writeEndMark; #ifndef _7ZIP_ST /* if (newMultiThread != _multiThread) { ReleaseMatchFinder(); _multiThread = newMultiThread; } */ p->multiThread = (props.numThreads > 1); #endif return SZ_OK; } static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; static const int kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; static const int kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; static const int kShortRepNextStates[kNumStates]= {9, 9, 9, 
9, 9, 9, 9, 11, 11, 11, 11, 11}; #define IsCharState(s) ((s) < 7) #define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1) #define kInfinityPrice (1 << 30) static void RangeEnc_Construct(CRangeEnc *p) { p->outStream = 0; p->bufBase = 0; } #define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) #define RC_BUF_SIZE (1 << 16) static int RangeEnc_Alloc(CRangeEnc *p, ISzAlloc *alloc) { if (p->bufBase == 0) { p->bufBase = (Byte *)alloc->Alloc(alloc, RC_BUF_SIZE); if (p->bufBase == 0) return 0; p->bufLim = p->bufBase + RC_BUF_SIZE; } return 1; } static void RangeEnc_Free(CRangeEnc *p, ISzAlloc *alloc) { alloc->Free(alloc, p->bufBase); p->bufBase = 0; } static void RangeEnc_Init(CRangeEnc *p) { /* Stream.Init(); */ p->low = 0; p->range = 0xFFFFFFFF; p->cacheSize = 1; p->cache = 0; p->buf = p->bufBase; p->processed = 0; p->res = SZ_OK; } static void RangeEnc_FlushStream(CRangeEnc *p) { size_t num; if (p->res != SZ_OK) return; num = p->buf - p->bufBase; if (num != p->outStream->Write(p->outStream, p->bufBase, num)) p->res = SZ_ERROR_WRITE; p->processed += num; p->buf = p->bufBase; } static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) { if ((UInt32)p->low < (UInt32)0xFF000000 || (int)(p->low >> 32) != 0) { Byte temp = p->cache; do { Byte *buf = p->buf; *buf++ = (Byte)(temp + (Byte)(p->low >> 32)); p->buf = buf; if (buf == p->bufLim) RangeEnc_FlushStream(p); temp = 0xFF; } while (--p->cacheSize != 0); p->cache = (Byte)((UInt32)p->low >> 24); } p->cacheSize++; p->low = (UInt32)p->low << 8; } static void RangeEnc_FlushData(CRangeEnc *p) { int i; for (i = 0; i < 5; i++) RangeEnc_ShiftLow(p); } static void RangeEnc_EncodeDirectBits(CRangeEnc *p, UInt32 value, int numBits) { do { p->range >>= 1; p->low += p->range & (0 - ((value >> --numBits) & 1)); if (p->range < kTopValue) { p->range <<= 8; RangeEnc_ShiftLow(p); } } while (numBits != 0); } static void RangeEnc_EncodeBit(CRangeEnc *p, 
CLzmaProb *prob, UInt32 symbol) { UInt32 ttt = *prob; UInt32 newBound = (p->range >> kNumBitModelTotalBits) * ttt; if (symbol == 0) { p->range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } else { p->low += newBound; p->range -= newBound; ttt -= ttt >> kNumMoveBits; } *prob = (CLzmaProb)ttt; if (p->range < kTopValue) { p->range <<= 8; RangeEnc_ShiftLow(p); } } static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol) { symbol |= 0x100; do { RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1); symbol <<= 1; } while (symbol < 0x10000); } static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol, UInt32 matchByte) { UInt32 offs = 0x100; symbol |= 0x100; do { matchByte <<= 1; RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1); symbol <<= 1; offs &= ~(matchByte ^ symbol); } while (symbol < 0x10000); } void LzmaEnc_InitPriceTables(UInt32 *ProbPrices) { UInt32 i; for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits)) { const int kCyclesBits = kNumBitPriceShiftBits; UInt32 w = i; UInt32 bitCount = 0; int j; for (j = 0; j < kCyclesBits; j++) { w = w * w; bitCount <<= 1; while (w >= ((UInt32)1 << 16)) { w >>= 1; bitCount++; } } ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); } } #define GET_PRICE(prob, symbol) \ p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; #define GET_PRICEa(prob, symbol) \ ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; #define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] #define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] #define GET_PRICE_0a(prob) ProbPrices[(prob) >> kNumMoveReducingBits] #define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] static UInt32 
LitEnc_GetPrice(const CLzmaProb *probs, UInt32 symbol, UInt32 *ProbPrices) { UInt32 price = 0; symbol |= 0x100; do { price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1); symbol <<= 1; } while (symbol < 0x10000); return price; } static UInt32 LitEnc_GetPriceMatched(const CLzmaProb *probs, UInt32 symbol, UInt32 matchByte, UInt32 *ProbPrices) { UInt32 price = 0; UInt32 offs = 0x100; symbol |= 0x100; do { matchByte <<= 1; price += GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1); symbol <<= 1; offs &= ~(matchByte ^ symbol); } while (symbol < 0x10000); return price; } static void RcTree_Encode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol) { UInt32 m = 1; int i; for (i = numBitLevels; i != 0;) { UInt32 bit; i--; bit = (symbol >> i) & 1; RangeEnc_EncodeBit(rc, probs + m, bit); m = (m << 1) | bit; } } static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol) { UInt32 m = 1; int i; for (i = 0; i < numBitLevels; i++) { UInt32 bit = symbol & 1; RangeEnc_EncodeBit(rc, probs + m, bit); m = (m << 1) | bit; symbol >>= 1; } } static UInt32 RcTree_GetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices) { UInt32 price = 0; symbol |= (1 << numBitLevels); while (symbol != 1) { price += GET_PRICEa(probs[symbol >> 1], symbol & 1); symbol >>= 1; } return price; } static UInt32 RcTree_ReverseGetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices) { UInt32 price = 0; UInt32 m = 1; int i; for (i = numBitLevels; i != 0; i--) { UInt32 bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) | bit; } return price; } static void LenEnc_Init(CLenEnc *p) { unsigned i; p->choice = p->choice2 = kProbInitValue; for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++) p->low[i] = kProbInitValue; for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++) p->mid[i] = kProbInitValue; for (i = 0; i < 
kLenNumHighSymbols; i++) p->high[i] = kProbInitValue; } static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState) { if (symbol < kLenNumLowSymbols) { RangeEnc_EncodeBit(rc, &p->choice, 0); RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol); } else { RangeEnc_EncodeBit(rc, &p->choice, 1); if (symbol < kLenNumLowSymbols + kLenNumMidSymbols) { RangeEnc_EncodeBit(rc, &p->choice2, 0); RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols); } else { RangeEnc_EncodeBit(rc, &p->choice2, 1); RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols); } } } static void LenEnc_SetPrices(CLenEnc *p, UInt32 posState, UInt32 numSymbols, UInt32 *prices, UInt32 *ProbPrices) { UInt32 a0 = GET_PRICE_0a(p->choice); UInt32 a1 = GET_PRICE_1a(p->choice); UInt32 b0 = a1 + GET_PRICE_0a(p->choice2); UInt32 b1 = a1 + GET_PRICE_1a(p->choice2); UInt32 i = 0; for (i = 0; i < kLenNumLowSymbols; i++) { if (i >= numSymbols) return; prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices); } for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++) { if (i >= numSymbols) return; prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices); } for (; i < numSymbols; i++) prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices); } static void MY_FAST_CALL LenPriceEnc_UpdateTable(CLenPriceEnc *p, UInt32 posState, UInt32 *ProbPrices) { LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices); p->counters[posState] = p->tableSize; } static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, UInt32 numPosStates, UInt32 *ProbPrices) { UInt32 posState; for (posState = 0; posState < numPosStates; posState++) LenPriceEnc_UpdateTable(p, posState, ProbPrices); } static void 
LenEnc_Encode2(CLenPriceEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState, Bool updatePrice, UInt32 *ProbPrices) { LenEnc_Encode(&p->p, rc, symbol, posState); if (updatePrice) if (--p->counters[posState] == 0) LenPriceEnc_UpdateTable(p, posState, ProbPrices); } static void MovePos(CLzmaEnc *p, UInt32 num) { #ifdef SHOW_STAT ttt += num; printf("\n MovePos %d", num); #endif if (num != 0) { p->additionalOffset += num; p->matchFinder.Skip(p->matchFinderObj, num); } } static UInt32 ReadMatchDistances(CLzmaEnc *p, UInt32 *numDistancePairsRes) { UInt32 lenRes = 0, numPairs; p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); #ifdef SHOW_STAT printf("\n i = %d numPairs = %d ", ttt, numPairs / 2); ttt++; { UInt32 i; for (i = 0; i < numPairs; i += 2) printf("%2d %6d | ", p->matches[i], p->matches[i + 1]); } #endif if (numPairs > 0) { lenRes = p->matches[numPairs - 2]; if (lenRes == p->numFastBytes) { const Byte *pby = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; UInt32 distance = p->matches[numPairs - 1] + 1; UInt32 numAvail = p->numAvail; if (numAvail > LZMA_MATCH_LEN_MAX) numAvail = LZMA_MATCH_LEN_MAX; { const Byte *pby2 = pby - distance; for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++); } } } p->additionalOffset++; *numDistancePairsRes = numPairs; return lenRes; } #define MakeAsChar(p) (p)->backPrev = (UInt32)(-1); (p)->prev1IsChar = False; #define MakeAsShortRep(p) (p)->backPrev = 0; (p)->prev1IsChar = False; #define IsShortRep(p) ((p)->backPrev == 0) static UInt32 GetRepLen1Price(CLzmaEnc *p, UInt32 state, UInt32 posState) { return GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState]); } static UInt32 GetPureRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 state, UInt32 posState) { UInt32 price; if (repIndex == 0) { price = GET_PRICE_0(p->isRepG0[state]); price += GET_PRICE_1(p->isRep0Long[state][posState]); } else { price = 
GET_PRICE_1(p->isRepG0[state]); if (repIndex == 1) price += GET_PRICE_0(p->isRepG1[state]); else { price += GET_PRICE_1(p->isRepG1[state]); price += GET_PRICE(p->isRepG2[state], repIndex - 2); } } return price; } static UInt32 GetRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 len, UInt32 state, UInt32 posState) { return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] + GetPureRepPrice(p, repIndex, state, posState); } static UInt32 Backward(CLzmaEnc *p, UInt32 *backRes, UInt32 cur) { UInt32 posMem = p->opt[cur].posPrev; UInt32 backMem = p->opt[cur].backPrev; p->optimumEndIndex = cur; do { if (p->opt[cur].prev1IsChar) { MakeAsChar(&p->opt[posMem]) p->opt[posMem].posPrev = posMem - 1; if (p->opt[cur].prev2) { p->opt[posMem - 1].prev1IsChar = False; p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2; p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2; } } { UInt32 posPrev = posMem; UInt32 backCur = backMem; backMem = p->opt[posPrev].backPrev; posMem = p->opt[posPrev].posPrev; p->opt[posPrev].backPrev = backCur; p->opt[posPrev].posPrev = cur; cur = posPrev; } } while (cur != 0); *backRes = p->opt[0].backPrev; p->optimumCurrentIndex = p->opt[0].posPrev; return p->optimumCurrentIndex; } #define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos) & p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * 0x300) static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes) { UInt32 numAvail, mainLen, numPairs, repMaxIndex, i, posState, lenEnd, len, cur; UInt32 matchPrice, repMatchPrice, normalMatchPrice; UInt32 reps[LZMA_NUM_REPS], repLens[LZMA_NUM_REPS]; UInt32 *matches; const Byte *data; Byte curByte, matchByte; if (p->optimumEndIndex != p->optimumCurrentIndex) { const COptimal *opt = &p->opt[p->optimumCurrentIndex]; UInt32 lenRes = opt->posPrev - p->optimumCurrentIndex; *backRes = opt->backPrev; p->optimumCurrentIndex = opt->posPrev; return lenRes; } p->optimumCurrentIndex = p->optimumEndIndex = 0; if (p->additionalOffset == 0) mainLen = ReadMatchDistances(p, 
&numPairs); else { mainLen = p->longestMatchLength; numPairs = p->numPairs; } numAvail = p->numAvail; if (numAvail < 2) { *backRes = (UInt32)(-1); return 1; } if (numAvail > LZMA_MATCH_LEN_MAX) numAvail = LZMA_MATCH_LEN_MAX; data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; repMaxIndex = 0; for (i = 0; i < LZMA_NUM_REPS; i++) { UInt32 lenTest; const Byte *data2; reps[i] = p->reps[i]; data2 = data - (reps[i] + 1); if (data[0] != data2[0] || data[1] != data2[1]) { repLens[i] = 0; continue; } for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++); repLens[i] = lenTest; if (lenTest > repLens[repMaxIndex]) repMaxIndex = i; } if (repLens[repMaxIndex] >= p->numFastBytes) { UInt32 lenRes; *backRes = repMaxIndex; lenRes = repLens[repMaxIndex]; MovePos(p, lenRes - 1); return lenRes; } matches = p->matches; if (mainLen >= p->numFastBytes) { *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; MovePos(p, mainLen - 1); return mainLen; } curByte = *data; matchByte = *(data - (reps[0] + 1)); if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2) { *backRes = (UInt32)-1; return 1; } p->opt[0].state = (CState)p->state; posState = (position & p->pbMask); { const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + (!IsCharState(p->state) ? LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : LitEnc_GetPrice(probs, curByte, p->ProbPrices)); } MakeAsChar(&p->opt[1]); matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); if (matchByte == curByte) { UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState); if (shortRepPrice < p->opt[1].price) { p->opt[1].price = shortRepPrice; MakeAsShortRep(&p->opt[1]); } } lenEnd = ((mainLen >= repLens[repMaxIndex]) ? 
mainLen : repLens[repMaxIndex]); if (lenEnd < 2) { *backRes = p->opt[1].backPrev; return 1; } p->opt[1].posPrev = 0; for (i = 0; i < LZMA_NUM_REPS; i++) p->opt[0].backs[i] = reps[i]; len = lenEnd; do p->opt[len--].price = kInfinityPrice; while (len >= 2); for (i = 0; i < LZMA_NUM_REPS; i++) { UInt32 repLen = repLens[i]; UInt32 price; if (repLen < 2) continue; price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState); do { UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2]; COptimal *opt = &p->opt[repLen]; if (curAndLenPrice < opt->price) { opt->price = curAndLenPrice; opt->posPrev = 0; opt->backPrev = i; opt->prev1IsChar = False; } } while (--repLen >= 2); } normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); len = ((repLens[0] >= 2) ? repLens[0] + 1 : 2); if (len <= mainLen) { UInt32 offs = 0; while (len > matches[offs]) offs += 2; for (; ; len++) { COptimal *opt; UInt32 distance = matches[offs + 1]; UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN]; UInt32 lenToPosState = GetLenToPosState(len); if (distance < kNumFullDistances) curAndLenPrice += p->distancesPrices[lenToPosState][distance]; else { UInt32 slot; GetPosSlot2(distance, slot); curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot]; } opt = &p->opt[len]; if (curAndLenPrice < opt->price) { opt->price = curAndLenPrice; opt->posPrev = 0; opt->backPrev = distance + LZMA_NUM_REPS; opt->prev1IsChar = False; } if (len == matches[offs]) { offs += 2; if (offs == numPairs) break; } } } cur = 0; #ifdef SHOW_STAT2 if (position >= 0) { unsigned i; printf("\n pos = %4X", position); for (i = cur; i <= lenEnd; i++) printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price); } #endif for (;;) { UInt32 numAvailFull, newLen, numPairs, posPrev, state, posState, startLen; UInt32 curPrice, curAnd1Price, matchPrice, repMatchPrice; Bool nextIsChar; Byte curByte, matchByte; const Byte *data; 
COptimal *curOpt; COptimal *nextOpt; cur++; if (cur == lenEnd) return Backward(p, backRes, cur); newLen = ReadMatchDistances(p, &numPairs); if (newLen >= p->numFastBytes) { p->numPairs = numPairs; p->longestMatchLength = newLen; return Backward(p, backRes, cur); } position++; curOpt = &p->opt[cur]; posPrev = curOpt->posPrev; if (curOpt->prev1IsChar) { posPrev--; if (curOpt->prev2) { state = p->opt[curOpt->posPrev2].state; if (curOpt->backPrev2 < LZMA_NUM_REPS) state = kRepNextStates[state]; else state = kMatchNextStates[state]; } else state = p->opt[posPrev].state; state = kLiteralNextStates[state]; } else state = p->opt[posPrev].state; if (posPrev == cur - 1) { if (IsShortRep(curOpt)) state = kShortRepNextStates[state]; else state = kLiteralNextStates[state]; } else { UInt32 pos; const COptimal *prevOpt; if (curOpt->prev1IsChar && curOpt->prev2) { posPrev = curOpt->posPrev2; pos = curOpt->backPrev2; state = kRepNextStates[state]; } else { pos = curOpt->backPrev; if (pos < LZMA_NUM_REPS) state = kRepNextStates[state]; else state = kMatchNextStates[state]; } prevOpt = &p->opt[posPrev]; if (pos < LZMA_NUM_REPS) { UInt32 i; reps[0] = prevOpt->backs[pos]; for (i = 1; i <= pos; i++) reps[i] = prevOpt->backs[i - 1]; for (; i < LZMA_NUM_REPS; i++) reps[i] = prevOpt->backs[i]; } else { UInt32 i; reps[0] = (pos - LZMA_NUM_REPS); for (i = 1; i < LZMA_NUM_REPS; i++) reps[i] = prevOpt->backs[i - 1]; } } curOpt->state = (CState)state; curOpt->backs[0] = reps[0]; curOpt->backs[1] = reps[1]; curOpt->backs[2] = reps[2]; curOpt->backs[3] = reps[3]; curPrice = curOpt->price; nextIsChar = False; data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; curByte = *data; matchByte = *(data - (reps[0] + 1)); posState = (position & p->pbMask); curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]); { const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); curAnd1Price += (!IsCharState(state) ? 
LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : LitEnc_GetPrice(probs, curByte, p->ProbPrices)); } nextOpt = &p->opt[cur + 1]; if (curAnd1Price < nextOpt->price) { nextOpt->price = curAnd1Price; nextOpt->posPrev = cur; MakeAsChar(nextOpt); nextIsChar = True; } matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]); repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0)) { UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState); if (shortRepPrice <= nextOpt->price) { nextOpt->price = shortRepPrice; nextOpt->posPrev = cur; MakeAsShortRep(nextOpt); nextIsChar = True; } } numAvailFull = p->numAvail; { UInt32 temp = kNumOpts - 1 - cur; if (temp < numAvailFull) numAvailFull = temp; } if (numAvailFull < 2) continue; numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes); if (!nextIsChar && matchByte != curByte) /* speed optimization */ { /* try Literal + rep0 */ UInt32 temp; UInt32 lenTest2; const Byte *data2 = data - (reps[0] + 1); UInt32 limit = p->numFastBytes + 1; if (limit > numAvailFull) limit = numAvailFull; for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++); lenTest2 = temp - 1; if (lenTest2 >= 2) { UInt32 state2 = kLiteralNextStates[state]; UInt32 posStateNext = (position + 1) & p->pbMask; UInt32 nextRepMatchPrice = curAnd1Price + GET_PRICE_1(p->isMatch[state2][posStateNext]) + GET_PRICE_1(p->isRep[state2]); /* for (; lenTest2 >= 2; lenTest2--) */ { UInt32 curAndLenPrice; COptimal *opt; UInt32 offset = cur + 1 + lenTest2; while (lenEnd < offset) p->opt[++lenEnd].price = kInfinityPrice; curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); opt = &p->opt[offset]; if (curAndLenPrice < opt->price) { opt->price = curAndLenPrice; opt->posPrev = cur + 1; opt->backPrev = 0; opt->prev1IsChar = True; opt->prev2 = False; } } } } startLen = 2; /* speed optimization */ { 
UInt32 repIndex; for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++) { UInt32 lenTest; UInt32 lenTestTemp; UInt32 price; const Byte *data2 = data - (reps[repIndex] + 1); if (data[0] != data2[0] || data[1] != data2[1]) continue; for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++); while (lenEnd < cur + lenTest) p->opt[++lenEnd].price = kInfinityPrice; lenTestTemp = lenTest; price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState); do { UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2]; COptimal *opt = &p->opt[cur + lenTest]; if (curAndLenPrice < opt->price) { opt->price = curAndLenPrice; opt->posPrev = cur; opt->backPrev = repIndex; opt->prev1IsChar = False; } } while (--lenTest >= 2); lenTest = lenTestTemp; if (repIndex == 0) startLen = lenTest + 1; /* if (_maxMode) */ { UInt32 lenTest2 = lenTest + 1; UInt32 limit = lenTest2 + p->numFastBytes; UInt32 nextRepMatchPrice; if (limit > numAvailFull) limit = numAvailFull; for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++); lenTest2 -= lenTest + 1; if (lenTest2 >= 2) { UInt32 state2 = kRepNextStates[state]; UInt32 posStateNext = (position + lenTest) & p->pbMask; UInt32 curAndLenCharPrice = price + p->repLenEnc.prices[posState][lenTest - 2] + GET_PRICE_0(p->isMatch[state2][posStateNext]) + LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), data[lenTest], data2[lenTest], p->ProbPrices); state2 = kLiteralNextStates[state2]; posStateNext = (position + lenTest + 1) & p->pbMask; nextRepMatchPrice = curAndLenCharPrice + GET_PRICE_1(p->isMatch[state2][posStateNext]) + GET_PRICE_1(p->isRep[state2]); /* for (; lenTest2 >= 2; lenTest2--) */ { UInt32 curAndLenPrice; COptimal *opt; UInt32 offset = cur + lenTest + 1 + lenTest2; while (lenEnd < offset) p->opt[++lenEnd].price = kInfinityPrice; curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); opt = &p->opt[offset]; if 
(curAndLenPrice < opt->price) { opt->price = curAndLenPrice; opt->posPrev = cur + lenTest + 1; opt->backPrev = 0; opt->prev1IsChar = True; opt->prev2 = True; opt->posPrev2 = cur; opt->backPrev2 = repIndex; } } } } } } /* for (UInt32 lenTest = 2; lenTest <= newLen; lenTest++) */ if (newLen > numAvail) { newLen = numAvail; for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2); matches[numPairs] = newLen; numPairs += 2; } if (newLen >= startLen) { UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); UInt32 offs, curBack, posSlot; UInt32 lenTest; while (lenEnd < cur + newLen) p->opt[++lenEnd].price = kInfinityPrice; offs = 0; while (startLen > matches[offs]) offs += 2; curBack = matches[offs + 1]; GetPosSlot2(curBack, posSlot); for (lenTest = /*2*/ startLen; ; lenTest++) { UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN]; UInt32 lenToPosState = GetLenToPosState(lenTest); COptimal *opt; if (curBack < kNumFullDistances) curAndLenPrice += p->distancesPrices[lenToPosState][curBack]; else curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask]; opt = &p->opt[cur + lenTest]; if (curAndLenPrice < opt->price) { opt->price = curAndLenPrice; opt->posPrev = cur; opt->backPrev = curBack + LZMA_NUM_REPS; opt->prev1IsChar = False; } if (/*_maxMode && */lenTest == matches[offs]) { /* Try Match + Literal + Rep0 */ const Byte *data2 = data - (curBack + 1); UInt32 lenTest2 = lenTest + 1; UInt32 limit = lenTest2 + p->numFastBytes; UInt32 nextRepMatchPrice; if (limit > numAvailFull) limit = numAvailFull; for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++); lenTest2 -= lenTest + 1; if (lenTest2 >= 2) { UInt32 state2 = kMatchNextStates[state]; UInt32 posStateNext = (position + lenTest) & p->pbMask; UInt32 curAndLenCharPrice = curAndLenPrice + GET_PRICE_0(p->isMatch[state2][posStateNext]) + LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest 
- 1]), data[lenTest], data2[lenTest], p->ProbPrices); state2 = kLiteralNextStates[state2]; posStateNext = (posStateNext + 1) & p->pbMask; nextRepMatchPrice = curAndLenCharPrice + GET_PRICE_1(p->isMatch[state2][posStateNext]) + GET_PRICE_1(p->isRep[state2]); /* for (; lenTest2 >= 2; lenTest2--) */ { UInt32 offset = cur + lenTest + 1 + lenTest2; UInt32 curAndLenPrice; COptimal *opt; while (lenEnd < offset) p->opt[++lenEnd].price = kInfinityPrice; curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); opt = &p->opt[offset]; if (curAndLenPrice < opt->price) { opt->price = curAndLenPrice; opt->posPrev = cur + lenTest + 1; opt->backPrev = 0; opt->prev1IsChar = True; opt->prev2 = True; opt->posPrev2 = cur; opt->backPrev2 = curBack + LZMA_NUM_REPS; } } } offs += 2; if (offs == numPairs) break; curBack = matches[offs + 1]; if (curBack >= kNumFullDistances) GetPosSlot2(curBack, posSlot); } } } } }

/* ChangePair(smallDist, bigDist) is nonzero when bigDist, shifted right by
   7 bits, is still greater than smallDist. GetOptimumFast uses it to decide
   whether a much smaller distance is worth a one-byte-shorter match. */
#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist))

/*
GetOptimumFast
--------------
Greedy choice of the next symbol, used when p->fastMode is set.

  p       - encoder state; reads p->reps, p->matches, p->numAvail,
            p->numFastBytes and updates p->longestMatchLength / p->numPairs.
  backRes - out: (UInt32)-1 for a literal,
                 a rep index (< LZMA_NUM_REPS) for a repeated-distance match,
                 distance + LZMA_NUM_REPS for a normal match.

Returns the length of the chosen symbol (1 together with *backRes ==
(UInt32)-1 means "literal").
*/
static UInt32 GetOptimumFast(CLzmaEnc *p, UInt32 *backRes)
{
  UInt32 numAvail, mainLen, mainDist, numPairs, repIndex, repLen, i;
  const Byte *data;
  const UInt32 *matches;

  /* When additionalOffset is non-zero, the matches for this position were
     already read by the look-ahead at the end of the previous call and are
     stored in p->longestMatchLength / p->numPairs. */
  if (p->additionalOffset == 0)
    mainLen = ReadMatchDistances(p, &numPairs);
  else
  {
    mainLen = p->longestMatchLength;
    numPairs = p->numPairs;
  }

  numAvail = p->numAvail;
  *backRes = (UInt32)-1;
  if (numAvail < 2)
    return 1;
  if (numAvail > LZMA_MATCH_LEN_MAX)
    numAvail = LZMA_MATCH_LEN_MAX;
  data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;

  /* Find the longest match against the LZMA_NUM_REPS remembered distances.
     A rep match of at least numFastBytes is accepted immediately. */
  repLen = repIndex = 0;
  for (i = 0; i < LZMA_NUM_REPS; i++)
  {
    UInt32 len;
    const Byte *data2 = data - (p->reps[i] + 1);
    if (data[0] != data2[0] || data[1] != data2[1])
      continue;
    for (len = 2; len < numAvail && data[len] == data2[len]; len++);
    if (len >= p->numFastBytes)
    {
      *backRes = i;
      MovePos(p, len - 1);
      return len;
    }
    if (len > repLen)
    {
      repIndex = i;
      repLen = len;
    }
  }

  /* matches[] holds (length, distance) pairs; the last pair is the longest
     match. A normal match of at least numFastBytes is accepted immediately. */
  matches = p->matches;
  if (mainLen >= p->numFastBytes)
  {
    *backRes = matches[numPairs - 1] + LZMA_NUM_REPS;
    MovePos(p, mainLen - 1);
    return mainLen;
  }

  mainDist = 0; /* for GCC */
  if (mainLen >= 2)
  {
    mainDist = matches[numPairs - 1];
    /* Step back to a match that is exactly one byte shorter while
       ChangePair says its distance is sufficiently smaller. */
    while (numPairs > 2 && mainLen == matches[numPairs - 4] + 1)
    {
      if (!ChangePair(matches[numPairs - 3], mainDist))
        break;
      numPairs -= 2;
      mainLen = matches[numPairs - 2];
      mainDist = matches[numPairs - 1];
    }
    /* A 2-byte match with distance >= 0x80 is dropped (treated as no match). */
    if (mainLen == 2 && mainDist >= 0x80)
      mainLen = 1;
  }

  /* Prefer the rep match when it is at most 1 byte shorter than the main
     match, or 2 / 3 bytes shorter while the main distance is >= 2^9 / 2^15. */
  if (repLen >= 2 && (
        (repLen + 1 >= mainLen) ||
        (repLen + 2 >= mainLen && mainDist >= (1 << 9)) ||
        (repLen + 3 >= mainLen && mainDist >= (1 << 15))))
  {
    *backRes = repIndex;
    MovePos(p, repLen - 1);
    return repLen;
  }

  if (mainLen < 2 || numAvail <= 2)
    return 1;

  /* Look-ahead: read the matches again and store them in p for the next
     call. NOTE(review): presumably ReadMatchDistances advances the match
     finder by one byte, so this describes the next position - confirm.
     If the new match looks better than the current one, emit a literal now. */
  p->longestMatchLength = ReadMatchDistances(p, &p->numPairs);
  if (p->longestMatchLength >= 2)
  {
    UInt32 newDistance = matches[p->numPairs - 1];
    if ((p->longestMatchLength >= mainLen && newDistance < mainDist) ||
        (p->longestMatchLength == mainLen + 1 && !ChangePair(mainDist, newDistance)) ||
        (p->longestMatchLength > mainLen + 1) ||
        (p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && ChangePair(newDistance, mainDist)))
      return 1;
  }

  /* Also emit a literal when any rep distance matches at least mainLen - 1
     bytes at the re-fetched data pointer. */
  data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
  for (i = 0; i < LZMA_NUM_REPS; i++)
  {
    UInt32 len, limit;
    const Byte *data2 = data - (p->reps[i] + 1);
    if (data[0] != data2[0] || data[1] != data2[1])
      continue;
    limit = mainLen - 1;
    for (len = 2; len < limit && data[len] == data2[len]; len++);
    if (len >= limit)
      return 1;
  }

  /* Otherwise take the main match. MovePos gets mainLen - 2 here (not
     mainLen - 1 as above) because the look-ahead call already ran. */
  *backRes = mainDist + LZMA_NUM_REPS;
  MovePos(p, mainLen - 2);
  return mainLen;
}

/* WriteEndMarker: encodes a non-rep match of length LZMA_MATCH_LEN_MIN whose
   position slot, direct bits and align bits are all ones.
   posState selects the isMatch / length-coder context. */
static void WriteEndMarker(CLzmaEnc *p, UInt32 posState)
{
  UInt32 len;
  RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1);
  RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0);
  p->state = kMatchNextStates[p->state];
  len = LZMA_MATCH_LEN_MIN;
  LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
  RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1);
RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits); RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); } static SRes CheckErrors(CLzmaEnc *p) { if (p->result != SZ_OK) return p->result; if (p->rc.res != SZ_OK) p->result = SZ_ERROR_WRITE; if (p->matchFinderBase.result != SZ_OK) p->result = SZ_ERROR_READ; if (p->result != SZ_OK) p->finished = True; return p->result; } static SRes Flush(CLzmaEnc *p, UInt32 nowPos) { /* ReleaseMFStream(); */ p->finished = True; if (p->writeEndMark) WriteEndMarker(p, nowPos & p->pbMask); RangeEnc_FlushData(&p->rc); RangeEnc_FlushStream(&p->rc); return CheckErrors(p); } static void FillAlignPrices(CLzmaEnc *p) { UInt32 i; for (i = 0; i < kAlignTableSize; i++) p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); p->alignPriceCount = 0; } static void FillDistancesPrices(CLzmaEnc *p) { UInt32 tempPrices[kNumFullDistances]; UInt32 i, lenToPosState; for (i = kStartPosModelIndex; i < kNumFullDistances; i++) { UInt32 posSlot = GetPosSlot1(i); UInt32 footerBits = ((posSlot >> 1) - 1); UInt32 base = ((2 | (posSlot & 1)) << footerBits); tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, i - base, p->ProbPrices); } for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++) { UInt32 posSlot; const CLzmaProb *encoder = p->posSlotEncoder[lenToPosState]; UInt32 *posSlotPrices = p->posSlotPrices[lenToPosState]; for (posSlot = 0; posSlot < p->distTableSize; posSlot++) posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices); for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++) posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits); { UInt32 *distancesPrices = p->distancesPrices[lenToPosState]; UInt32 i; for (i = 0; i < kStartPosModelIndex; i++) distancesPrices[i] = posSlotPrices[i]; for (; i < 
kNumFullDistances; i++) distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i]; } } p->matchPriceCount = 0; }

/* LzmaEnc_Construct: puts a raw CLzmaEnc into a usable state. Constructs the
   range coder and match finder(s), applies the default properties from
   LzmaEncProps_Init, builds the static lookup / price tables and clears both
   literal-probability pointers so they can be freed safely later. */
void LzmaEnc_Construct(CLzmaEnc *p)
{
  RangeEnc_Construct(&p->rc);
  MatchFinder_Construct(&p->matchFinderBase);
  #ifndef _7ZIP_ST
  MatchFinderMt_Construct(&p->matchFinderMt);
  p->matchFinderMt.MatchFinder = &p->matchFinderBase;
  #endif

  {
    CLzmaEncProps props;
    LzmaEncProps_Init(&props);
    LzmaEnc_SetProps(p, &props);
  }

  #ifndef LZMA_LOG_BSR
  LzmaEnc_FastPosInit(p->g_FastPos);
  #endif

  LzmaEnc_InitPriceTables(p->ProbPrices);
  p->litProbs = 0;
  p->saveState.litProbs = 0;
}

/* LzmaEnc_Create: allocates a CLzmaEnc with alloc and constructs it.
   Returns the handle, or 0 when the allocation fails. */
CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc)
{
  void *p;
  p = alloc->Alloc(alloc, sizeof(CLzmaEnc));
  if (p != 0)
    LzmaEnc_Construct((CLzmaEnc *)p);
  return p;
}

/* LzmaEnc_FreeLits: frees both literal-probability tables (live and saved)
   and resets the pointers to 0. */
void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAlloc *alloc)
{
  alloc->Free(alloc, p->litProbs);
  alloc->Free(alloc, p->saveState.litProbs);
  p->litProbs = 0;
  p->saveState.litProbs = 0;
}

/* LzmaEnc_Destruct: releases everything owned by the encoder except the
   CLzmaEnc object itself. Match-finder buffers are released through
   allocBig; literal tables and the range-coder buffer through alloc. */
void LzmaEnc_Destruct(CLzmaEnc *p, ISzAlloc *alloc, ISzAlloc *allocBig)
{
  #ifndef _7ZIP_ST
  MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
  #endif
  MatchFinder_Free(&p->matchFinderBase, allocBig);
  LzmaEnc_FreeLits(p, alloc);
  RangeEnc_Free(&p->rc, alloc);
}

/* LzmaEnc_Destroy: destructs the encoder and frees the object itself with
   alloc. The handle is dereferenced unconditionally, so it must not be 0. */
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig)
{
  LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig);
  alloc->Free(alloc, p);
}

/*
LzmaEnc_CodeOneBlock
--------------------
Encodes input until a stopping condition is met.

  useLimits     - when set, stop once the bytes consumed in this call come
                  within kNumOpts + 300 of maxUnpackSize, or the range-coder
                  output comes within kNumOpts * 2 of maxPackSize.
                  When clear, return after at least (1 << 15) input bytes.
  maxPackSize   - output limit, only read when useLimits is set.
  maxUnpackSize - input limit, only read when useLimits is set.

Returns p->result immediately if the encoder is already finished, otherwise
the result of CheckErrors (early return, no flush) or of Flush (input
exhausted or a limit was hit; Flush also sets p->finished).
*/
static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize, UInt32 maxUnpackSize)
{
  UInt32 nowPos32, startPos32;
  /* Deferred match-finder initialisation requested by the Prepare functions. */
  if (p->needInit)
  {
    p->matchFinder.Init(p->matchFinderObj);
    p->needInit = 0;
  }

  if (p->finished)
    return p->result;
  RINOK(CheckErrors(p));

  nowPos32 = (UInt32)p->nowPos64;
  startPos32 = nowPos32;

  /* The very first byte of the stream is always coded as a plain literal
     with the base literal context (p->litProbs) and posState 0. */
  if (p->nowPos64 == 0)
  {
    UInt32 numPairs;
    Byte curByte;
    if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
      return Flush(p, nowPos32);
    ReadMatchDistances(p, &numPairs);
    RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0);
    p->state = kLiteralNextStates[p->state];
    curByte = p->matchFinder.GetIndexByte(p->matchFinderObj, 0 - p->additionalOffset);
    LitEnc_Encode(&p->rc, p->litProbs, curByte);
    p->additionalOffset--;
    nowPos32++;
  }

  if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0)
  for (;;)
  {
    UInt32 pos, len, posState;
    /* pos: (UInt32)-1 = literal, < LZMA_NUM_REPS = rep index,
       otherwise distance + LZMA_NUM_REPS. */
    if (p->fastMode)
      len = GetOptimumFast(p, &pos);
    else
      len = GetOptimum(p, nowPos32, &pos);

    #ifdef SHOW_STAT2
    printf("\n pos = %4X, len = %d pos = %d", nowPos32, len, pos);
    #endif

    posState = nowPos32 & p->pbMask;
    if (len == 1 && pos == (UInt32)-1)
    {
      /* Literal: after a match (non-char state) it is coded against the
         byte found at distance reps[0]. */
      Byte curByte;
      CLzmaProb *probs;
      const Byte *data;

      RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0);
      data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
      curByte = *data;
      probs = LIT_PROBS(nowPos32, *(data - 1));
      if (IsCharState(p->state))
        LitEnc_Encode(&p->rc, probs, curByte);
      else
        LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1));
      p->state = kLiteralNextStates[p->state];
    }
    else
    {
      RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1);
      if (pos < LZMA_NUM_REPS)
      {
        /* Repeated-distance match: code which rep it is and move that
           distance to the front of p->reps. */
        RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1);
        if (pos == 0)
        {
          RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0);
          /* rep0 with len == 1 is the "short rep" (bit 0). */
          RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1));
        }
        else
        {
          UInt32 distance = p->reps[pos];
          RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1);
          if (pos == 1)
            RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0);
          else
          {
            RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1);
            RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2);
            if (pos == 3)
              p->reps[3] = p->reps[2];
            p->reps[2] = p->reps[1];
          }
          p->reps[1] = p->reps[0];
          p->reps[0] = distance;
        }
        if (len == 1)
          p->state = kShortRepNextStates[p->state];
        else
        {
          LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
          p->state = kRepNextStates[p->state];
        }
      }
      else
      {
        /* Normal match: length, then the distance as position slot plus
           footer bits (modelled for small slots, direct + align bits for
           slots >= kEndPosModelIndex). */
        UInt32 posSlot;
        RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0);
        p->state = kMatchNextStates[p->state];
        LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
        pos -= LZMA_NUM_REPS;
        GetPosSlot(pos, posSlot);
        RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot);

        if (posSlot >= kStartPosModelIndex)
        {
          UInt32 footerBits = ((posSlot >> 1) - 1);
          UInt32 base = ((2 | (posSlot & 1)) << footerBits);
          UInt32 posReduced = pos - base;

          if (posSlot < kEndPosModelIndex)
            RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced);
          else
          {
            RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits);
            RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask);
            p->alignPriceCount++;
          }
        }
        /* The new distance becomes rep0; the older ones shift down. */
        p->reps[3] = p->reps[2];
        p->reps[2] = p->reps[1];
        p->reps[1] = p->reps[0];
        p->reps[0] = pos;
        p->matchPriceCount++;
      }
    }
    p->additionalOffset -= len;
    nowPos32 += len;
    /* Stopping conditions are only evaluated once additionalOffset is back
       at zero. */
    if (p->additionalOffset == 0)
    {
      UInt32 processed;
      /* Price tables are only maintained in normal (non-fast) mode. */
      if (!p->fastMode)
      {
        if (p->matchPriceCount >= (1 << 7))
          FillDistancesPrices(p);
        if (p->alignPriceCount >= kAlignTableSize)
          FillAlignPrices(p);
      }
      if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
        break;
      processed = nowPos32 - startPos32;
      if (useLimits)
      {
        if (processed + kNumOpts + 300 >= maxUnpackSize ||
            RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize)
          break;
      }
      else if (processed >= (1 << 15))
      {
        /* Early return without flushing; the caller is expected to call
           again to continue. */
        p->nowPos64 += nowPos32 - startPos32;
        return CheckErrors(p);
      }
    }
  }
  p->nowPos64 += nowPos32 - startPos32;
  return Flush(p, nowPos32);
}

/* Dictionaries larger than this make the match finder use its big hash. */
#define kBigHashDicLimit ((UInt32)1 << 24)

/* LzmaEnc_Alloc: allocates the range-coder buffer, the two literal
   probability tables ((0x300 << (lc + lp)) entries each, reallocated only
   when missing or when lc + lp changed) and creates the match finder,
   multi-threaded when p->mtMode is set.
   keepWindowSize raises the match finder's "before" size when it exceeds
   kNumOpts + dictSize.
   Returns SZ_OK or SZ_ERROR_MEM; in the multi-threaded branch RINOK passes
   on whatever MatchFinderMt_Create returns. */
static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
{
  UInt32 beforeSize = kNumOpts;
  Bool btMode;
  if (!RangeEnc_Alloc(&p->rc, alloc))
    return SZ_ERROR_MEM;
  btMode = (p->matchFinderBase.btMode != 0);
  #ifndef _7ZIP_ST
  /* The multi-threaded match finder is only used in normal mode with a
     binary-tree match finder. */
  p->mtMode = (p->multiThread && !p->fastMode && btMode);
  #endif

  {
    unsigned lclp = p->lc + p->lp;
    if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp)
    {
      LzmaEnc_FreeLits(p, alloc);
      p->litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
      p->saveState.litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
      if (p->litProbs == 0 || p->saveState.litProbs == 0)
      {
        /* Free whichever of the two allocations succeeded. */
        LzmaEnc_FreeLits(p, alloc);
        return SZ_ERROR_MEM;
      }
      p->lclp = lclp;
    }
  }

  p->matchFinderBase.bigHash = (p->dictSize > kBigHashDicLimit);

  if (beforeSize + p->dictSize < keepWindowSize)
    beforeSize = keepWindowSize - p->dictSize;

  #ifndef _7ZIP_ST
  if (p->mtMode)
  {
    RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig));
    p->matchFinderObj = &p->matchFinderMt;
    MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
  }
  else
  #endif
  {
    if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
      return SZ_ERROR_MEM;
    p->matchFinderObj = &p->matchFinderBase;
    MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
  }
  return SZ_OK;
}

/* LzmaEnc_Init: resets the coder state, the rep distances, the range coder
   and every probability model to its initial value. */
void LzmaEnc_Init(CLzmaEnc *p)
{
  UInt32 i;
  p->state = 0;
  for (i = 0 ; i < LZMA_NUM_REPS; i++)
    p->reps[i] = 0;

  RangeEnc_Init(&p->rc);

  for (i = 0; i < kNumStates; i++)
  {
    UInt32 j;
    for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++)
    {
      p->isMatch[i][j] =
kProbInitValue; p->isRep0Long[i][j] = kProbInitValue; } p->isRep[i] = kProbInitValue; p->isRepG0[i] = kProbInitValue; p->isRepG1[i] = kProbInitValue; p->isRepG2[i] = kProbInitValue; } { UInt32 num = 0x300 << (p->lp + p->lc); for (i = 0; i < num; i++) p->litProbs[i] = kProbInitValue; } { for (i = 0; i < kNumLenToPosStates; i++) { CLzmaProb *probs = p->posSlotEncoder[i]; UInt32 j; for (j = 0; j < (1 << kNumPosSlotBits); j++) probs[j] = kProbInitValue; } } { for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++) p->posEncoders[i] = kProbInitValue; } LenEnc_Init(&p->lenEnc.p); LenEnc_Init(&p->repLenEnc.p); for (i = 0; i < (1 << kNumAlignBits); i++) p->posAlignEncoder[i] = kProbInitValue; p->optimumEndIndex = 0; p->optimumCurrentIndex = 0; p->additionalOffset = 0; p->pbMask = (1 << p->pb) - 1; p->lpMask = (1 << p->lp) - 1; }

/* LzmaEnc_InitPrices: rebuilds the distance and align price tables (normal
   mode only), then sizes and refreshes both length price tables for all
   1 << pb position states. */
void LzmaEnc_InitPrices(CLzmaEnc *p)
{
  if (p->fastMode == 0)
  {
    FillDistancesPrices(p);
    FillAlignPrices(p);
  }

  /* Both length coders cover lengths LZMA_MATCH_LEN_MIN .. numFastBytes. */
  p->repLenEnc.tableSize = p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN;
  p->lenEnc.tableSize = p->repLenEnc.tableSize;

  LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices);
  LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices);
}

/* LzmaEnc_AllocAndInit: derives distTableSize from the dictionary size,
   clears the finished / result flags, allocates all buffers, then
   initialises the models and price tables and rewinds nowPos64 to 0.
   Returns SZ_OK, or the error reported by LzmaEnc_Alloc. */
static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
{
  /* Smallest logSize with dictSize <= 2^logSize, capped at
     kDicLogSizeMaxCompress. */
  UInt32 logSize = 0;
  while (logSize < (UInt32)kDicLogSizeMaxCompress && p->dictSize > ((UInt32)1 << logSize))
    logSize++;
  p->distTableSize = logSize * 2;

  p->finished = False;
  p->result = SZ_OK;
  RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig));
  LzmaEnc_Init(p);
  LzmaEnc_InitPrices(p);
  p->nowPos64 = 0;
  return SZ_OK;
}

/* LzmaEnc_Prepare: attaches the input and output streams, requests a
   match-finder (re)initialisation and allocates / initialises the encoder
   with keepWindowSize = 0. */
static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ISzAlloc *alloc, ISzAlloc *allocBig)
{
  CLzmaEnc *enc = (CLzmaEnc *)pp;
  enc->rc.outStream = outStream;
  enc->matchFinderBase.stream = inStream;
  enc->needInit = 1;
  return LzmaEnc_AllocAndInit(enc, 0, alloc, allocBig);
}

/* LzmaEnc_PrepareForLzma2: like LzmaEnc_Prepare, but takes no output stream
   and passes a caller-chosen keepWindowSize. */
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize, ISzAlloc *alloc,
ISzAlloc *allocBig) { CLzmaEnc *p = (CLzmaEnc *)pp; p->matchFinderBase.stream = inStream; p->needInit = 1; return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); } static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) { p->matchFinderBase.directInput = 1; p->matchFinderBase.bufferBase = (Byte *)src; p->matchFinderBase.directInputRem = srcLen; } SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig) { CLzmaEnc *p = (CLzmaEnc *)pp; LzmaEnc_SetInputBuf(p, src, srcLen); p->needInit = 1; return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); } void LzmaEnc_Finish(CLzmaEncHandle pp) { #ifndef _7ZIP_ST CLzmaEnc *p = (CLzmaEnc *)pp; if (p->mtMode) MatchFinderMt_ReleaseStream(&p->matchFinderMt); #else pp = pp; #endif } typedef struct { ISeqOutStream funcTable; Byte *data; SizeT rem; Bool overflow; } CSeqOutStreamBuf; static size_t MyWrite(void *pp, const void *data, size_t size) { CSeqOutStreamBuf *p = (CSeqOutStreamBuf *)pp; if (p->rem < size) { size = p->rem; p->overflow = True; } memcpy(p->data, data, size); p->rem -= size; p->data += size; return size; } UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) { const CLzmaEnc *p = (CLzmaEnc *)pp; return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); } const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) { const CLzmaEnc *p = (CLzmaEnc *)pp; return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; } SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit, Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize) { CLzmaEnc *p = (CLzmaEnc *)pp; UInt64 nowPos64; SRes res; CSeqOutStreamBuf outStream; outStream.funcTable.Write = MyWrite; outStream.data = dest; outStream.rem = *destLen; outStream.overflow = False; p->writeEndMark = False; p->finished = False; p->result = SZ_OK; if (reInit) LzmaEnc_Init(p); LzmaEnc_InitPrices(p); nowPos64 = p->nowPos64; 
RangeEnc_Init(&p->rc); p->rc.outStream = &outStream.funcTable; res = LzmaEnc_CodeOneBlock(p, True, desiredPackSize, *unpackSize); *unpackSize = (UInt32)(p->nowPos64 - nowPos64); *destLen -= outStream.rem; if (outStream.overflow) return SZ_ERROR_OUTPUT_EOF; return res; } static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) { SRes res = SZ_OK; #ifndef _7ZIP_ST __maybe_unused Byte allocaDummy[0x300]; int i = 0; for (i = 0; i < 16; i++) allocaDummy[i] = (Byte)i; #endif for (;;) { res = LzmaEnc_CodeOneBlock(p, False, 0, 0); if (res != SZ_OK || p->finished != 0) break; if (progress != 0) { res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); if (res != SZ_OK) { res = SZ_ERROR_PROGRESS; break; } } } LzmaEnc_Finish(p); return res; } SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig) { RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig)); return LzmaEnc_Encode2((CLzmaEnc *)pp, progress); } SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) { CLzmaEnc *p = (CLzmaEnc *)pp; int i; UInt32 dictSize = p->dictSize; if (*size < LZMA_PROPS_SIZE) return SZ_ERROR_PARAM; *size = LZMA_PROPS_SIZE; props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); for (i = 11; i <= 30; i++) { if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; } if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; } } for (i = 0; i < 4; i++) props[1 + i] = (Byte)(dictSize >> (8 * i)); return SZ_OK; } SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig) { SRes res; CLzmaEnc *p = (CLzmaEnc *)pp; CSeqOutStreamBuf outStream; LzmaEnc_SetInputBuf(p, src, srcLen); outStream.funcTable.Write = MyWrite; outStream.data = dest; outStream.rem = *destLen; outStream.overflow = False; p->writeEndMark = 
writeEndMark; p->rc.outStream = &outStream.funcTable; res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig); if (res == SZ_OK) res = LzmaEnc_Encode2(p, progress); *destLen -= outStream.rem; if (outStream.overflow) return SZ_ERROR_OUTPUT_EOF; return res; }

/*
LzmaEncode
----------
One-call interface: creates an encoder, applies props, writes the property
header into propsEncoded, compresses src into dest and destroys the encoder
again (also on failure of any step after creation).

  dest / destLen             - output buffer; in: capacity, out: bytes written
  src / srcLen               - input data
  props                      - encoder settings passed to LzmaEnc_SetProps
  propsEncoded / propsSize   - receives the property header
  writeEndMark               - forwarded to LzmaEnc_MemEncode
  progress                   - optional progress callback (may be 0)
  alloc / allocBig           - allocators; alloc also allocates the encoder object

Returns SZ_ERROR_MEM when the encoder cannot be created, otherwise the
result of the first failing step or of LzmaEnc_MemEncode.
*/
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
    const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
    ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig)
{
  CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc);
  SRes res;
  if (p == 0)
    return SZ_ERROR_MEM;

  res = LzmaEnc_SetProps(p, props);
  if (res == SZ_OK)
  {
    res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize);
    if (res == SZ_OK)
      res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen,
          writeEndMark, progress, alloc, allocBig);
  }

  LzmaEnc_Destroy(p, alloc, allocBig);
  return res;
}
/* NOTE(review): the next line starts with what looks like the archive member
   header for lzma/C/LzmaEnc.h; it is not C code and is kept as found. */
lrzip-0.651/lzma/C/LzmaEnc.h000066400000000000000000000055311421175057200155240ustar00rootroot00000000000000/* LzmaEnc.h -- LZMA Encoder
2009-02-07 : Igor Pavlov : Public domain */

#ifndef __LZMA_ENC_H
#define __LZMA_ENC_H

#include "Types.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Size in bytes of the encoded property header. */
#define LZMA_PROPS_SIZE 5

/* Encoder settings passed to LzmaEnc_SetProps / LzmaEncode. */
typedef struct _CLzmaEncProps
{
  int level;       /*  0 <= level <= 9 */
  UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
                      (1 << 12) <= dictSize <= (1 << 30) for 64-bit version
                       default = (1 << 24) */
  int lc;          /* 0 <= lc <= 8, default = 3 */
  int lp;          /* 0 <= lp <= 4, default = 0 */
  int pb;          /* 0 <= pb <= 4, default = 2 */
  int algo;        /* 0 - fast, 1 - normal, default = 1 */
  int fb;          /* 5 <= fb <= 273, default = 32 */
  int btMode;      /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
  int numHashBytes; /* 2, 3 or 4, default = 4 */
  UInt32 mc;        /* 1 <= mc <= (1 << 30), default = 32 */
  unsigned writeEndMark;  /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
  int numThreads;  /* 1 or 2, default = 2 */
} CLzmaEncProps;

void LzmaEncProps_Init(CLzmaEncProps *p);
void LzmaEncProps_Normalize(CLzmaEncProps
*p); UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2); /* ---------- CLzmaEncHandle Interface ---------- */ /* LzmaEnc_* functions can return the following exit codes: Returns: SZ_OK - OK SZ_ERROR_MEM - Memory allocation error SZ_ERROR_PARAM - Incorrect parameter in props SZ_ERROR_WRITE - Write callback error. SZ_ERROR_PROGRESS - some break from progress callback SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) */ typedef void * CLzmaEncHandle; CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc); void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig); SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props); SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); /* ---------- One Call Interface ---------- */ /* LzmaEncode Return code: SZ_OK - OK SZ_ERROR_MEM - Memory allocation error SZ_ERROR_PARAM - Incorrect parameter SZ_ERROR_OUTPUT_EOF - output buffer overflow SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) */ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); #ifdef __cplusplus } #endif #endif lrzip-0.651/lzma/C/LzmaLib.c000066400000000000000000000027501421175057200155200ustar00rootroot00000000000000/* LzmaLib.c -- LZMA library wrapper 2008-08-05 Igor Pavlov Public domain */ #include "LzmaEnc.h" #include "LzmaDec.h" #include "Alloc.h" #include "LzmaLib.h" static void *SzAlloc(void __attribute__((unused)) *p, size_t size) { return 
MyAlloc(size); }

/* SzFree: ISzAlloc.Free callback; forwards to MyFree and ignores the
   allocator object. */
static void SzFree(void __attribute__((unused)) *p, void *address) { MyFree(address); }

/* Allocator used for both the regular and the "big" allocations of the
   wrappers below. */
static ISzAlloc g_Alloc = { SzAlloc, SzFree };

/* LzmaCompress: fills a CLzmaEncProps from the scalar arguments (all other
   fields keep the values set by LzmaEncProps_Init) and calls LzmaEncode
   with no end marker and no progress callback. Returns LzmaEncode's result. */
MY_STDAPI LzmaCompress(unsigned char *dest, size_t  *destLen, const unsigned char *src, size_t  srcLen,
  unsigned char *outProps, size_t *outPropsSize,
  int level, /* 0 <= level <= 9, default = 5 */
  unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
  int lc, /* 0 <= lc <= 8, default = 3  */
  int lp, /* 0 <= lp <= 4, default = 0  */
  int pb, /* 0 <= pb <= 4, default = 2  */
  int fb,  /* 5 <= fb <= 273, default = 32 */
  int numThreads /* 1 or 2, default = 2 */
)
{
  CLzmaEncProps props;
  LzmaEncProps_Init(&props);
  props.level = level;
  props.dictSize = dictSize;
  props.lc = lc;
  props.lp = lp;
  props.pb = pb;
  props.fb = fb;
  props.numThreads = numThreads;

  return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0,
      NULL, &g_Alloc, &g_Alloc);
}

/* LzmaUncompress: thin wrapper around LzmaDecode using LZMA_FINISH_ANY and
   the file-local allocator; the decoder status is not reported to the
   caller. Returns LzmaDecode's result. */
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t  *destLen, const unsigned char *src, size_t  *srcLen,
  const unsigned char *props, size_t propsSize)
{
  ELzmaStatus status;
  return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc);
}
/* NOTE(review): the next line starts with what looks like the archive member
   header for lzma/C/LzmaLib.h; it is not C code and is kept as found. */
lrzip-0.651/lzma/C/LzmaLib.h000066400000000000000000000103701421175057200155220ustar00rootroot00000000000000/* LzmaLib.h -- LZMA library interface
2009-04-07 : Igor Pavlov : Public domain */

#ifndef __LZMA_LIB_H
#define __LZMA_LIB_H

#include "Types.h"

#ifdef __cplusplus
extern "C" {
#endif

#define MY_STDAPI int MY_STD_CALL

#define LZMA_PROPS_SIZE 5

/*
RAM requirements for LZMA:
  for compression:   (dictSize * 11.5 + 6 MB) + state_size
  for decompression: dictSize + state_size
    state_size = (4 + (1.5 << (lc + lp))) KB
    by default (lc=3, lp=0), state_size = 16 KB.

LZMA properties (5 bytes) format
    Offset Size  Description
      0     1    lc, lp and pb in encoded form.
      1     4    dictSize (little endian).
*/

/*
LzmaCompress
------------

outPropsSize -
     In:  the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
     Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.

  LZMA Encoder will use default values for any parameter, if it is
  -1  for any from: level, lc, lp, pb, fb, numThreads
   0  for dictSize

level - compression level: 0 <= level <= 9;

  level dictSize algo  fb
    0:    16 KB   0    32
    1:    64 KB   0    32
    2:   256 KB   0    32
    3:     1 MB   0    32
    4:     4 MB   0    32
    5:    16 MB   1    32
    6:    32 MB   1    32
    7+:   64 MB   1    64

  The default value for "level" is 5.

  algo = 0 means fast method
  algo = 1 means normal method

dictSize - The dictionary size in bytes. The maximum value is
        128 MB = (1 << 27) bytes for 32-bit version
          1 GB = (1 << 30) bytes for 64-bit version
     The default value is 16 MB = (1 << 24) bytes.
     It's recommended to use the dictionary that is larger than 4 KB and
     that can be calculated as (1 << N) or (3 << N) sizes.

lc - The number of literal context bits (high bits of previous literal).
     It can be in the range from 0 to 8. The default value is 3.
     Sometimes lc=4 gives the gain for big files.

lp - The number of literal pos bits (low bits of current position for literals).
     It can be in the range from 0 to 4. The default value is 0.
     The lp switch is intended for periodical data when the period is equal to 2^lp.
     For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's
     better to set lc=0, if you change lp switch.

pb - The number of pos bits (low bits of current position).
     It can be in the range from 0 to 4. The default value is 2.
     The pb switch is intended for periodical data when the period is equal 2^pb.

fb - Word size (the number of fast bytes).
     It can be in the range from 5 to 273. The default value is 32.
     Usually, a big number gives a little bit better compression ratio and
     slower compression process.

numThreads - The number of threads. 1 or 2. The default value is 2.
     Fast mode (algo = 0) can use only 1 thread.
Out: destLen - processed output size Returns: SZ_OK - OK SZ_ERROR_MEM - Memory allocation error SZ_ERROR_PARAM - Incorrect parameter SZ_ERROR_OUTPUT_EOF - output buffer overflow SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) */ MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */ int level, /* 0 <= level <= 9, default = 5 */ unsigned dictSize, /* default = (1 << 24) */ int lc, /* 0 <= lc <= 8, default = 3 */ int lp, /* 0 <= lp <= 4, default = 0 */ int pb, /* 0 <= pb <= 4, default = 2 */ int fb, /* 5 <= fb <= 273, default = 32 */ int numThreads /* 1 or 2, default = 2 */ ); /* LzmaUncompress -------------- In: dest - output data destLen - output data size src - input data srcLen - input data size Out: destLen - processed output size srcLen - processed input size Returns: SZ_OK - OK SZ_ERROR_DATA - Data error SZ_ERROR_MEM - Memory allocation error SZ_ERROR_UNSUPPORTED - Unsupported properties SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src) */ MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen, const unsigned char *props, size_t propsSize); #ifdef __cplusplus } #endif #endif lrzip-0.651/lzma/C/Makefile.am000066400000000000000000000022551421175057200160560ustar00rootroot00000000000000MAINTAINERCLEANFILES = Makefile.in # Update -D AM_CFLAGS = \ -D_REENTRANT \ -I@top_builddir@ \ -I@top_srcdir@ ASM_S = ASM_7z = C_S = if USE_ASM ASM_7z += 7zCrcOpt_asm ASM_S += @abs_top_srcdir@/lzma/ASM/x86/$(ASM_7z).asm C_S += 7zCrcT8.c else C_S += 7zCrc.c endif noinst_LTLIBRARIES = liblzma.la # need separate variable for ASM so that make will compile later # to prevent an error even if -j## is used. 
liblzma_la_SOURCES = \ $(C_S) \ 7zCrc.h \ LzmaDec.h \ LzmaEnc.h \ LzFind.c \ LzFind.h \ LzFindMt.c \ LzFindMt.h \ LzmaDec.c \ LzmaEnc.c \ LzmaLib.c \ LzmaLib.h \ Alloc.c \ Alloc.h \ Threads.c \ Threads.h \ Types.h \ LzHash.h \ windows.h \ basetyps.h \ MyWindows.h \ MyGuidDef.h ## hack to force asm compilation and to trick libtool with .lo file if USE_ASM liblzma_la_LIBADD = $(ASM_7z).lo 7ZIPASMLOFILE := \ \# $(ASM_7z).lo - a libtool object file\ \n\# Generated by libtool -- hack to allow asm linking\ \n\# Peter Hyman\ \npic_object='.libs/$(ASM_7z).o'\ \nnon_pic_object='$(ASM_7z).o' $(ASM_7z).lo: $(ASM_S) $(ASM_PROG) $(ASM_OPT) -o $(ASM_7z).o $(ASM_S) mkdir -p .libs cp $(ASM_7z).o .libs/ @echo -e "$(7ZIPASMLOFILE)" > $(ASM_7z).lo endif lrzip-0.651/lzma/C/MyGuidDef.h000066400000000000000000000021221421175057200160010ustar00rootroot00000000000000// Common/MyGuidDef.h #ifndef GUID_DEFINED #define GUID_DEFINED #include "Types.h" typedef int HRes; // from Types.h typedef struct { UInt32 Data1; UInt16 Data2; UInt16 Data3; unsigned char Data4[8]; } GUID; #ifdef __cplusplus #define REFGUID const GUID & #else #define REFGUID const GUID * #endif #define REFCLSID REFGUID #define REFIID REFGUID #ifdef __cplusplus inline int operator==(REFGUID g1, REFGUID g2) { for (int i = 0; i < (int)sizeof(g1); i++) if (((unsigned char *)&g1)[i] != ((unsigned char *)&g2)[i]) return 0; return 1; } inline int operator!=(REFGUID g1, REFGUID g2) { return !(g1 == g2); } #endif #ifdef __cplusplus #define MY_EXTERN_C extern "C" #else #define MY_EXTERN_C extern #endif #endif // GUID_DEFINED #ifdef DEFINE_GUID #undef DEFINE_GUID #endif #ifdef INITGUID #define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ MY_EXTERN_C const GUID name = { l, w1, w2, { b1, b2, b3, b4, b5, b6, b7, b8 } } #else #define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ MY_EXTERN_C const GUID name #endif 
lrzip-0.651/lzma/C/MyWindows.h000066400000000000000000000110011421175057200161200ustar00rootroot00000000000000// MyWindows.h #ifndef __MYWINDOWS_H #define __MYWINDOWS_H #ifdef _WIN32 #include #define CHAR_PATH_SEPARATOR '\\' #define WCHAR_PATH_SEPARATOR L'\\' #define STRING_PATH_SEPARATOR "\\" #define WSTRING_PATH_SEPARATOR L"\\" #else #define CHAR_PATH_SEPARATOR '/' #define WCHAR_PATH_SEPARATOR L'/' #define STRING_PATH_SEPARATOR "/" #define WSTRING_PATH_SEPARATOR L"/" #include // for wchar_t #include #include "MyGuidDef.h" typedef char CHAR; typedef unsigned char UCHAR; #undef BYTE typedef unsigned char BYTE; typedef short SHORT; typedef unsigned short USHORT; #undef WORD typedef unsigned short WORD; typedef short VARIANT_BOOL; typedef int INT; typedef Int32 INT32; typedef unsigned int UINT; typedef UInt32 UINT32; typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit typedef UINT32 ULONG; #undef DWORD typedef UINT32 DWORD; typedef Int64 LONGLONG; typedef UInt64 ULONGLONG; typedef struct LARGE_INTEGER { LONGLONG QuadPart; }LARGE_INTEGER; typedef struct _ULARGE_INTEGER { ULONGLONG QuadPart;} ULARGE_INTEGER; typedef const CHAR *LPCSTR; typedef wchar_t WCHAR; #ifdef _UNICODE typedef WCHAR TCHAR; #define lstrcpy wcscpy #define lstrcat wcscat #define lstrlen wcslen #else typedef CHAR TCHAR; #define lstrcpy strcpy #define lstrcat strcat #define lstrlen strlen #endif typedef const TCHAR *LPCTSTR; typedef WCHAR OLECHAR; typedef const WCHAR *LPCWSTR; typedef OLECHAR *BSTR; typedef const OLECHAR *LPCOLESTR; typedef OLECHAR *LPOLESTR; typedef struct _FILETIME { DWORD dwLowDateTime; DWORD dwHighDateTime; }FILETIME; #define HRESULT LONG #define FAILED(Status) ((HRESULT)(Status)<0) typedef ULONG PROPID; typedef LONG SCODE; #define S_OK ((HRESULT)0x00000000L) #define S_FALSE ((HRESULT)0x00000001L) #define E_NOTIMPL ((HRESULT)0x80004001L) #define E_NOINTERFACE ((HRESULT)0x80004002L) #define E_ABORT ((HRESULT)0x80004004L) #define E_FAIL ((HRESULT)0x80004005L) #define 
STG_E_INVALIDFUNCTION ((HRESULT)0x80030001L) #define E_OUTOFMEMORY ((HRESULT)0x8007000EL) #define E_INVALIDARG ((HRESULT)0x80070057L) #ifdef _MSC_VER #define STDMETHODCALLTYPE __stdcall #else #define STDMETHODCALLTYPE #endif #define STDMETHOD_(t, f) virtual t STDMETHODCALLTYPE f #define STDMETHOD(f) STDMETHOD_(HRESULT, f) #define STDMETHODIMP_(type) type STDMETHODCALLTYPE #define STDMETHODIMP STDMETHODIMP_(HRESULT) #define PURE = 0 #define MIDL_INTERFACE(x) struct #ifdef __cplusplus DEFINE_GUID(IID_IUnknown, 0x00000000, 0x0000, 0x0000, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46); struct IUnknown { STDMETHOD(QueryInterface) (REFIID iid, void **outObject) PURE; STDMETHOD_(ULONG, AddRef)() PURE; STDMETHOD_(ULONG, Release)() PURE; #ifndef _WIN32 virtual ~IUnknown() {} #endif }; typedef IUnknown *LPUNKNOWN; #endif #define VARIANT_TRUE ((VARIANT_BOOL)-1) #define VARIANT_FALSE ((VARIANT_BOOL)0) enum VARENUM { VT_EMPTY = 0, VT_NULL = 1, VT_I2 = 2, VT_I4 = 3, VT_R4 = 4, VT_R8 = 5, VT_CY = 6, VT_DATE = 7, VT_BSTR = 8, VT_DISPATCH = 9, VT_ERROR = 10, VT_BOOL = 11, VT_VARIANT = 12, VT_UNKNOWN = 13, VT_DECIMAL = 14, VT_I1 = 16, VT_UI1 = 17, VT_UI2 = 18, VT_UI4 = 19, VT_I8 = 20, VT_UI8 = 21, VT_INT = 22, VT_UINT = 23, VT_VOID = 24, VT_HRESULT = 25, VT_FILETIME = 64 }; typedef unsigned short VARTYPE; typedef WORD PROPVAR_PAD1; typedef WORD PROPVAR_PAD2; typedef WORD PROPVAR_PAD3; #ifdef __cplusplus typedef struct tagPROPVARIANT { VARTYPE vt; PROPVAR_PAD1 wReserved1; PROPVAR_PAD2 wReserved2; PROPVAR_PAD3 wReserved3; union { CHAR cVal; UCHAR bVal; SHORT iVal; USHORT uiVal; LONG lVal; ULONG ulVal; INT intVal; UINT uintVal; LARGE_INTEGER hVal; ULARGE_INTEGER uhVal; VARIANT_BOOL boolVal; SCODE scode; FILETIME filetime; BSTR bstrVal; }; } PROPVARIANT; typedef PROPVARIANT tagVARIANT; typedef tagVARIANT VARIANT; typedef VARIANT VARIANTARG; MY_EXTERN_C HRESULT VariantClear(VARIANTARG *prop); MY_EXTERN_C HRESULT VariantCopy(VARIANTARG *dest, VARIANTARG *src); #endif MY_EXTERN_C BSTR 
SysAllocStringByteLen(LPCSTR psz, UINT len); MY_EXTERN_C BSTR SysAllocString(const OLECHAR *sz); MY_EXTERN_C void SysFreeString(BSTR bstr); MY_EXTERN_C UINT SysStringByteLen(BSTR bstr); MY_EXTERN_C UINT SysStringLen(BSTR bstr); /* MY_EXTERN_C DWORD GetLastError(); */ MY_EXTERN_C LONG CompareFileTime(const FILETIME* ft1, const FILETIME* ft2); #define CP_ACP 0 #define CP_OEMCP 1 typedef enum tagSTREAM_SEEK { STREAM_SEEK_SET = 0, STREAM_SEEK_CUR = 1, STREAM_SEEK_END = 2 } STREAM_SEEK; #endif #endif lrzip-0.651/lzma/C/Threads.c000066400000000000000000000340611421175057200155600ustar00rootroot00000000000000/* Threads.c */ #include "Threads.h" #ifdef ENV_BEOS #include #else #include #include #endif #include #if defined(__linux__) #define PTHREAD_MUTEX_ERRORCHECK PTHREAD_MUTEX_ERRORCHECK_NP #endif #ifdef ENV_BEOS /* TODO : optimize the code and verify the returned values */ WRes Thread_Create(CThread *thread, THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE *startAddress)(void *), LPVOID parameter) { thread->_tid = spawn_thread((int32 (*)(void *))startAddress, "CThread", B_LOW_PRIORITY, parameter); if (thread->_tid >= B_OK) { resume_thread(thread->_tid); } else { thread->_tid = B_BAD_THREAD_ID; } thread->_created = 1; return 0; // SZ_OK; } WRes Thread_Wait(CThread *thread) { int ret; if (thread->_created == 0) return EINVAL; if (thread->_tid >= B_OK) { status_t exit_value; wait_for_thread(thread->_tid, &exit_value); thread->_tid = B_BAD_THREAD_ID; } else { return EINVAL; } thread->_created = 0; return 0; } WRes Thread_Close(CThread *thread) { if (!thread->_created) return SZ_OK; thread->_tid = B_BAD_THREAD_ID; thread->_created = 0; return SZ_OK; } WRes Event_Create(CEvent *p, BOOL manualReset, int initialSignaled) { p->_index_waiting = 0; p->_manual_reset = manualReset; p->_state = (initialSignaled ? 
TRUE : FALSE); p->_created = 1; p->_sem = create_sem(1,"event"); return 0; } WRes Event_Set(CEvent *p) { int index; acquire_sem(p->_sem); p->_state = TRUE; for(index = 0 ; index < p->_index_waiting ; index++) { send_data(p->_waiting[index], '7zCN', NULL, 0); } p->_index_waiting = 0; release_sem(p->_sem); return 0; } WRes Event_Reset(CEvent *p) { acquire_sem(p->_sem); p->_state = FALSE; release_sem(p->_sem); return 0; } WRes Event_Wait(CEvent *p) { acquire_sem(p->_sem); while (p->_state == FALSE) { thread_id sender; p->_waiting[p->_index_waiting++] = find_thread(NULL); release_sem(p->_sem); /* int msg = */ receive_data(&sender, NULL, 0); acquire_sem(p->_sem); } if (p->_manual_reset == FALSE) { p->_state = FALSE; } release_sem(p->_sem); return 0; } WRes Event_Close(CEvent *p) { if (p->_created) { p->_created = 0; delete_sem(p->_sem); } return 0; } WRes Semaphore_Create(CSemaphore *p, UInt32 initiallyCount, UInt32 maxCount) { p->_index_waiting = 0; p->_count = initiallyCount; p->_maxCount = maxCount; p->_created = 1; p->_sem = create_sem(1,"sem"); return 0; } WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) { UInt32 newCount; int index; if (releaseCount < 1) return EINVAL; acquire_sem(p->_sem); newCount = p->_count + releaseCount; if (newCount > p->_maxCount) { release_sem(p->_sem); return EINVAL; } p->_count = newCount; for(index = 0 ; index < p->_index_waiting ; index++) { send_data(p->_waiting[index], '7zCN', NULL, 0); } p->_index_waiting = 0; release_sem(p->_sem); return 0; } WRes Semaphore_Wait(CSemaphore *p) { acquire_sem(p->_sem); while (p->_count < 1) { thread_id sender; p->_waiting[p->_index_waiting++] = find_thread(NULL); release_sem(p->_sem); /* int msg = */ receive_data(&sender, NULL, 0); acquire_sem(p->_sem); } p->_count--; release_sem(p->_sem); return 0; } WRes Semaphore_Close(CSemaphore *p) { if (p->_created) { p->_created = 0; delete_sem(p->_sem); } return 0; } WRes CriticalSection_Init(CCriticalSection * lpCriticalSection) { 
lpCriticalSection->_sem = create_sem(1,"cc"); return 0; } #else /* !ENV_BEOS */ WRes Thread_Create(CThread *thread, THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE *startAddress)(void *), LPVOID parameter) { pthread_attr_t attr; int ret; thread->_created = 0; ret = pthread_attr_init(&attr); if (ret) return ret; ret = pthread_attr_setdetachstate(&attr,PTHREAD_CREATE_JOINABLE); if (ret) return ret; ret = pthread_create(&thread->_tid, &attr, (void *)startAddress, parameter); /* ret2 = */ pthread_attr_destroy(&attr); if (ret) return ret; thread->_created = 1; return 0; // SZ_OK; } WRes Thread_Wait(CThread *thread) { void *thread_return; int ret; if (thread->_created == 0) return EINVAL; ret = pthread_join(thread->_tid,&thread_return); thread->_created = 0; return ret; } WRes Thread_Close(CThread *thread) { if (!thread->_created) return SZ_OK; pthread_detach(thread->_tid); thread->_tid = 0; thread->_created = 0; return SZ_OK; } #ifdef DEBUG_SYNCHRO #include static void dump_error(int ligne,int ret,const char *text,void *param) { fprintf(stderr, "\n##T%d#ERROR2 (l=%d) %s : param=%p ret = %d (%s)##\n",(int)pthread_self(),ligne,text,param,ret,strerror(ret)); // abort(); } WRes Event_Create(CEvent *p, BOOL manualReset, int initialSignaled) { int ret; pthread_mutexattr_t mutexattr; memset(&mutexattr,0,sizeof(mutexattr)); ret = pthread_mutexattr_init(&mutexattr); if (ret != 0) dump_error(__LINE__,ret,"Event_Create::pthread_mutexattr_init",&mutexattr); ret = pthread_mutexattr_settype(&mutexattr,PTHREAD_MUTEX_ERRORCHECK); if (ret != 0) dump_error(__LINE__,ret,"Event_Create::pthread_mutexattr_settype",&mutexattr); ret = pthread_mutex_init(&p->_mutex,&mutexattr); if (ret != 0) dump_error(__LINE__,ret,"Event_Create::pthread_mutexattr_init",&p->_mutex); if (ret == 0) { ret = pthread_cond_init(&p->_cond,0); if (ret != 0) dump_error(__LINE__,ret,"Event_Create::pthread_cond_init",&p->_cond); p->_manual_reset = manualReset; p->_state = (initialSignaled ? 
TRUE : FALSE); p->_created = 1; } return ret; } WRes Event_Set(CEvent *p) { int ret = pthread_mutex_lock(&p->_mutex); if (ret != 0) dump_error(__LINE__,ret,"ES::pthread_mutex_lock",&p->_mutex); if (ret == 0) { p->_state = TRUE; ret = pthread_cond_broadcast(&p->_cond); if (ret != 0) dump_error(__LINE__,ret,"ES::pthread_cond_broadcast",&p->_cond); if (ret == 0) { ret = pthread_mutex_unlock(&p->_mutex); if (ret != 0) dump_error(__LINE__,ret,"ES::pthread_mutex_unlock",&p->_mutex); } } return ret; } WRes Event_Reset(CEvent *p) { int ret = pthread_mutex_lock(&p->_mutex); if (ret != 0) dump_error(__LINE__,ret,"ER::pthread_mutex_lock",&p->_mutex); if (ret == 0) { p->_state = FALSE; ret = pthread_mutex_unlock(&p->_mutex); if (ret != 0) dump_error(__LINE__,ret,"ER::pthread_mutex_unlock",&p->_mutex); } return ret; } WRes Event_Wait(CEvent *p) { int ret = pthread_mutex_lock(&p->_mutex); if (ret != 0) dump_error(__LINE__,ret,"EW::pthread_mutex_lock",&p->_mutex); if (ret == 0) { while ((p->_state == FALSE) && (ret == 0)) { ret = pthread_cond_wait(&p->_cond, &p->_mutex); if (ret != 0) dump_error(__LINE__,ret,"EW::pthread_cond_wait",&p->_mutex); } if (ret == 0) { if (p->_manual_reset == FALSE) { p->_state = FALSE; } ret = pthread_mutex_unlock(&p->_mutex); if (ret != 0) dump_error(__LINE__,ret,"EW::pthread_mutex_unlock",&p->_mutex); } } return ret; } WRes Event_Close(CEvent *p) { if (p->_created) { int ret; p->_created = 0; ret = pthread_mutex_destroy(&p->_mutex); if (ret != 0) dump_error(__LINE__,ret,"EC::pthread_mutex_destroy",&p->_mutex); ret = pthread_cond_destroy(&p->_cond); if (ret != 0) dump_error(__LINE__,ret,"EC::pthread_cond_destroy",&p->_cond); } return 0; } WRes Semaphore_Create(CSemaphore *p, UInt32 initiallyCount, UInt32 maxCount) { int ret; pthread_mutexattr_t mutexattr; memset(&mutexattr,0,sizeof(mutexattr)); ret = pthread_mutexattr_init(&mutexattr); if (ret != 0) dump_error(__LINE__,ret,"SemC::pthread_mutexattr_init",&mutexattr); ret = 
pthread_mutexattr_settype(&mutexattr,PTHREAD_MUTEX_ERRORCHECK); if (ret != 0) dump_error(__LINE__,ret,"SemC::pthread_mutexattr_settype",&mutexattr); ret = pthread_mutex_init(&p->_mutex,&mutexattr); if (ret != 0) dump_error(__LINE__,ret,"SemC::pthread_mutexattr_init",&p->_mutex); if (ret == 0) { ret = pthread_cond_init(&p->_cond,0); if (ret != 0) dump_error(__LINE__,ret,"SemC::pthread_cond_init",&p->_mutex); p->_count = initiallyCount; p->_maxCount = maxCount; p->_created = 1; } return ret; } WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) { int ret; if (releaseCount < 1) return EINVAL; ret = pthread_mutex_lock(&p->_mutex); if (ret != 0) dump_error(__LINE__,ret,"SemR::pthread_mutex_lock",&p->_mutex); if (ret == 0) { UInt32 newCount = p->_count + releaseCount; if (newCount > p->_maxCount) { ret = pthread_mutex_unlock(&p->_mutex); if (ret != 0) dump_error(__LINE__,ret,"SemR::pthread_mutex_unlock",&p->_mutex); return EINVAL; } p->_count = newCount; ret = pthread_cond_broadcast(&p->_cond); if (ret != 0) dump_error(__LINE__,ret,"SemR::pthread_cond_broadcast",&p->_cond); if (ret == 0) { ret = pthread_mutex_unlock(&p->_mutex); if (ret != 0) dump_error(__LINE__,ret,"SemR::pthread_mutex_unlock",&p->_mutex); } } return ret; } WRes Semaphore_Wait(CSemaphore *p) { int ret = pthread_mutex_lock(&p->_mutex); if (ret != 0) dump_error(__LINE__,ret,"SemW::pthread_mutex_lock",&p->_mutex); if (ret == 0) { while ((p->_count < 1) && (ret == 0)) { ret = pthread_cond_wait(&p->_cond, &p->_mutex); if (ret != 0) dump_error(__LINE__,ret,"SemW::pthread_cond_wait",&p->_mutex); } if (ret == 0) { p->_count--; ret = pthread_mutex_unlock(&p->_mutex); if (ret != 0) dump_error(__LINE__,ret,"SemW::pthread_mutex_unlock",&p->_mutex); } } return ret; } WRes Semaphore_Close(CSemaphore *p) { if (p->_created) { int ret; p->_created = 0; ret = pthread_mutex_destroy(&p->_mutex); if (ret != 0) dump_error(__LINE__,ret,"Semc::pthread_mutex_destroy",&p->_mutex); ret = pthread_cond_destroy(&p->_cond); 
if (ret != 0) dump_error(__LINE__,ret,"Semc::pthread_cond_destroy",&p->_cond); } return 0; } WRes CriticalSection_Init(CCriticalSection * lpCriticalSection) { if (lpCriticalSection) { int ret; pthread_mutexattr_t mutexattr; memset(&mutexattr,0,sizeof(mutexattr)); ret = pthread_mutexattr_init(&mutexattr); if (ret != 0) dump_error(__LINE__,ret,"CS I::pthread_mutexattr_init",&mutexattr); ret = pthread_mutexattr_settype(&mutexattr,PTHREAD_MUTEX_ERRORCHECK); if (ret != 0) dump_error(__LINE__,ret,"CS I::pthread_mutexattr_settype",&mutexattr); ret = pthread_mutex_init(&lpCriticalSection->_mutex,&mutexattr); if (ret != 0) dump_error(__LINE__,ret,"CS I::pthread_mutexattr_init",&lpCriticalSection->_mutex); return ret; } return EINTR; } void CriticalSection_Enter(CCriticalSection * lpCriticalSection) { if (lpCriticalSection) { int ret = pthread_mutex_lock(&(lpCriticalSection->_mutex)); if (ret != 0) dump_error(__LINE__,ret,"CS::pthread_mutex_lock",&(lpCriticalSection->_mutex)); } } void CriticalSection_Leave(CCriticalSection * lpCriticalSection) { if (lpCriticalSection) { int ret = pthread_mutex_unlock(&(lpCriticalSection->_mutex)); if (ret != 0) dump_error(__LINE__,ret,"CS::pthread_mutex_unlock",&(lpCriticalSection->_mutex)); } } void CriticalSection_Delete(CCriticalSection * lpCriticalSection) { if (lpCriticalSection) { int ret = pthread_mutex_destroy(&(lpCriticalSection->_mutex)); if (ret != 0) dump_error(__LINE__,ret,"CS::pthread_mutex_destroy",&(lpCriticalSection->_mutex)); } } #else WRes Event_Create(CEvent *p, BOOL manualReset, int initialSignaled) { pthread_mutex_init(&p->_mutex,0); pthread_cond_init(&p->_cond,0); p->_manual_reset = manualReset; p->_state = (initialSignaled ? 
TRUE : FALSE); p->_created = 1; return 0; } WRes Event_Set(CEvent *p) { pthread_mutex_lock(&p->_mutex); p->_state = TRUE; pthread_cond_broadcast(&p->_cond); pthread_mutex_unlock(&p->_mutex); return 0; } WRes Event_Reset(CEvent *p) { pthread_mutex_lock(&p->_mutex); p->_state = FALSE; pthread_mutex_unlock(&p->_mutex); return 0; } WRes Event_Wait(CEvent *p) { pthread_mutex_lock(&p->_mutex); while (p->_state == FALSE) { pthread_cond_wait(&p->_cond, &p->_mutex); } if (p->_manual_reset == FALSE) { p->_state = FALSE; } pthread_mutex_unlock(&p->_mutex); return 0; } WRes Event_Close(CEvent *p) { if (p->_created) { p->_created = 0; pthread_mutex_destroy(&p->_mutex); pthread_cond_destroy(&p->_cond); } return 0; } WRes Semaphore_Create(CSemaphore *p, UInt32 initiallyCount, UInt32 maxCount) { pthread_mutex_init(&p->_mutex,0); pthread_cond_init(&p->_cond,0); p->_count = initiallyCount; p->_maxCount = maxCount; p->_created = 1; return 0; } WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) { UInt32 newCount; if (releaseCount < 1) return EINVAL; pthread_mutex_lock(&p->_mutex); newCount = p->_count + releaseCount; if (newCount > p->_maxCount) { pthread_mutex_unlock(&p->_mutex); return EINVAL; } p->_count = newCount; pthread_cond_broadcast(&p->_cond); pthread_mutex_unlock(&p->_mutex); return 0; } WRes Semaphore_Wait(CSemaphore *p) { pthread_mutex_lock(&p->_mutex); while (p->_count < 1) { pthread_cond_wait(&p->_cond, &p->_mutex); } p->_count--; pthread_mutex_unlock(&p->_mutex); return 0; } WRes Semaphore_Close(CSemaphore *p) { if (p->_created) { p->_created = 0; pthread_mutex_destroy(&p->_mutex); pthread_cond_destroy(&p->_cond); } return 0; } WRes CriticalSection_Init(CCriticalSection * lpCriticalSection) { return pthread_mutex_init(&(lpCriticalSection->_mutex),0); } #endif /* DEBUG_SYNCHRO */ #endif /* ENV_BEOS */ WRes ManualResetEvent_Create(CManualResetEvent *p, int initialSignaled) { return Event_Create(p, TRUE, initialSignaled); } WRes 
ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) { return ManualResetEvent_Create(p, 0); } WRes AutoResetEvent_Create(CAutoResetEvent *p, int initialSignaled) { return Event_Create(p, FALSE, initialSignaled); } WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEvent_Create(p, 0); } lrzip-0.651/lzma/C/Threads.h000066400000000000000000000057331421175057200155710ustar00rootroot00000000000000/* Threads.h -- multithreading library 2008-11-22 : Igor Pavlov : Public domain */ #ifndef __7Z_THRESDS_H #define __7Z_THRESDS_H #include "Types.h" #include "windows.h" #ifdef ENV_BEOS #include #define MAX_THREAD 256 #else #include #endif /* #define DEBUG_SYNCHRO 1 */ typedef struct _CThread { #ifdef ENV_BEOS thread_id _tid; #else pthread_t _tid; #endif int _created; } CThread; #define Thread_Construct(thread) (thread)->_created = 0 #define Thread_WasCreated(thread) ((thread)->_created != 0) typedef unsigned THREAD_FUNC_RET_TYPE; #define THREAD_FUNC_CALL_TYPE MY_STD_CALL #define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE WRes Thread_Create(CThread *thread, THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE *startAddress)(void *), LPVOID parameter); WRes Thread_Wait(CThread *thread); WRes Thread_Close(CThread *thread); typedef struct _CEvent { int _created; int _manual_reset; int _state; #ifdef ENV_BEOS thread_id _waiting[MAX_THREAD]; int _index_waiting; sem_id _sem; #else pthread_mutex_t _mutex; pthread_cond_t _cond; #endif } CEvent; typedef CEvent CAutoResetEvent; typedef CEvent CManualResetEvent; #define Event_Construct(event) (event)->_created = 0 #define Event_IsCreated(event) ((event)->_created) WRes ManualResetEvent_Create(CManualResetEvent *event, int initialSignaled); WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *event); WRes AutoResetEvent_Create(CAutoResetEvent *event, int initialSignaled); WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *event); WRes Event_Set(CEvent *event); WRes Event_Reset(CEvent *event); 
WRes Event_Wait(CEvent *event); WRes Event_Close(CEvent *event); typedef struct _CSemaphore { int _created; UInt32 _count; UInt32 _maxCount; #ifdef ENV_BEOS thread_id _waiting[MAX_THREAD]; int _index_waiting; sem_id _sem; #else pthread_mutex_t _mutex; pthread_cond_t _cond; #endif } CSemaphore; #define Semaphore_Construct(p) (p)->_created = 0 WRes Semaphore_Create(CSemaphore *p, UInt32 initiallyCount, UInt32 maxCount); WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num); #define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1) WRes Semaphore_Wait(CSemaphore *p); WRes Semaphore_Close(CSemaphore *p); typedef struct { #ifdef ENV_BEOS sem_id _sem; #else pthread_mutex_t _mutex; #endif } CCriticalSection; WRes CriticalSection_Init(CCriticalSection *p); #ifdef ENV_BEOS #define CriticalSection_Delete(p) delete_sem((p)->_sem) #define CriticalSection_Enter(p) acquire_sem((p)->_sem) #define CriticalSection_Leave(p) release_sem((p)->_sem) #else #ifdef DEBUG_SYNCHRO void CriticalSection_Delete(CCriticalSection *); void CriticalSection_Enter(CCriticalSection *); void CriticalSection_Leave(CCriticalSection *); #else #define CriticalSection_Delete(p) pthread_mutex_destroy(&((p)->_mutex)) #define CriticalSection_Enter(p) pthread_mutex_lock(&((p)->_mutex)) #define CriticalSection_Leave(p) pthread_mutex_unlock(&((p)->_mutex)) #endif #endif #endif lrzip-0.651/lzma/C/Types.h000066400000000000000000000116011421175057200152720ustar00rootroot00000000000000/* Types.h -- Basic types 2009-08-14 : Igor Pavlov : Public domain */ #ifndef __7Z_TYPES_H #define __7Z_TYPES_H #include #ifdef _WIN32 #include #endif #ifndef EXTERN_C_BEGIN #ifdef __cplusplus #define EXTERN_C_BEGIN extern "C" { #define EXTERN_C_END } #else #define EXTERN_C_BEGIN #define EXTERN_C_END #endif #endif EXTERN_C_BEGIN #define SZ_OK 0 #define SZ_ERROR_DATA 1 #define SZ_ERROR_MEM 2 #define SZ_ERROR_CRC 3 #define SZ_ERROR_UNSUPPORTED 4 #define SZ_ERROR_PARAM 5 #define SZ_ERROR_INPUT_EOF 6 #define SZ_ERROR_OUTPUT_EOF 7 #define 
SZ_ERROR_READ 8 #define SZ_ERROR_WRITE 9 #define SZ_ERROR_PROGRESS 10 #define SZ_ERROR_FAIL 11 #define SZ_ERROR_THREAD 12 #define SZ_ERROR_ARCHIVE 16 #define SZ_ERROR_NO_ARCHIVE 17 typedef int SRes; #ifdef _WIN32 typedef DWORD WRes; #else typedef int WRes; typedef void * HANDLE; #endif #ifndef RINOK #define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } #endif /* zconf.h defines Byte. Don't redefine if it's included. */ #ifndef ZCONF_H #ifndef _ZCONF_H typedef unsigned char Byte; #endif #endif typedef short Int16; typedef unsigned short UInt16; #ifdef _LZMA_UINT32_IS_ULONG typedef long Int32; typedef unsigned long UInt32; #else typedef int Int32; typedef unsigned int UInt32; #endif #ifdef _SZ_NO_INT_64 /* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. NOTES: Some code will work incorrectly in that case! */ typedef long Int64; typedef unsigned long UInt64; #else #if defined(_MSC_VER) || defined(__BORLANDC__) typedef __int64 Int64; typedef unsigned __int64 UInt64; #else typedef long long int Int64; typedef unsigned long long int UInt64; #endif #endif #ifdef _LZMA_NO_SYSTEM_SIZE_T typedef UInt32 SizeT; #else typedef size_t SizeT; #endif typedef int Bool; #define True 1 #define False 0 #ifdef _MSC_VER #if _MSC_VER >= 1300 #define MY_NO_INLINE __declspec(noinline) #else #define MY_NO_INLINE #endif #define MY_CDECL __cdecl #define MY_STD_CALL __stdcall #define MY_FAST_CALL MY_NO_INLINE __fastcall #else #define MY_CDECL #define MY_STD_CALL #define MY_FAST_CALL #endif /* The following interfaces use first parameter as pointer to structure */ typedef struct { SRes (*Read)(void *p, void *buf, size_t *size); /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. 
(output(*size) < input(*size)) is allowed */ } ISeqInStream; /* it can return SZ_ERROR_INPUT_EOF */ SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size); SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType); SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf); typedef struct { size_t (*Write)(void *p, const void *buf, size_t size); /* Returns: result - the number of actually written bytes. (result < size) means error */ } ISeqOutStream; typedef enum { SZ_SEEK_SET = 0, SZ_SEEK_CUR = 1, SZ_SEEK_END = 2 } ESzSeek; typedef struct { SRes (*Read)(void *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin); } ISeekInStream; typedef struct { SRes (*Look)(void *p, void **buf, size_t *size); /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. (output(*size) > input(*size)) is not allowed (output(*size) < input(*size)) is allowed */ SRes (*Skip)(void *p, size_t offset); /* offset must be <= output(*size) of Look */ SRes (*Read)(void *p, void *buf, size_t *size); /* reads directly (without buffer). 
It's same as ISeqInStream::Read */ SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin); } ILookInStream; SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size); SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset); /* reads via ILookInStream::Read */ SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType); SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size); #define LookToRead_BUF_SIZE (1 << 14) typedef struct { ILookInStream s; ISeekInStream *realStream; size_t pos; size_t size; Byte buf[LookToRead_BUF_SIZE]; } CLookToRead; void LookToRead_CreateVTable(CLookToRead *p, int lookahead); void LookToRead_Init(CLookToRead *p); typedef struct { ISeqInStream s; ILookInStream *realStream; } CSecToLook; void SecToLook_CreateVTable(CSecToLook *p); typedef struct { ISeqInStream s; ILookInStream *realStream; } CSecToRead; void SecToRead_CreateVTable(CSecToRead *p); typedef struct { SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize); /* Returns: result. (result != SZ_OK) means break. Value (UInt64)(Int64)-1 for size means unknown value. 
*/ } ICompressProgress; typedef struct { void *(*Alloc)(void *p, size_t size); void (*Free)(void *p, void *address); /* address can be 0 */ } ISzAlloc; #define IAlloc_Alloc(p, size) (p)->Alloc((p), size) #define IAlloc_Free(p, a) (p)->Free((p), a) EXTERN_C_END #endif lrzip-0.651/lzma/C/basetyps.h000066400000000000000000000005441421175057200160240ustar00rootroot00000000000000#ifndef _BASETYPS_H #define _BASETYPS_H #ifdef HAVE_GCCVISIBILITYPATCH #define DLLEXPORT __attribute__ ((visibility("default"))) #else #define DLLEXPORT #endif #ifdef __cplusplus #define STDAPI extern "C" DLLEXPORT HRESULT #else #define STDAPI extern DLLEXPORT HRESULT #endif /* __cplusplus */ typedef GUID IID; typedef GUID CLSID; #endif lrzip-0.651/lzma/C/windows.h000066400000000000000000000113341421175057200156630ustar00rootroot00000000000000/* windows.h - main header file for the Win32 API Written by Anders Norlander This file is part of a free library for the Win32 API. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ #ifndef _WINDOWS_H #define _WINDOWS_H #include /* BEGIN #include */ #include "MyWindows.h" // FIXED #ifndef CONST #define CONST const #endif #undef MAX_PATH #define MAX_PATH 4096 /* Linux : 4096 - Windows : 260 */ #ifndef FALSE #define FALSE 0 #endif #ifndef TRUE #define TRUE 1 #endif #define WINAPI #undef BOOL typedef int BOOL; /* BEGIN #include */ /* BEGIN */ #define NO_ERROR 0L #define ERROR_ALREADY_EXISTS EEXIST #define ERROR_FILE_EXISTS EEXIST #define ERROR_INVALID_HANDLE EBADF #define ERROR_PATH_NOT_FOUND ENOENT #define ERROR_DISK_FULL ENOSPC #define ERROR_NO_MORE_FILES 0x100123 // FIXME /* see Common/WyWindows.h #define S_OK ((HRESULT)0x00000000L) #define S_FALSE ((HRESULT)0x00000001L) #define E_INVALIDARG ((HRESULT)0x80070057L) #define E_NOTIMPL ((HRESULT)0x80004001L) #define E_NOINTERFACE ((HRESULT)0x80004002L) #define E_ABORT ((HRESULT)0x80004004L) #define E_FAIL ((HRESULT)0x80004005L) #define E_OUTOFMEMORY ((HRESULT)0x8007000EL) #define STG_E_INVALIDFUNCTION ((HRESULT)0x80030001L) #define SUCCEEDED(Status) ((HRESULT)(Status) >= 0) #define FAILED(Status) ((HRESULT)(Status)<0) */ #ifndef VOID #define VOID void #endif typedef void *PVOID,*LPVOID; typedef WCHAR *LPWSTR; typedef CHAR *LPSTR; typedef TCHAR *LPTSTR; #ifdef UNICODE /* * P7ZIP_TEXT is a private macro whose specific use is to force the expansion of a * macro passed as an argument to the macro TEXT. DO NOT use this * macro within your programs. It's name and function could change without * notice. */ #define P7ZIP_TEXT(q) L##q #else #define P7ZIP_TEXT(q) q #endif /* * UNICODE a constant string when UNICODE is defined, else returns the string * unmodified. 
* The corresponding macros _TEXT() and _T() for mapping _UNICODE strings * passed to C runtime functions are defined in mingw/tchar.h */ #define TEXT(q) P7ZIP_TEXT(q) typedef BYTE BOOLEAN; /* BEGIN #include */ #ifndef __int64 #define __int64 long long #endif typedef unsigned __int64 UINT64; typedef __int64 INT64; /* END #include */ #define FILE_ATTRIBUTE_READONLY 1 #define FILE_ATTRIBUTE_HIDDEN 2 #define FILE_ATTRIBUTE_SYSTEM 4 #define FILE_ATTRIBUTE_DIRECTORY 16 #define FILE_ATTRIBUTE_ARCHIVE 32 #define FILE_ATTRIBUTE_DEVICE 64 #define FILE_ATTRIBUTE_NORMAL 128 #define FILE_ATTRIBUTE_TEMPORARY 256 #define FILE_ATTRIBUTE_SPARSE_FILE 512 #define FILE_ATTRIBUTE_REPARSE_POINT 1024 #define FILE_ATTRIBUTE_COMPRESSED 2048 #define FILE_ATTRIBUTE_OFFLINE 0x1000 #define FILE_ATTRIBUTE_ENCRYPTED 0x4000 #define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */ /* END */ #include #include /* END #include */ /* END #include */ /* BEGIN #include */ #define WAIT_OBJECT_0 0 #define INFINITE 0xFFFFFFFF typedef struct _SYSTEMTIME { WORD wYear; WORD wMonth; WORD wDayOfWeek; WORD wDay; WORD wHour; WORD wMinute; WORD wSecond; WORD wMilliseconds; } SYSTEMTIME; #ifdef __cplusplus extern "C" { #endif BOOL WINAPI DosDateTimeToFileTime(WORD,WORD,FILETIME *); BOOL WINAPI FileTimeToDosDateTime(CONST FILETIME *,WORD *, WORD *); BOOL WINAPI FileTimeToLocalFileTime(CONST FILETIME *,FILETIME *); BOOL WINAPI FileTimeToSystemTime(CONST FILETIME *,SYSTEMTIME *); BOOL WINAPI LocalFileTimeToFileTime(CONST FILETIME *,FILETIME *); VOID WINAPI GetSystemTime(SYSTEMTIME *); BOOL WINAPI SystemTimeToFileTime(const SYSTEMTIME*,FILETIME *); DWORD WINAPI GetTickCount(VOID); #ifdef __cplusplus } #endif /* END #include */ /* BEGIN #include */ #define CP_ACP 0 #define CP_OEMCP 1 #define CP_UTF8 65001 /* #include */ #include struct IEnumSTATPROPSTG; typedef struct tagSTATPROPSTG { LPOLESTR lpwstrName; PROPID propid; VARTYPE vt; } STATPROPSTG; #ifdef __cplusplus extern "C" const IID IID_ISequentialStream; 
struct ISequentialStream : public IUnknown { STDMETHOD(QueryInterface)(REFIID,PVOID*) PURE; STDMETHOD_(ULONG,AddRef)(void) PURE; STDMETHOD_(ULONG,Release)(void) PURE; STDMETHOD(Read)(void*,ULONG,ULONG*) PURE; STDMETHOD(Write)(void const*,ULONG,ULONG*) PURE; }; #else extern const IID IID_ISequentialStream; #endif /* __cplusplus */ /* END #include */ #endif lrzip-0.651/lzma/Makefile.am000066400000000000000000000003461421175057200156730ustar00rootroot00000000000000SUBDIRS = C ASM/x86 MAINTAINERCLEANFILES = Makefile.in lzmadocdir = @docdir@/lzma lzmadoc_DATA = \ 7zC.txt \ 7zFormat.txt \ Methods.txt \ history.txt \ lzma.txt \ README \ README-Alloc EXTRA_DIST = $(lzmadoc_DATA) lrzip-0.651/lzma/Methods.txt000066400000000000000000000053501421175057200160030ustar00rootroot000000000000007-Zip method IDs (9.18) ----------------------- Each compression or crypto method in 7z has unique binary value (ID). The length of ID in bytes is arbitrary but it can not exceed 63 bits (8 bytes). If you want to add some new ID, you have two ways: 1) Write request for allocating IDs to 7-zip developers. 2) Generate 8-bytes ID: 3F ZZ ZZ ZZ ZZ ZZ MM MM 3F - Prefix for random IDs (1 byte) ZZ ZZ ZZ ZZ ZZ - Developer ID (5 bytes). Use real random bytes. MM MM - Method ID (2 bytes) You can notify 7-Zip developers about your Developer ID / Method ID. Note: Use new ID only if old codec can not decode data encoded with new version. List of defined IDs ------------------- 00 - Copy 03 - Delta 04 - x86 (BCJ) 05 - PPC (Big Endian) 06 - IA64 07 - ARM (little endian) 08 - ARM Thumb (little endian) 09 - SPARC 21 - LZMA2 02.. - Common 03 Swap - 2 Swap2 - 4 Swap4 03.. - 7z 01 - LZMA 01 - Version 03 - Branch 01 - x86 03 - BCJ 1B - BCJ2 02 - PPC 05 - PPC (Big Endian) 03 - Alpha 01 - Alpha 04 - IA64 01 - IA64 05 - ARM 01 - ARM 06 - M68 05 - M68 (Big Endian) 07 - ARM Thumb 01 - ARMT 08 - SPARC 05 - SPARC 04 - PPMD 01 - Version 7F - 01 - experimental methods. 04.. 
- Misc 00 - Reserved 01 - Zip 00 - Copy (not used). Use {00} instead 01 - Shrink 06 - Implode 08 - Deflate 09 - Deflate64 10 - Imploding 12 - BZip2 (not used). Use {04 02 02} instead 14 - LZMA 60 - Jpeg 61 - WavPack 62 - PPMd 63 - wzAES 02 - BZip 02 - BZip2 03 - Rar 01 - Rar15 02 - Rar20 03 - Rar29 04 - Arj 01 - Arj (1,2,3) 02 - Arj 4 05 - Z 06 - Lzh 07 - Reserved for 7z 08 - Cab 09 - NSIS 01 - DeflateNSIS 02 - BZip2NSIS 06.. - Crypto 00 - 01 - AES 0x - AES-128 4x - AES-192 8x - AES-256 Cx - AES x0 - ECB x1 - CBC x2 - CFB x3 - OFB 07 - Reserved 0F - Reserved F0 - Misc Ciphers (Real Ciphers without hashing algo) F1 - Misc Ciphers (Combine) 01 - Zip 01 - Main Zip crypto algo 03 - RAR 02 - 03 - Rar29 AES-128 + (modified SHA-1) 07 - 7z 01 - AES-256 + SHA-256 07.. - Hash (subject to change) 00 - 01 - CRC 02 - SHA-1 03 - SHA-256 04 - SHA-384 05 - SHA-512 F0 - Misc Hash F1 - Misc 03 - RAR 03 - Rar29 Password Hashing (modified SHA1) 07 - 7z 01 - SHA-256 Password Hashing --- End of document lrzip-0.651/lzma/README000066400000000000000000000022211421175057200145110ustar00rootroot00000000000000JANUARY 2009 This is an updated LZMA library wrapper provided with SDK 4.63. The SDK is available here: http://www.7-zip.org/sdk.html. It is written completely in C and compilation and integration is much simpler. To enable multithreading support, compile with COMPRESS_MF_MT and _REENTRANT defined. MF=Match Finder, MT=Multi Thread. In addition, link in pthread. This is default behavior in lrzip. For single thread support, remove these defines in the Makefile. Some additional documentation is provided from the SDK. File ./C/7zCrcT8.c is added to support ASM CRC code. Taken from p7zip.org. Original README text follows. This is a zlib like library for the lzma encoder/decoder originally created by Oleg I. Vdovikin and modified for lrzip by Con Kolivas It is based on a stripped down source tree of the lzma SDK by Igor Pavlov. 
http://www.7-zip.org You can build a standalone library called liblzma.a which gives functions equivalent to compress2() and uncompress() called lzma_compress() and lzma_uncompress(). Updated for recent SDK 4.57 and added assembler routines for crc using p7zip.org variant by Peter Hyman lrzip-0.651/lzma/README-Alloc000066400000000000000000000023041421175057200155430ustar00rootroot00000000000000README for Memory Allocation Debugging If it is necessary or desired to debug the memory allocation process in LZMA, edit the file C/Alloc.c and uncomment the line: /* #define _SZ_ALLOC_DEBUG */ Then, add this to the Makefile and relink. This output will show chunks of memory Alloc uses during LZMA compression. Output will appear similar to this: Alloc 284484 bytes, count = 0, addr = 44251008 Alloc 65536 bytes, count = 1, addr = 80636F0 Alloc 12288 bytes, count = 2, addr = 80736F8 Alloc 12288 bytes, count = 3, addr = 8076700 Alloc 4456448 bytes, count = 4, addr = 43E10008 Alloc 102877690 bytes, count = 5, addr = 3DBF3008 Alloc 604246024 bytes, count = 6, addr = 19BB1008 Free; count = 6, addr = 43E10008 Free; count = 5, addr = 19BB1008 Free; count = 4, addr = 3DBF3008 Free; count = 3, addr = 80736F8 Free; count = 2, addr = 8076700 Free; count = 1, addr = 80636F0 Free; count = 0, addr = 44251008 As you can see, LZMA takes large chunks of ram and sometimes it can use more than what is available and return an SZ_ERROR_MEM (2) code. lrzip-0.651/lzma/history.txt000066400000000000000000000156741421175057200161130ustar00rootroot00000000000000HISTORY of the LZMA SDK ----------------------- 9.18 beta 2010-11-02 ------------------------- - New small SFX module for installers (SfxSetup). 9.12 beta 2010-03-24 ------------------------- - The BUG in LZMA SDK 9.* was fixed: LZMA2 codec didn't work, if more than 10 threads were used (or more than 20 threads in some modes). 
9.11 beta 2010-03-15 ------------------------- - PPMd compression method support 9.09 2009-12-12 ------------------------- - The bug was fixed: Utf16_To_Utf8 funstions in UTFConvert.cpp and 7zMain.c incorrectly converted surrogate characters (the code >= 0x10000) to UTF-8. - Some bugs were fixed 9.06 2009-08-17 ------------------------- - Some changes in ANSI-C 7z Decoder interfaces. 9.04 2009-05-30 ------------------------- - LZMA2 compression method support - xz format support 4.65 2009-02-03 ------------------------- - Some minor fixes 4.63 2008-12-31 ------------------------- - Some minor fixes 4.61 beta 2008-11-23 ------------------------- - The bug in ANSI-C LZMA Decoder was fixed: If encoded stream was corrupted, decoder could access memory outside of allocated range. - Some changes in ANSI-C 7z Decoder interfaces. - LZMA SDK is placed in the public domain. 4.60 beta 2008-08-19 ------------------------- - Some minor fixes. 4.59 beta 2008-08-13 ------------------------- - The bug was fixed: LZMA Encoder in fast compression mode could access memory outside of allocated range in some rare cases. 4.58 beta 2008-05-05 ------------------------- - ANSI-C LZMA Decoder was rewritten for speed optimizations. - ANSI-C LZMA Encoder was included to LZMA SDK. - C++ LZMA code now is just wrapper over ANSI-C code. 4.57 2007-12-12 ------------------------- - Speed optimizations in Ñ++ LZMA Decoder. - Small changes for more compatibility with some C/C++ compilers. 4.49 beta 2007-07-05 ------------------------- - .7z ANSI-C Decoder: - now it supports BCJ and BCJ2 filters - now it supports files larger than 4 GB. - now it supports "Last Write Time" field for files. - C++ code for .7z archives compressing/decompressing from 7-zip was included to LZMA SDK. 4.43 2006-06-04 ------------------------- - Small changes for more compatibility with some C/C++ compilers. 4.42 2006-05-15 ------------------------- - Small changes in .h files in ANSI-C version. 
4.39 beta 2006-04-14 ------------------------- - The bug in versions 4.33b:4.38b was fixed: C++ version of LZMA encoder could not correctly compress files larger than 2 GB with HC4 match finder (-mfhc4). 4.37 beta 2005-04-06 ------------------------- - Fixes in C++ code: code could no be compiled if _NO_EXCEPTIONS was defined. 4.35 beta 2005-03-02 ------------------------- - The bug was fixed in C++ version of LZMA Decoder: If encoded stream was corrupted, decoder could access memory outside of allocated range. 4.34 beta 2006-02-27 ------------------------- - Compressing speed and memory requirements for compressing were increased - LZMA now can use only these match finders: HC4, BT2, BT3, BT4 4.32 2005-12-09 ------------------------- - Java version of LZMA SDK was included 4.30 2005-11-20 ------------------------- - Compression ratio was improved in -a2 mode - Speed optimizations for compressing in -a2 mode - -fb switch now supports values up to 273 - The bug in 7z_C (7zIn.c) was fixed: It used Alloc/Free functions from different memory pools. So if program used two memory pools, it worked incorrectly. - 7z_C: .7z format supporting was improved - LZMA# SDK (C#.NET version) was included 4.27 (Updated) 2005-09-21 ------------------------- - Some GUIDs/interfaces in C++ were changed. IStream.h: ISequentialInStream::Read now works as old ReadPart ISequentialOutStream::Write now works as old WritePart 4.27 2005-08-07 ------------------------- - The bug in LzmaDecodeSize.c was fixed: if _LZMA_IN_CB and _LZMA_OUT_READ were defined, decompressing worked incorrectly. 
4.26 2005-08-05 ------------------------- - Fixes in 7z_C code and LzmaTest.c: previous versions could work incorrectly, if malloc(0) returns 0 4.23 2005-06-29 ------------------------- - Small fixes in C++ code 4.22 2005-06-10 ------------------------- - Small fixes 4.21 2005-06-08 ------------------------- - Interfaces for ANSI-C LZMA Decoder (LzmaDecode.c) were changed - New additional version of ANSI-C LZMA Decoder with zlib-like interface: - LzmaStateDecode.h - LzmaStateDecode.c - LzmaStateTest.c - ANSI-C LZMA Decoder now can decompress files larger than 4 GB 4.17 2005-04-18 ------------------------- - New example for RAM->RAM compressing/decompressing: LZMA + BCJ (filter for x86 code): - LzmaRam.h - LzmaRam.cpp - LzmaRamDecode.h - LzmaRamDecode.c - -f86 switch for lzma.exe 4.16 2005-03-29 ------------------------- - The bug was fixed in LzmaDecode.c (ANSI-C LZMA Decoder): If _LZMA_OUT_READ was defined, and if encoded stream was corrupted, decoder could access memory outside of allocated range. - Speed optimization of ANSI-C LZMA Decoder (now it's about 20% faster). Old version of LZMA Decoder now is in file LzmaDecodeSize.c. LzmaDecodeSize.c can provide slightly smaller code than LzmaDecode.c - Small speed optimization in LZMA C++ code - filter for SPARC's code was added - Simplified version of .7z ANSI-C Decoder was included 4.06 2004-09-05 ------------------------- - The bug in v4.05 was fixed: LZMA-Encoder didn't release output stream in some cases. 4.05 2004-08-25 ------------------------- - Source code of filters for x86, IA-64, ARM, ARM-Thumb and PowerPC code was included to SDK - Some internal minor changes 4.04 2004-07-28 ------------------------- - More compatibility with some C++ compilers 4.03 2004-06-18 ------------------------- - "Benchmark" command was added. It measures compressing and decompressing speed and shows rating values. Also it checks hardware errors. 
4.02 2004-06-10 ------------------------- - C++ LZMA Encoder/Decoder code now is more portable and it can be compiled by GCC on Linux. 4.01 2004-02-15 ------------------------- - Some detection of data corruption was enabled. LzmaDecode.c / RangeDecoderReadByte ..... { rd->ExtraBytes = 1; return 0xFF; } 4.00 2004-02-13 ------------------------- - Original version of LZMA SDK HISTORY of the LZMA ------------------- 2001-2008: Improvements to LZMA compressing/decompressing code, keeping compatibility with original LZMA format 1996-2001: Development of LZMA compression format Some milestones: 2001-08-30: LZMA compression was added to 7-Zip 1999-01-02: First version of 7-Zip was released End of document lrzip-0.651/lzma/lzma.txt000066400000000000000000000465301421175057200153500ustar00rootroot00000000000000LZMA SDK 9.20 ------------- LZMA SDK provides the documentation, samples, header files, libraries, and tools you need to develop applications that use LZMA compression. LZMA is default and general compression method of 7z format in 7-Zip compression program (www.7-zip.org). LZMA provides high compression ratio and very fast decompression. LZMA is an improved version of famous LZ77 compression algorithm. It was improved in way of maximum increasing of compression ratio, keeping high decompression speed and low memory requirements for decompressing. LICENSE ------- LZMA SDK is written and placed in the public domain by Igor Pavlov. 
Some code in LZMA SDK is based on public domain code from another developers: 1) PPMd var.H (2001): Dmitry Shkarin 2) SHA-256: Wei Dai (Crypto++ library) LZMA SDK Contents ----------------- LZMA SDK includes: - ANSI-C/C++/C#/Java source code for LZMA compressing and decompressing - Compiled file->file LZMA compressing/decompressing program for Windows system UNIX/Linux version ------------------ To compile C++ version of file->file LZMA encoding, go to directory CPP/7zip/Bundles/LzmaCon and call make to recompile it: make -f makefile.gcc clean all In some UNIX/Linux versions you must compile LZMA with static libraries. To compile with static libraries, you can use LIB = -lm -static Files --------------------- lzma.txt - LZMA SDK description (this file) 7zFormat.txt - 7z Format description 7zC.txt - 7z ANSI-C Decoder description methods.txt - Compression method IDs for .7z lzma.exe - Compiled file->file LZMA encoder/decoder for Windows 7zr.exe - 7-Zip with 7z/lzma/xz support. history.txt - history of the LZMA SDK Source code structure --------------------- C/ - C files 7zCrc*.* - CRC code Alloc.* - Memory allocation functions Bra*.* - Filters for x86, IA-64, ARM, ARM-Thumb, PowerPC and SPARC code LzFind.* - Match finder for LZ (LZMA) encoders LzFindMt.* - Match finder for LZ (LZMA) encoders for multithreading encoding LzHash.h - Additional file for LZ match finder LzmaDec.* - LZMA decoding LzmaEnc.* - LZMA encoding LzmaLib.* - LZMA Library for DLL calling Types.h - Basic types for another .c files Threads.* - The code for multithreading. LzmaLib - LZMA Library (.DLL for Windows) LzmaUtil - LZMA Utility (file->file LZMA encoder/decoder). 
Archive - files related to archiving 7z - 7z ANSI-C Decoder CPP/ -- CPP files Common - common files for C++ projects Windows - common files for Windows related code 7zip - files related to 7-Zip Project Common - common files for 7-Zip Compress - files related to compression/decompression Archive - files related to archiving Common - common files for archive handling 7z - 7z C++ Encoder/Decoder Bundles - Modules that are bundles of other modules Alone7z - 7zr.exe: Standalone version of 7z.exe that supports only 7z/LZMA/BCJ/BCJ2 LzmaCon - lzma.exe: LZMA compression/decompression Format7zR - 7zr.dll: Reduced version of 7za.dll: extracting/compressing to 7z/LZMA/BCJ/BCJ2 Format7zExtractR - 7zxr.dll: Reduced version of 7zxa.dll: extracting from 7z/LZMA/BCJ/BCJ2. UI - User Interface files Client7z - Test application for 7za.dll, 7zr.dll, 7zxr.dll Common - Common UI files Console - Code for console archiver CS/ - C# files 7zip Common - some common files for 7-Zip Compress - files related to compression/decompression LZ - files related to LZ (Lempel-Ziv) compression algorithm LZMA - LZMA compression/decompression LzmaAlone - file->file LZMA compression/decompression RangeCoder - Range Coder (special code of compression/decompression) Java/ - Java files SevenZip Compression - files related to compression/decompression LZ - files related to LZ (Lempel-Ziv) compression algorithm LZMA - LZMA compression/decompression RangeCoder - Range Coder (special code of compression/decompression) C/C++ source code of LZMA SDK is part of 7-Zip project. 
7-Zip source code can be downloaded from 7-Zip's SourceForge page: http://sourceforge.net/projects/sevenzip/ LZMA features ------------- - Variable dictionary size (up to 1 GB) - Estimated compressing speed: about 2 MB/s on 2 GHz CPU - Estimated decompressing speed: - 20-30 MB/s on 2 GHz Core 2 or AMD Athlon 64 - 1-2 MB/s on 200 MHz ARM, MIPS, PowerPC or other simple RISC - Small memory requirements for decompressing (16 KB + DictionarySize) - Small code size for decompressing: 5-8 KB LZMA decoder uses only integer operations and can be implemented in any modern 32-bit CPU (or on 16-bit CPU with some conditions). Some critical operations that affect the speed of LZMA decompression: 1) 32*16 bit integer multiply 2) Misspredicted branches (penalty mostly depends from pipeline length) 3) 32-bit shift and arithmetic operations The speed of LZMA decompressing mostly depends from CPU speed. Memory speed has no big meaning. But if your CPU has small data cache, overall weight of memory speed will slightly increase. How To Use ---------- Using LZMA encoder/decoder executable -------------------------------------- Usage: LZMA inputFile outputFile [...] e: encode file d: decode file b: Benchmark. There are two tests: compressing and decompressing with LZMA method. Benchmark shows rating in MIPS (million instructions per second). Rating value is calculated from measured speed and it is normalized with Intel's Core 2 results. Also Benchmark checks possible hardware errors (RAM errors in most cases). Benchmark uses these settings: (-a1, -d21, -fb32, -mfbt4). You can change only -d parameter. Also you can change the number of iterations. Example for 30 iterations: LZMA b 30 Default number of iterations is 10. -a{N}: set compression mode 0 = fast, 1 = normal default: 1 (normal) d{N}: Sets Dictionary size - [0, 30], default: 23 (8MB) The maximum value for dictionary size is 1 GB = 2^30 bytes. Dictionary size is calculated as DictionarySize = 2^N bytes. 
For decompressing file compressed by LZMA method with dictionary size D = 2^N you need about D bytes of memory (RAM). -fb{N}: set number of fast bytes - [5, 273], default: 128 Usually big number gives a little bit better compression ratio and slower compression process. -lc{N}: set number of literal context bits - [0, 8], default: 3 Sometimes lc=4 gives gain for big files. -lp{N}: set number of literal pos bits - [0, 4], default: 0 lp switch is intended for periodical data when period is equal 2^N. For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's better to set lc0, if you change lp switch. -pb{N}: set number of pos bits - [0, 4], default: 2 pb switch is intended for periodical data when period is equal 2^N. -mf{MF_ID}: set Match Finder. Default: bt4. Algorithms from hc* group doesn't provide good compression ratio, but they often works pretty fast in combination with fast mode (-a0). Memory requirements depend from dictionary size (parameter "d" in table below). MF_ID Memory Description bt2 d * 9.5 + 4MB Binary Tree with 2 bytes hashing. bt3 d * 11.5 + 4MB Binary Tree with 3 bytes hashing. bt4 d * 11.5 + 4MB Binary Tree with 4 bytes hashing. hc4 d * 7.5 + 4MB Hash Chain with 4 bytes hashing. -eos: write End Of Stream marker. By default LZMA doesn't write eos marker, since LZMA decoder knows uncompressed size stored in .lzma file header. -si: Read data from stdin (it will write End Of Stream marker). -so: Write data to stdout Examples: 1) LZMA e file.bin file.lzma -d16 -lc0 compresses file.bin to file.lzma with 64 KB dictionary (2^16=64K) and 0 literal context bits. -lc0 allows to reduce memory requirements for decompression. 2) LZMA e file.bin file.lzma -lc0 -lp2 compresses file.bin to file.lzma with settings suitable for 32-bit periodical data (for example, ARM or MIPS code). 3) LZMA d file.lzma file.bin decompresses file.lzma to file.bin. 
Compression ratio hints ----------------------- Recommendations --------------- To increase the compression ratio for LZMA compressing it's desirable to have aligned data (if it's possible) and also it's desirable to locate data in such order, where code is grouped in one place and data is grouped in other place (it's better than such mixing: code, data, code, data, ...). Filters ------- You can increase the compression ratio for some data types, using special filters before compressing. For example, it's possible to increase the compression ratio on 5-10% for code for those CPU ISAs: x86, IA-64, ARM, ARM-Thumb, PowerPC, SPARC. You can find C source code of such filters in C/Bra*.* files You can check the compression ratio gain of these filters with such 7-Zip commands (example for ARM code): No filter: 7z a a1.7z a.bin -m0=lzma With filter for little-endian ARM code: 7z a a2.7z a.bin -m0=arm -m1=lzma It works in such manner: Compressing = Filter_encoding + LZMA_encoding Decompressing = LZMA_decoding + Filter_decoding Compressing and decompressing speed of such filters is very high, so it will not increase decompressing time too much. Moreover, it reduces decompression time for LZMA_decoding, since compression ratio with filtering is higher. These filters convert CALL (calling procedure) instructions from relative offsets to absolute addresses, so such data becomes more compressible. For some ISAs (for example, for MIPS) it's impossible to get gain from such filter. LZMA compressed file format --------------------------- Offset Size Description 0 1 Special LZMA properties (lc,lp, pb in encoded form) 1 4 Dictionary size (little endian) 5 8 Uncompressed size (little endian). -1 means unknown size 13 Compressed data ANSI-C LZMA Decoder ~~~~~~~~~~~~~~~~~~~ Please note that interfaces for ANSI-C code were changed in LZMA SDK 4.58. If you want to use old interfaces you can download previous version of LZMA SDK from sourceforge.net site. 
To use ANSI-C LZMA Decoder you need the following files: 1) LzmaDec.h + LzmaDec.c + Types.h LzmaUtil/LzmaUtil.c is example application that uses these files. Memory requirements for LZMA decoding ------------------------------------- Stack usage of LZMA decoding function for local variables is not larger than 200-400 bytes. LZMA Decoder uses dictionary buffer and internal state structure. Internal state structure consumes state_size = (4 + (1.5 << (lc + lp))) KB by default (lc=3, lp=0), state_size = 16 KB. How To decompress data ---------------------- LZMA Decoder (ANSI-C version) now supports 2 interfaces: 1) Single-call Decompressing 2) Multi-call State Decompressing (zlib-like interface) You must use external allocator: Example: void *SzAlloc(void *p, size_t size) { p = p; return malloc(size); } void SzFree(void *p, void *address) { p = p; free(address); } ISzAlloc alloc = { SzAlloc, SzFree }; You can use p = p; operator to disable compiler warnings. Single-call Decompressing ------------------------- When to use: RAM->RAM decompressing Compile files: LzmaDec.h + LzmaDec.c + Types.h Compile defines: no defines Memory Requirements: - Input buffer: compressed size - Output buffer: uncompressed size - LZMA Internal Structures: state_size (16 KB for default settings) Interface: int LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAlloc *alloc); In: dest - output data destLen - output data size src - input data srcLen - input data size propData - LZMA properties (5 bytes) propSize - size of propData buffer (5 bytes) finishMode - It has meaning only if the decoding reaches output limit (*destLen). LZMA_FINISH_ANY - Decode just destLen bytes. LZMA_FINISH_END - Stream must be finished after (*destLen). You can use LZMA_FINISH_END, when you know that current output buffer covers last bytes of stream. alloc - Memory allocator. 
Out: destLen - processed output size srcLen - processed input size Output: SZ_OK status: LZMA_STATUS_FINISHED_WITH_MARK LZMA_STATUS_NOT_FINISHED LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK SZ_ERROR_DATA - Data error SZ_ERROR_MEM - Memory allocation error SZ_ERROR_UNSUPPORTED - Unsupported properties SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). If LZMA decoder sees end_marker before reaching output limit, it returns OK result, and output value of destLen will be less than output buffer size limit. You can use multiple checks to test data integrity after full decompression: 1) Check Result and "status" variable. 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. You must use correct finish mode in that case. */ Multi-call State Decompressing (zlib-like interface) ---------------------------------------------------- When to use: file->file decompressing Compile files: LzmaDec.h + LzmaDec.c + Types.h Memory Requirements: - Buffer for input stream: any size (for example, 16 KB) - Buffer for output stream: any size (for example, 16 KB) - LZMA Internal Structures: state_size (16 KB for default settings) - LZMA dictionary (dictionary size is encoded in LZMA properties header) 1) read LZMA properties (5 bytes) and uncompressed size (8 bytes, little-endian) to header: unsigned char header[LZMA_PROPS_SIZE + 8]; ReadFile(inFile, header, sizeof(header) 2) Allocate CLzmaDec structures (state + dictionary) using LZMA properties CLzmaDec state; LzmaDec_Constr(&state); res = LzmaDec_Allocate(&state, header, LZMA_PROPS_SIZE, &g_Alloc); if (res != SZ_OK) return res; 3) Init LzmaDec structure before any new LZMA stream. And call LzmaDec_DecodeToBuf in loop LzmaDec_Init(&state); for (;;) { ... int res = LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode); ... 
} 4) Free all allocated structures LzmaDec_Free(&state, &g_Alloc); For full code example, look at C/LzmaUtil/LzmaUtil.c code. How To compress data -------------------- Compile files: LzmaEnc.h + LzmaEnc.c + Types.h + LzFind.c + LzFind.h + LzFindMt.c + LzFindMt.h + LzHash.h Memory Requirements: - (dictSize * 11.5 + 6 MB) + state_size Lzma Encoder can use two memory allocators: 1) alloc - for small arrays. 2) allocBig - for big arrays. For example, you can use Large RAM Pages (2 MB) in allocBig allocator for better compression speed. Note that Windows has bad implementation for Large RAM Pages. It's OK to use same allocator for alloc and allocBig. Single-call Compression with callbacks -------------------------------------- Check C/LzmaUtil/LzmaUtil.c as example, When to use: file->file decompressing 1) you must implement callback structures for interfaces: ISeqInStream ISeqOutStream ICompressProgress ISzAlloc static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); } static void SzFree(void *p, void *address) { p = p; MyFree(address); } static ISzAlloc g_Alloc = { SzAlloc, SzFree }; CFileSeqInStream inStream; CFileSeqOutStream outStream; inStream.funcTable.Read = MyRead; inStream.file = inFile; outStream.funcTable.Write = MyWrite; outStream.file = outFile; 2) Create CLzmaEncHandle object; CLzmaEncHandle enc; enc = LzmaEnc_Create(&g_Alloc); if (enc == 0) return SZ_ERROR_MEM; 3) initialize CLzmaEncProps properties; LzmaEncProps_Init(&props); Then you can change some properties in that structure. 
4) Send LZMA properties to LZMA Encoder res = LzmaEnc_SetProps(enc, &props); 5) Write encoded properties to header Byte header[LZMA_PROPS_SIZE + 8]; size_t headerSize = LZMA_PROPS_SIZE; UInt64 fileSize; int i; res = LzmaEnc_WriteProperties(enc, header, &headerSize); fileSize = MyGetFileLength(inFile); for (i = 0; i < 8; i++) header[headerSize++] = (Byte)(fileSize >> (8 * i)); MyWriteFileAndCheck(outFile, header, headerSize) 6) Call encoding function: res = LzmaEnc_Encode(enc, &outStream.funcTable, &inStream.funcTable, NULL, &g_Alloc, &g_Alloc); 7) Destroy LZMA Encoder Object LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc); If callback function return some error code, LzmaEnc_Encode also returns that code or it can return the code like SZ_ERROR_READ, SZ_ERROR_WRITE or SZ_ERROR_PROGRESS. Single-call RAM->RAM Compression -------------------------------- Single-call RAM->RAM Compression is similar to Compression with callbacks, but you provide pointers to buffers instead of pointers to stream callbacks: HRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); Return code: SZ_OK - OK SZ_ERROR_MEM - Memory allocation error SZ_ERROR_PARAM - Incorrect paramater SZ_ERROR_OUTPUT_EOF - output buffer overflow SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) Defines ------- _LZMA_SIZE_OPT - Enable some optimizations in LZMA Decoder to get smaller executable code. _LZMA_PROB32 - It can increase the speed on some 32-bit CPUs, but memory usage for some structures will be doubled in that case. _LZMA_UINT32_IS_ULONG - Define it if int is 16-bit on your compiler and long is 32-bit. _LZMA_NO_SYSTEM_SIZE_T - Define it if you don't want to use size_t type. _7ZIP_PPMD_SUPPPORT - Define it if you don't want to support PPMD method in AMSI-C .7z decoder. 
C++ LZMA Encoder/Decoder ~~~~~~~~~~~~~~~~~~~~~~~~ C++ LZMA code use COM-like interfaces. So if you want to use it, you can study basics of COM/OLE. C++ LZMA code is just wrapper over ANSI-C code. C++ Notes ~~~~~~~~~~~~~~~~~~~~~~~~ If you use some C++ code folders in 7-Zip (for example, C++ code for .7z handling), you must check that you correctly work with "new" operator. 7-Zip can be compiled with MSVC 6.0 that doesn't throw "exception" from "new" operator. So 7-Zip uses "CPP\Common\NewHandler.cpp" that redefines "new" operator: operator new(size_t size) { void *p = ::malloc(size); if (p == 0) throw CNewException(); return p; } If you use MSCV that throws exception for "new" operator, you can compile without "NewHandler.cpp". So standard exception will be used. Actually some code of 7-Zip catches any exception in internal code and converts it to HRESULT code. So you don't need to catch CNewException, if you call COM interfaces of 7-Zip. --- http://www.7-zip.org http://www.7-zip.org/sdk.html http://www.7-zip.org/support.html lrzip-0.651/m4/000077500000000000000000000000001421175057200132115ustar00rootroot00000000000000lrzip-0.651/m4/.gitignore000066400000000000000000000000001421175057200151670ustar00rootroot00000000000000lrzip-0.651/m4/ac_attribute.m4000066400000000000000000000024651421175057200161300ustar00rootroot00000000000000dnl Copyright (C) 2004-2008 Kim Woelders dnl Copyright (C) 2008 Vincent Torri dnl That code is public domain and can be freely used or copied. dnl Originally snatched from somewhere... dnl Macro for checking if the compiler supports __attribute__ dnl Usage: AC_C___ATTRIBUTE__ dnl call AC_DEFINE for HAVE___ATTRIBUTE__ and __UNUSED__ dnl if the compiler supports __attribute__, HAVE___ATTRIBUTE__ is dnl defined to 1 and __UNUSED__ is defined to __attribute__((unused)) dnl otherwise, HAVE___ATTRIBUTE__ is not defined and __UNUSED__ is dnl defined to nothing. 
AC_DEFUN([AC_C___ATTRIBUTE__], [ AC_MSG_CHECKING([for __attribute__]) AC_CACHE_VAL([ac_cv___attribute__], [AC_TRY_COMPILE( [ #include int func(int x); int foo(int x __attribute__ ((unused))) { exit(1); } ], [], [ac_cv___attribute__="yes"], [ac_cv___attribute__="no"] )]) AC_MSG_RESULT($ac_cv___attribute__) if test "x${ac_cv___attribute__}" = "xyes" ; then AC_DEFINE([HAVE___ATTRIBUTE__], [1], [Define to 1 if your compiler has __attribute__]) AC_DEFINE([__UNUSED__], [__attribute__((unused))], [Macro declaring a function argument to be unused]) else AC_DEFINE([__UNUSED__], [], [Macro declaring a function argument to be unused]) fi ]) dnl End of ac_attribute.m4 lrzip-0.651/m4/ax_pthread.m4000066400000000000000000000506131421175057200155770ustar00rootroot00000000000000# =========================================================================== # https://www.gnu.org/software/autoconf-archive/ax_pthread.html # =========================================================================== # # SYNOPSIS # # AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) # # DESCRIPTION # # This macro figures out how to build C programs using POSIX threads. It # sets the PTHREAD_LIBS output variable to the threads library and linker # flags, and the PTHREAD_CFLAGS output variable to any special C compiler # flags that are needed. (The user can also force certain compiler # flags/libs to be tested by setting these environment variables.) # # Also sets PTHREAD_CC to any special C compiler that is needed for # multi-threaded programs (defaults to the value of CC otherwise). (This # is necessary on AIX to use the special cc_r compiler alias.) # # NOTE: You are assumed to not only compile your program with these flags, # but also to link with them as well. For example, you might link with # $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... 
$PTHREAD_LIBS $LIBS # # If you are only building threaded programs, you may wish to use these # variables in your default LIBS, CFLAGS, and CC: # # LIBS="$PTHREAD_LIBS $LIBS" # CFLAGS="$CFLAGS $PTHREAD_CFLAGS" # CC="$PTHREAD_CC" # # In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant # has a nonstandard name, this macro defines PTHREAD_CREATE_JOINABLE to # that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX). # # Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the # PTHREAD_PRIO_INHERIT symbol is defined when compiling with # PTHREAD_CFLAGS. # # ACTION-IF-FOUND is a list of shell commands to run if a threads library # is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it # is not found. If ACTION-IF-FOUND is not specified, the default action # will define HAVE_PTHREAD. # # Please let the authors know if this macro fails on any platform, or if # you have any other suggestions or comments. This macro was based on work # by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help # from M. Frigo), as well as ac_pthread and hb_pthread macros posted by # Alejandro Forero Cuervo to the autoconf macro repository. We are also # grateful for the helpful feedback of numerous users. # # Updated for Autoconf 2.68 by Daniel Richard G. # # LICENSE # # Copyright (c) 2008 Steven G. Johnson # Copyright (c) 2011 Daniel Richard G. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation, either version 3 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. If not, see . 
# # As a special exception, the respective Autoconf Macro's copyright owner # gives unlimited permission to copy, distribute and modify the configure # scripts that are the output of Autoconf when processing the Macro. You # need not follow the terms of the GNU General Public License when using # or distributing such scripts, even though portions of the text of the # Macro appear in them. The GNU General Public License (GPL) does govern # all other use of the material that constitutes the Autoconf Macro. # # This special exception to the GPL applies to versions of the Autoconf # Macro released by the Autoconf Archive. When you make and distribute a # modified version of the Autoconf Macro, you may extend this special # exception to the GPL to apply to your modified version as well. #serial 25 AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD]) AC_DEFUN([AX_PTHREAD], [ AC_REQUIRE([AC_CANONICAL_HOST]) AC_REQUIRE([AC_PROG_CC]) AC_REQUIRE([AC_PROG_SED]) AC_LANG_PUSH([C]) ax_pthread_ok=no # We used to check for pthread.h first, but this fails if pthread.h # requires special compiler flags (e.g. on Tru64 or Sequent). # It gets checked for in the link test anyway. 
# First of all, check if the user has set any of the PTHREAD_LIBS, # etcetera environment variables, and if threads linking works using # them: if test "x$PTHREAD_CFLAGS$PTHREAD_LIBS" != "x"; then ax_pthread_save_CC="$CC" ax_pthread_save_CFLAGS="$CFLAGS" ax_pthread_save_LIBS="$LIBS" AS_IF([test "x$PTHREAD_CC" != "x"], [CC="$PTHREAD_CC"]) CFLAGS="$CFLAGS $PTHREAD_CFLAGS" LIBS="$PTHREAD_LIBS $LIBS" AC_MSG_CHECKING([for pthread_join using $CC $PTHREAD_CFLAGS $PTHREAD_LIBS]) AC_LINK_IFELSE([AC_LANG_CALL([], [pthread_join])], [ax_pthread_ok=yes]) AC_MSG_RESULT([$ax_pthread_ok]) if test "x$ax_pthread_ok" = "xno"; then PTHREAD_LIBS="" PTHREAD_CFLAGS="" fi CC="$ax_pthread_save_CC" CFLAGS="$ax_pthread_save_CFLAGS" LIBS="$ax_pthread_save_LIBS" fi # We must check for the threads library under a number of different # names; the ordering is very important because some systems # (e.g. DEC) have both -lpthread and -lpthreads, where one of the # libraries is broken (non-POSIX). # Create a list of thread flags to try. Items starting with a "-" are # C compiler flags, and other items are library names, except for "none" # which indicates that we try without any flags at all, and "pthread-config" # which is a program returning the flags for the Pth emulation library. ax_pthread_flags="pthreads none -Kthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" # The ordering *is* (sometimes) important. 
Some notes on the # individual items follow: # pthreads: AIX (must check this before -lpthread) # none: in case threads are in libc; should be tried before -Kthread and # other compiler flags to prevent continual compiler warnings # -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) # -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads), Tru64 # (Note: HP C rejects this with "bad form for `-t' option") # -pthreads: Solaris/gcc (Note: HP C also rejects) # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it # doesn't hurt to check since this sometimes defines pthreads and # -D_REENTRANT too), HP C (must be checked before -lpthread, which # is present but should not be used directly; and before -mthreads, # because the compiler interprets this as "-mt" + "-hreads") # -mthreads: Mingw32/gcc, Lynx/gcc # pthread: Linux, etcetera # --thread-safe: KAI C++ # pthread-config: use pthread-config program (for GNU Pth library) case $host_os in freebsd*) # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) # lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) ax_pthread_flags="-kthread lthread $ax_pthread_flags" ;; hpux*) # From the cc(1) man page: "[-mt] Sets various -D flags to enable # multi-threading and also sets -lpthread." ax_pthread_flags="-mt -pthread pthread $ax_pthread_flags" ;; openedition*) # IBM z/OS requires a feature-test macro to be defined in order to # enable POSIX threads at all, so give the user a hint if this is # not set. (We don't define these ourselves, as they can affect # other portions of the system API in unpredictable ways.) 
AC_EGREP_CPP([AX_PTHREAD_ZOS_MISSING], [ # if !defined(_OPEN_THREADS) && !defined(_UNIX03_THREADS) AX_PTHREAD_ZOS_MISSING # endif ], [AC_MSG_WARN([IBM z/OS requires -D_OPEN_THREADS or -D_UNIX03_THREADS to enable pthreads support.])]) ;; solaris*) # On Solaris (at least, for some versions), libc contains stubbed # (non-functional) versions of the pthreads routines, so link-based # tests will erroneously succeed. (N.B.: The stubs are missing # pthread_cleanup_push, or rather a function called by this macro, # so we could check for that, but who knows whether they'll stub # that too in a future libc.) So we'll check first for the # standard Solaris way of linking pthreads (-mt -lpthread). ax_pthread_flags="-mt,pthread pthread $ax_pthread_flags" ;; esac # GCC generally uses -pthread, or -pthreads on some platforms (e.g. SPARC) AS_IF([test "x$GCC" = "xyes"], [ax_pthread_flags="-pthread -pthreads $ax_pthread_flags"]) # The presence of a feature test macro requesting re-entrant function # definitions is, on some systems, a strong hint that pthreads support is # correctly enabled case $host_os in darwin* | hpux* | linux* | osf* | solaris*) ax_pthread_check_macro="_REENTRANT" ;; aix*) ax_pthread_check_macro="_THREAD_SAFE" ;; *) ax_pthread_check_macro="--" ;; esac AS_IF([test "x$ax_pthread_check_macro" = "x--"], [ax_pthread_check_cond=0], [ax_pthread_check_cond="!defined($ax_pthread_check_macro)"]) # Are we compiling with Clang? AC_CACHE_CHECK([whether $CC is Clang], [ax_cv_PTHREAD_CLANG], [ax_cv_PTHREAD_CLANG=no # Note that Autoconf sets GCC=yes for Clang as well as GCC if test "x$GCC" = "xyes"; then AC_EGREP_CPP([AX_PTHREAD_CC_IS_CLANG], [/* Note: Clang 2.7 lacks __clang_[a-z]+__ */ # if defined(__clang__) && defined(__llvm__) AX_PTHREAD_CC_IS_CLANG # endif ], [ax_cv_PTHREAD_CLANG=yes]) fi ]) ax_pthread_clang="$ax_cv_PTHREAD_CLANG" ax_pthread_clang_warning=no # Clang needs special handling, because older versions handle the -pthread # option in a rather... 
idiosyncratic way if test "x$ax_pthread_clang" = "xyes"; then # Clang takes -pthread; it has never supported any other flag # (Note 1: This will need to be revisited if a system that Clang # supports has POSIX threads in a separate library. This tends not # to be the way of modern systems, but it's conceivable.) # (Note 2: On some systems, notably Darwin, -pthread is not needed # to get POSIX threads support; the API is always present and # active. We could reasonably leave PTHREAD_CFLAGS empty. But # -pthread does define _REENTRANT, and while the Darwin headers # ignore this macro, third-party headers might not.) PTHREAD_CFLAGS="-pthread" PTHREAD_LIBS= ax_pthread_ok=yes # However, older versions of Clang make a point of warning the user # that, in an invocation where only linking and no compilation is # taking place, the -pthread option has no effect ("argument unused # during compilation"). They expect -pthread to be passed in only # when source code is being compiled. # # Problem is, this is at odds with the way Automake and most other # C build frameworks function, which is that the same flags used in # compilation (CFLAGS) are also used in linking. Many systems # supported by AX_PTHREAD require exactly this for POSIX threads # support, and in fact it is often not straightforward to specify a # flag that is used only in the compilation phase and not in # linking. Such a scenario is extremely rare in practice. # # Even though use of the -pthread flag in linking would only print # a warning, this can be a nuisance for well-run software projects # that build with -Werror. So if the active version of Clang has # this misfeature, we search for an option to squash it. 
AC_CACHE_CHECK([whether Clang needs flag to prevent "argument unused" warning when linking with -pthread], [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG], [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG=unknown # Create an alternate version of $ac_link that compiles and # links in two steps (.c -> .o, .o -> exe) instead of one # (.c -> exe), because the warning occurs only in the second # step ax_pthread_save_ac_link="$ac_link" ax_pthread_sed='s/conftest\.\$ac_ext/conftest.$ac_objext/g' ax_pthread_link_step=`$as_echo "$ac_link" | sed "$ax_pthread_sed"` ax_pthread_2step_ac_link="($ac_compile) && (echo ==== >&5) && ($ax_pthread_link_step)" ax_pthread_save_CFLAGS="$CFLAGS" for ax_pthread_try in '' -Qunused-arguments -Wno-unused-command-line-argument unknown; do AS_IF([test "x$ax_pthread_try" = "xunknown"], [break]) CFLAGS="-Werror -Wunknown-warning-option $ax_pthread_try -pthread $ax_pthread_save_CFLAGS" ac_link="$ax_pthread_save_ac_link" AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])], [ac_link="$ax_pthread_2step_ac_link" AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])], [break]) ]) done ac_link="$ax_pthread_save_ac_link" CFLAGS="$ax_pthread_save_CFLAGS" AS_IF([test "x$ax_pthread_try" = "x"], [ax_pthread_try=no]) ax_cv_PTHREAD_CLANG_NO_WARN_FLAG="$ax_pthread_try" ]) case "$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" in no | unknown) ;; *) PTHREAD_CFLAGS="$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG $PTHREAD_CFLAGS" ;; esac fi # $ax_pthread_clang = yes if test "x$ax_pthread_ok" = "xno"; then for ax_pthread_try_flag in $ax_pthread_flags; do case $ax_pthread_try_flag in none) AC_MSG_CHECKING([whether pthreads work without any flags]) ;; -mt,pthread) AC_MSG_CHECKING([whether pthreads work with -mt -lpthread]) PTHREAD_CFLAGS="-mt" PTHREAD_LIBS="-lpthread" ;; -*) AC_MSG_CHECKING([whether pthreads work with $ax_pthread_try_flag]) PTHREAD_CFLAGS="$ax_pthread_try_flag" ;; pthread-config) AC_CHECK_PROG([ax_pthread_config], [pthread-config], [yes], [no]) AS_IF([test "x$ax_pthread_config" = 
"xno"], [continue]) PTHREAD_CFLAGS="`pthread-config --cflags`" PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" ;; *) AC_MSG_CHECKING([for the pthreads library -l$ax_pthread_try_flag]) PTHREAD_LIBS="-l$ax_pthread_try_flag" ;; esac ax_pthread_save_CFLAGS="$CFLAGS" ax_pthread_save_LIBS="$LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS" LIBS="$PTHREAD_LIBS $LIBS" # Check for various functions. We must include pthread.h, # since some functions may be macros. (On the Sequent, we # need a special flag -Kthread to make this header compile.) # We check for pthread_join because it is in -lpthread on IRIX # while pthread_create is in libc. We check for pthread_attr_init # due to DEC craziness with -lpthreads. We check for # pthread_cleanup_push because it is one of the few pthread # functions on Solaris that doesn't have a non-functional libc stub. # We try pthread_create on general principles. AC_LINK_IFELSE([AC_LANG_PROGRAM([#include # if $ax_pthread_check_cond # error "$ax_pthread_check_macro must be defined" # endif static void routine(void *a) { a = 0; } static void *start_routine(void *a) { return a; }], [pthread_t th; pthread_attr_t attr; pthread_create(&th, 0, start_routine, 0); pthread_join(th, 0); pthread_attr_init(&attr); pthread_cleanup_push(routine, 0); pthread_cleanup_pop(0) /* ; */])], [ax_pthread_ok=yes], []) CFLAGS="$ax_pthread_save_CFLAGS" LIBS="$ax_pthread_save_LIBS" AC_MSG_RESULT([$ax_pthread_ok]) AS_IF([test "x$ax_pthread_ok" = "xyes"], [break]) PTHREAD_LIBS="" PTHREAD_CFLAGS="" done fi # Various other checks: if test "x$ax_pthread_ok" = "xyes"; then ax_pthread_save_CFLAGS="$CFLAGS" ax_pthread_save_LIBS="$LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS" LIBS="$PTHREAD_LIBS $LIBS" # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. 
AC_CACHE_CHECK([for joinable pthread attribute], [ax_cv_PTHREAD_JOINABLE_ATTR], [ax_cv_PTHREAD_JOINABLE_ATTR=unknown for ax_pthread_attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], [int attr = $ax_pthread_attr; return attr /* ; */])], [ax_cv_PTHREAD_JOINABLE_ATTR=$ax_pthread_attr; break], []) done ]) AS_IF([test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xunknown" && \ test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xPTHREAD_CREATE_JOINABLE" && \ test "x$ax_pthread_joinable_attr_defined" != "xyes"], [AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE], [$ax_cv_PTHREAD_JOINABLE_ATTR], [Define to necessary symbol if this constant uses a non-standard name on your system.]) ax_pthread_joinable_attr_defined=yes ]) AC_CACHE_CHECK([whether more special flags are required for pthreads], [ax_cv_PTHREAD_SPECIAL_FLAGS], [ax_cv_PTHREAD_SPECIAL_FLAGS=no case $host_os in solaris*) ax_cv_PTHREAD_SPECIAL_FLAGS="-D_POSIX_PTHREAD_SEMANTICS" ;; esac ]) AS_IF([test "x$ax_cv_PTHREAD_SPECIAL_FLAGS" != "xno" && \ test "x$ax_pthread_special_flags_added" != "xyes"], [PTHREAD_CFLAGS="$ax_cv_PTHREAD_SPECIAL_FLAGS $PTHREAD_CFLAGS" ax_pthread_special_flags_added=yes]) AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT], [ax_cv_PTHREAD_PRIO_INHERIT], [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include ]], [[int i = PTHREAD_PRIO_INHERIT; return i;]])], [ax_cv_PTHREAD_PRIO_INHERIT=yes], [ax_cv_PTHREAD_PRIO_INHERIT=no]) ]) AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes" && \ test "x$ax_pthread_prio_inherit_defined" != "xyes"], [AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.]) ax_pthread_prio_inherit_defined=yes ]) CFLAGS="$ax_pthread_save_CFLAGS" LIBS="$ax_pthread_save_LIBS" # More AIX lossage: compile with *_r variant if test "x$GCC" != "xyes"; then case $host_os in aix*) AS_CASE(["x/$CC"], [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6], [#handle absolute path differently from PATH based program lookup 
AS_CASE(["x$CC"], [x/*], [AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])], [AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])])]) ;; esac fi fi test -n "$PTHREAD_CC" || PTHREAD_CC="$CC" AC_SUBST([PTHREAD_LIBS]) AC_SUBST([PTHREAD_CFLAGS]) AC_SUBST([PTHREAD_CC]) # Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: if test "x$ax_pthread_ok" = "xyes"; then ifelse([$1],,[AC_DEFINE([HAVE_PTHREAD],[1],[Define if you have POSIX threads libraries and header files.])],[$1]) : else ax_pthread_ok=no $2 fi AC_LANG_POP ])dnl AX_PTHREAD lrzip-0.651/m4/efl_doxygen.m4000066400000000000000000000052711421175057200157630ustar00rootroot00000000000000dnl Copyright (C) 2008 Vincent Torri dnl That code is public domain and can be freely used or copied. dnl Macro that check if doxygen is available or not. dnl EFL_CHECK_DOXYGEN([ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]) dnl Test for the doxygen program dnl Defines efl_doxygen dnl Defines the automake conditionnal EFL_BUILD_DOC dnl AC_DEFUN([EFL_CHECK_DOXYGEN], [ dnl dnl Disable the build of the documentation dnl AC_ARG_ENABLE([doc], [AC_HELP_STRING( [--disable-doc], [Disable documentation build @<:@default=enabled@:>@])], [ if test "x${enableval}" = "xyes" ; then efl_enable_doc="yes" else efl_enable_doc="no" fi ], [efl_enable_doc="yes"]) AC_MSG_CHECKING([whether to build documentation]) AC_MSG_RESULT([${efl_enable_doc}]) if test "x${efl_enable_doc}" = "xyes" ; then dnl dnl Specify the file name, without path dnl efl_doxygen="doxygen" AC_ARG_WITH([doxygen], [AC_HELP_STRING( [--with-doxygen=FILE], [doxygen program to use @<:@default=doxygen@:>@])], dnl dnl Check the given doxygen program. dnl [efl_doxygen=${withval} AC_CHECK_PROG([efl_have_doxygen], [${efl_doxygen}], [yes], [no]) if test "x${efl_have_doxygen}" = "xno" ; then echo "WARNING:" echo "The doxygen program you specified:" echo "${efl_doxygen}" echo "was not found. Please check the path and make sure " echo "the program exists and is executable." 
AC_MSG_WARN([no doxygen detected. Documentation will not be built]) fi ], [AC_CHECK_PROG([efl_have_doxygen], [${efl_doxygen}], [yes], [no]) if test "x${efl_have_doxygen}" = "xno" ; then echo "WARNING:" echo "The doxygen program was not found in your execute path." echo "You may have doxygen installed somewhere not covered by your path." echo "" echo "If this is the case make sure you have the packages installed, AND" echo "that the doxygen program is in your execute path (see your" echo "shell manual page on setting the \$PATH environment variable), OR" echo "alternatively, specify the program to use with --with-doxygen." AC_MSG_WARN([no doxygen detected. Documentation will not be built]) fi ]) fi dnl dnl Substitution dnl AC_SUBST([efl_doxygen]) if ! test "x${efl_have_doxygen}" = "xyes" ; then efl_enable_doc="no" fi AM_CONDITIONAL(EFL_BUILD_DOC, test "x${efl_enable_doc}" = "xyes") if test "x${efl_enable_doc}" = "xyes" ; then m4_default([$1], [:]) else m4_default([$2], [:]) fi ]) dnl End of doxygen.m4 lrzip-0.651/main.c000066400000000000000000000576161421175057200140000ustar00rootroot00000000000000/* Copyright (C) 2006-2016,2021-2022 Con Kolivas Copyright (C) 2011 Peter Hyman Copyright (C) 1998-2003 Andrew Tridgell This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ /* lrzip compression - main program */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_SYS_TIME_H # include #endif #ifdef HAVE_SYS_RESOURCE_H # include #endif #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_STAT_H # include #endif #include #ifdef HAVE_ENDIAN_H # include #elif HAVE_SYS_ENDIAN_H # include #endif #ifdef HAVE_ARPA_INET_H # include #endif #include #include #include #include "rzip.h" #include "lrzip_core.h" #include "util.h" #include "stream.h" /* needed for CRC routines */ #include "lzma/C/7zCrc.h" #define MAX_PATH_LEN 4096 static rzip_control base_control, local_control, *control; static void usage(bool compat) { print_output("lrz%s version %s\n", compat ? "" : "ip", PACKAGE_VERSION); print_output("Copyright (C) Con Kolivas 2006-2022\n"); print_output("Based on rzip "); print_output("Copyright (C) Andrew Tridgell 1998-2003\n\n"); print_output("Usage: lrz%s [options] \n", compat ? "" : "ip"); print_output("General options:\n"); if (compat) { print_output(" -c, --stdout output to STDOUT\n"); print_output(" -C, --check check integrity of file written on decompression\n"); } else print_output(" -c, -C, --check check integrity of file written on decompression\n"); print_output(" -d, --decompress decompress\n"); print_output(" -e, --encrypt[=password] password protected sha512/aes128 encryption on compression\n"); print_output(" -h, -?, --help show help\n"); print_output(" -H, --hash display md5 hash integrity information\n"); print_output(" -i, --info show compressed file information\n"); if (compat) { print_output(" -L, --license display software version and license\n"); print_output(" -P, --progress show compression progress\n"); } else { print_output(" -q, --quiet don't show compression progress\n"); print_output(" -Q, --very-quiet don't show any output\n"); } print_output(" -r, --recursive operate recursively on directories\n"); print_output(" -t, --test test compressed file 
integrity\n"); print_output(" -v[v%s], --verbose Increase verbosity\n", compat ? "v" : ""); print_output(" -V, --version show version\n"); print_output("Options affecting output:\n"); if (!compat) print_output(" -D, --delete delete existing files\n"); print_output(" -f, --force force overwrite of any existing files\n"); if (compat) print_output(" -k, --keep don't delete source files on de/compression\n"); print_output(" -K, --keep-broken keep broken or damaged output files\n"); print_output(" -o, --outfile filename specify the output file name and/or path\n"); print_output(" -O, --outdir directory specify the output directory when -o is not used\n"); print_output(" -S, --suffix suffix specify compressed suffix (default '.lrz')\n"); print_output("Options affecting compression:\n"); print_output(" --lzma lzma compression (default)\n"); print_output(" -b, --bzip2 bzip2 compression\n"); print_output(" -g, --gzip gzip compression using zlib\n"); print_output(" -l, --lzo lzo compression (ultra fast)\n"); print_output(" -n, --no-compress no backend compression - prepare for other compressor\n"); print_output(" -z, --zpaq zpaq compression (best, extreme compression, extremely slow)\n"); print_output("Low level options:\n"); if (compat) { print_output(" -1 .. -9 set lzma/bzip2/gzip compression level (1-9, default 7)\n"); print_output(" --fast alias for -1\n"); print_output(" --best alias for -9\n"); } if (!compat) print_output(" -L, --level level set lzma/bzip2/gzip compression level (1-9, default 7)\n"); print_output(" -N, --nice-level value Set nice value to value (default %d)\n", compat ? 
0 : 19); print_output(" -p, --threads value Set processor count to override number of threads\n"); print_output(" -m, --maxram size Set maximum available ram in hundreds of MB\n"); print_output(" overrides detected amount of available ram\n"); print_output(" -T, --threshold Disable LZ4 compressibility testing\n"); print_output(" -U, --unlimited Use unlimited window size beyond ramsize (potentially much slower)\n"); print_output(" -w, --window size maximum compression window in hundreds of MB\n"); print_output(" default chosen by heuristic dependent on ram and chosen compression\n"); print_output("\nLRZIP=NOCONFIG environment variable setting can be used to bypass lrzip.conf.\n"); print_output("TMP environment variable will be used for storage of temporary files when needed.\n"); print_output("TMPDIR may also be stored in lrzip.conf file.\n"); print_output("\nIf no filenames or \"-\" is specified, stdin/out will be used.\n"); } static void license(void) { print_output("lrz version %s\n", PACKAGE_VERSION); print_output("Copyright (C) Con Kolivas 2006-2016\n"); print_output("Based on rzip "); print_output("Copyright (C) Andrew Tridgell 1998-2003\n\n"); print_output("This is free software. You may redistribute copies of it under the terms of\n"); print_output("the GNU General Public License .\n"); print_output("There is NO WARRANTY, to the extent permitted by law.\n"); } static void sighandler(int sig __UNUSED__) { signal(sig, SIG_IGN); signal(SIGTERM, SIG_IGN); signal(SIGTTIN, SIG_IGN); signal(SIGTTOU, SIG_IGN); print_err("Interrupted\n"); fatal_exit(&local_control); } static void show_summary(void) { /* OK, if verbosity set, print summary of options selected */ if (!INFO) { if (!TEST_ONLY) print_verbose("The following options are in effect for this %s.\n", DECOMPRESS ? "DECOMPRESSION" : "COMPRESSION"); print_verbose("Threading is %s. Number of CPUs detected: %d\n", control->threads > 1? 
"ENABLED" : "DISABLED", control->threads); print_verbose("Detected %lld bytes ram\n", control->ramsize); print_verbose("Compression level %d\n", control->compression_level); print_verbose("Nice Value: %d\n", control->nice_val); print_verbose("Show Progress\n"); print_maxverbose("Max "); print_verbose("Verbose\n"); if (FORCE_REPLACE) print_verbose("Overwrite Files\n"); if (!KEEP_FILES) print_verbose("Remove input files on completion\n"); if (control->outdir) print_verbose("Output Directory Specified: %s\n", control->outdir); else if (control->outname) print_verbose("Output Filename Specified: %s\n", control->outname); if (TEST_ONLY) print_verbose("Test file integrity\n"); if (control->tmpdir) print_verbose("Temporary Directory set as: %s\n", control->tmpdir); /* show compression options */ if (!DECOMPRESS && !TEST_ONLY) { print_verbose("Compression mode is: "); if (LZMA_COMPRESS) print_verbose("LZMA. LZ4 Compressibility testing %s\n", (LZ4_TEST? "enabled" : "disabled")); else if (LZO_COMPRESS) print_verbose("LZO\n"); else if (BZIP2_COMPRESS) print_verbose("BZIP2. LZ4 Compressibility testing %s\n", (LZ4_TEST? "enabled" : "disabled")); else if (ZLIB_COMPRESS) print_verbose("GZIP\n"); else if (ZPAQ_COMPRESS) print_verbose("ZPAQ. LZ4 Compressibility testing %s\n", (LZ4_TEST? 
"enabled" : "disabled")); else if (NO_COMPRESS) print_verbose("RZIP pre-processing only\n"); if (control->window) print_verbose("Compression Window: %lld = %lldMB\n", control->window, control->window * 100ull); /* show heuristically computed window size */ if (!control->window && !UNLIMITED) { i64 temp_chunk, temp_window; if (STDOUT || STDIN) temp_chunk = control->maxram; else temp_chunk = control->ramsize * 2 / 3; temp_window = temp_chunk / (100 * 1024 * 1024); print_verbose("Heuristically Computed Compression Window: %lld = %lldMB\n", temp_window, temp_window * 100ull); } if (UNLIMITED) print_verbose("Using Unlimited Window size\n"); } if (!DECOMPRESS && !TEST_ONLY) print_maxverbose("Storage time in seconds %lld\n", control->secs); if (ENCRYPT) print_maxverbose("Encryption hash loops %lld\n", control->encloops); } } static struct option long_options[] = { {"bzip2", no_argument, 0, 'b'}, /* 0 */ {"check", no_argument, 0, 'c'}, {"check", no_argument, 0, 'C'}, {"decompress", no_argument, 0, 'd'}, {"delete", no_argument, 0, 'D'}, {"encrypt", optional_argument, 0, 'e'}, /* 5 */ {"force", no_argument, 0, 'f'}, {"gzip", no_argument, 0, 'g'}, {"help", no_argument, 0, 'h'}, {"hash", no_argument, 0, 'H'}, {"info", no_argument, 0, 'i'}, /* 10 */ {"keep-broken", no_argument, 0, 'k'}, {"keep-broken", no_argument, 0, 'K'}, {"lzo", no_argument, 0, 'l'}, {"lzma", no_argument, 0, '/'}, {"level", optional_argument, 0, 'L'}, /* 15 */ {"license", no_argument, 0, 'L'}, {"maxram", required_argument, 0, 'm'}, {"no-compress", no_argument, 0, 'n'}, {"nice-level", required_argument, 0, 'N'}, {"outfile", required_argument, 0, 'o'}, {"outdir", required_argument, 0, 'O'}, /* 20 */ {"threads", required_argument, 0, 'p'}, {"progress", no_argument, 0, 'P'}, {"quiet", no_argument, 0, 'q'}, {"very-quiet", no_argument, 0, 'Q'}, {"recursive", no_argument, 0, 'r'}, {"suffix", required_argument, 0, 'S'}, {"test", no_argument, 0, 't'}, /* 25 */ {"threshold", required_argument, 0, 'T'}, {"unlimited", 
no_argument, 0, 'U'}, {"verbose", no_argument, 0, 'v'}, {"version", no_argument, 0, 'V'}, {"window", required_argument, 0, 'w'}, /* 30 */ {"zpaq", no_argument, 0, 'z'}, {"fast", no_argument, 0, '1'}, {"best", no_argument, 0, '9'}, {0, 0, 0, 0}, }; static void set_stdout(struct rzip_control *control) { control->flags |= FLAG_STDOUT; control->outFILE = stdout; control->msgout = stderr; register_outputfile(control, control->msgout); } /* Recursively enter all directories, adding all regular files to the dirlist array */ static void recurse_dirlist(char *indir, char **dirlist, int *entries) { char fname[MAX_PATH_LEN]; struct stat istat; struct dirent *dp; DIR *dirp; dirp = opendir(indir); if (unlikely(!dirp)) failure("Unable to open directory %s\n", indir); while ((dp = readdir(dirp)) != NULL) { if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) continue; sprintf(fname, "%s/%s", indir, dp->d_name); if (unlikely(stat(fname, &istat))) failure("Unable to stat file %s\n", fname); if (S_ISDIR(istat.st_mode)) { recurse_dirlist(fname, dirlist, entries); continue; } if (!S_ISREG(istat.st_mode)) { print_err("Not regular file %s\n", fname); continue; } print_maxverbose("Added file %s\n", fname); *dirlist = realloc(*dirlist, MAX_PATH_LEN * (*entries + 1)); strcpy(*dirlist + MAX_PATH_LEN * (*entries)++, fname); } closedir(dirp); } static const char *loptions = "bcCdDefghHiKlL:nN:o:O:p:PqQrS:tTUm:vVw:z?"; static const char *coptions = "bcCdefghHikKlLnN:o:O:p:PrS:tTUm:vVw:z?123456789"; int main(int argc, char *argv[]) { bool lrzcat = false, compat = false, recurse = false; bool options_file = false, conf_file_compression_set = false; /* for environment and tracking of compression setting */ struct timeval start_time, end_time; struct sigaction handler; double seconds,total_time; // for timers bool nice_set = false; int c, i; int hours,minutes; extern int optind; char *eptr, *av; /* for environment */ char *endptr = NULL; control = &base_control; initialise_control(control); 
av = basename(argv[0]); control->flags |= FLAG_OUTPUT; if (!strcmp(av, "lrunzip")) control->flags |= FLAG_DECOMPRESS; else if (!strcmp(av, "lrzcat")) { control->flags |= FLAG_DECOMPRESS | FLAG_STDOUT; lrzcat = true; } else if (!strcmp(av, "lrz")) { /* Called in gzip compatible command line mode */ control->flags &= ~FLAG_SHOW_PROGRESS; control->flags &= ~FLAG_KEEP_FILES; compat = true; long_options[1].name = "stdout"; long_options[11].name = "keep"; } /* generate crc table */ CrcGenerateTable(); /* Get Preloaded Defaults from lrzip.conf * Look in ., $HOME/.lrzip/, /etc/lrzip. * If LRZIP=NOCONFIG is set, then ignore config * If lrzip.conf sets a compression mode, options_file will be true. * This will allow for a test to permit an override of compression mode. * If there is an override, then all compression settings will be reset * and command line switches will prevail, including for --lzma. */ eptr = getenv("LRZIP"); if (eptr == NULL) options_file = read_config(control); else if (!strstr(eptr,"NOCONFIG")) options_file = read_config(control); if (options_file && (control->flags & FLAG_NOT_LZMA)) /* if some compression set in lrzip.conf */ conf_file_compression_set = true; /* need this to allow command line override */ while ((c = getopt_long(argc, argv, compat ? 
coptions : loptions, long_options, &i)) != -1) { switch (c) { case 'b': case 'g': case 'l': case 'n': case 'z': /* If some compression was chosen in lrzip.conf, allow this one time * because conf_file_compression_set will be true */ if ((control->flags & FLAG_NOT_LZMA) && conf_file_compression_set == false) failure("Can only use one of -l, -b, -g, -z or -n\n"); /* Select Compression Mode */ control->flags &= ~FLAG_NOT_LZMA; /* must clear all compressions first */ if (c == 'b') control->flags |= FLAG_BZIP2_COMPRESS; else if (c == 'g') control->flags |= FLAG_ZLIB_COMPRESS; else if (c == 'l') control->flags |= FLAG_LZO_COMPRESS; else if (c == 'n') control->flags |= FLAG_NO_COMPRESS; else if (c == 'z') control->flags |= FLAG_ZPAQ_COMPRESS; /* now FLAG_NOT_LZMA will evaluate as true */ conf_file_compression_set = false; break; case '/': /* LZMA Compress selected */ control->flags &= ~FLAG_NOT_LZMA; /* clear alternate compression flags */ break; case 'c': if (compat) { control->flags |= FLAG_KEEP_FILES; set_stdout(control); break; } /* FALLTHRU */ case 'C': control->flags |= FLAG_CHECK; control->flags |= FLAG_HASH; break; case 'd': control->flags |= FLAG_DECOMPRESS; break; case 'D': control->flags &= ~FLAG_KEEP_FILES; break; case 'e': control->flags |= FLAG_ENCRYPT; control->passphrase = optarg; break; case 'f': control->flags |= FLAG_FORCE_REPLACE; break; case 'h': usage(compat); exit(0); break; case 'H': control->flags |= FLAG_HASH; break; case 'i': control->flags |= FLAG_INFO; control->flags &= ~FLAG_DECOMPRESS; break; case 'k': if (compat) { control->flags |= FLAG_KEEP_FILES; break; } /* FALLTHRU */ case 'K': control->flags |= FLAG_KEEP_BROKEN; break; case 'L': if (compat) { license(); exit(0); } control->compression_level = strtol(optarg, &endptr, 10); if (control->compression_level < 1 || control->compression_level > 9) failure("Invalid compression level (must be 1-9)\n"); if (*endptr) failure("Extra characters after compression level: \'%s\'\n", endptr); break; 
case 'm': control->ramsize = strtol(optarg, &endptr, 10) * 1024 * 1024 * 100; if (*endptr) failure("Extra characters after ramsize: \'%s\'\n", endptr); break; case 'N': nice_set = true; control->nice_val = strtol(optarg, &endptr, 10); if (control->nice_val < PRIO_MIN || control->nice_val > PRIO_MAX) failure("Invalid nice value (must be %d...%d)\n", PRIO_MIN, PRIO_MAX); if (*endptr) failure("Extra characters after nice level: \'%s\'\n", endptr); break; case 'o': if (control->outdir) failure("Cannot have -o and -O together\n"); if (unlikely(STDOUT)) failure("Cannot specify an output filename when outputting to stdout\n"); control->outname = optarg; control->suffix = ""; break; case 'O': if (control->outname) /* can't mix -o and -O */ failure("Cannot have options -o and -O together\n"); if (unlikely(STDOUT)) failure("Cannot specify an output directory when outputting to stdout\n"); control->outdir = malloc(strlen(optarg) + 2); if (control->outdir == NULL) fatal("Failed to allocate for outdir\n"); strcpy(control->outdir,optarg); if (strcmp(optarg+strlen(optarg) - 1, "/")) /* need a trailing slash */ strcat(control->outdir, "/"); break; case 'p': control->threads = strtol(optarg, &endptr, 10); if (control->threads < 1) failure("Must have at least one thread\n"); if (*endptr) failure("Extra characters after number of threads: \'%s\'\n", endptr); break; case 'P': control->flags |= FLAG_SHOW_PROGRESS; break; case 'q': control->flags &= ~FLAG_SHOW_PROGRESS; break; case 'Q': control->flags &= ~FLAG_SHOW_PROGRESS; control->flags &= ~FLAG_OUTPUT; break; case 'r': recurse = true; break; case 'S': if (control->outname) failure("Specified output filename already, can't specify an extension.\n"); if (unlikely(STDOUT)) failure("Cannot specify a filename suffix when outputting to stdout\n"); control->suffix = optarg; break; case 't': if (control->outname) failure("Cannot specify an output file name when just testing.\n"); if (compat) control->flags |= FLAG_KEEP_FILES; if 
(!KEEP_FILES) failure("Doubt that you want to delete a file when just testing.\n"); control->flags |= FLAG_TEST_ONLY; break; case 'T': control->flags &= ~FLAG_THRESHOLD; break; case 'U': control->flags |= FLAG_UNLIMITED; break; case 'v': /* set verbosity flag */ if (!(control->flags & FLAG_SHOW_PROGRESS)) control->flags |= FLAG_SHOW_PROGRESS; else if (!(control->flags & FLAG_VERBOSITY) && !(control->flags & FLAG_VERBOSITY_MAX)) control->flags |= FLAG_VERBOSITY; else if ((control->flags & FLAG_VERBOSITY)) { control->flags &= ~FLAG_VERBOSITY; control->flags |= FLAG_VERBOSITY_MAX; } break; case 'V': control->msgout = stdout; print_output("lrzip version %s\n", PACKAGE_VERSION); exit(0); break; case 'w': control->window = strtol(optarg, &endptr, 10); if (control->window < 1) failure("Window must be positive\n"); if (*endptr) failure("Extra characters after window size: \'%s\'\n", endptr); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': control->compression_level = c - '0'; break; default: usage(compat); return 2; } } if (compat && !SHOW_PROGRESS) control->flags &= ~FLAG_OUTPUT; argc -= optind; argv += optind; if (control->outname) { if (argc > 1) failure("Cannot specify output filename with more than 1 file\n"); if (recurse) failure("Cannot specify output filename with recursive\n"); } if (VERBOSE && !SHOW_PROGRESS) { print_err("Cannot have -v and -q options. 
-v wins.\n"); control->flags |= FLAG_SHOW_PROGRESS; } if (UNLIMITED && control->window) { print_err("If -U used, cannot specify a window size with -w.\n"); control->window = 0; } if (argc < 1) control->flags |= FLAG_STDIN; if (UNLIMITED && STDIN) { print_err("Cannot have -U and stdin, unlimited mode disabled.\n"); control->flags &= ~FLAG_UNLIMITED; } setup_overhead(control); /* Set the main nice value to half that of the backend threads since * the rzip stage is usually the rate limiting step */ control->current_priority = getpriority(PRIO_PROCESS, 0); if (nice_set) { if (!NO_COMPRESS) { /* If niceness can't be set. just reset process priority */ if (unlikely(setpriority(PRIO_PROCESS, 0, control->nice_val/2) == -1)) { print_err("Warning, unable to set nice value %d...Resetting to %d\n", control->nice_val, control->current_priority); setpriority(PRIO_PROCESS, 0, (control->nice_val=control->current_priority)); } } else { if (unlikely(setpriority(PRIO_PROCESS, 0, control->nice_val) == -1)) { print_err("Warning, unable to set nice value %d...Resetting to %d\n", control->nice_val, control->current_priority); setpriority(PRIO_PROCESS, 0, (control->nice_val=control->current_priority)); } } } /* One extra iteration for the case of no parameters means we will default to stdin/out */ for (i = 0; i <= argc; i++) { char *dirlist = NULL, *infile = NULL; int direntries = 0, curentry = 0; if (i < argc) infile = argv[i]; else if (!(i == 0 && STDIN)) break; if (infile) { if ((strcmp(infile, "-") == 0)) control->flags |= FLAG_STDIN; else { bool isdir = false; struct stat istat; if (unlikely(stat(infile, &istat))) failure("Failed to stat %s\n", infile); isdir = S_ISDIR(istat.st_mode); if (!recurse && (isdir || !S_ISREG(istat.st_mode))) { failure("lrzip only works directly on regular FILES.\n" "Use -r recursive, lrztar or pipe through tar for compressing directories.\n"); } if (recurse && !isdir) failure("%s not a directory, -r recursive needs a directory\n", infile); } } if (recurse) 
{ if (unlikely(STDIN || STDOUT)) failure("Cannot use -r recursive with STDIO\n"); recurse_dirlist(infile, &dirlist, &direntries); } if (INFO && STDIN) failure("Will not get file info from STDIN\n"); recursion: if (recurse) { if (curentry >= direntries) { infile = NULL; continue; } infile = dirlist + MAX_PATH_LEN * curentry++; } control->infile = infile; /* If no output filename is specified, and we're using * stdin, use stdout */ if ((control->outname && (strcmp(control->outname, "-") == 0)) || (!control->outname && STDIN) || lrzcat) set_stdout(control); if (lrzcat) { control->msgout = stderr; control->outFILE = stdout; register_outputfile(control, control->msgout); } if (!STDOUT) { control->msgout = stdout; register_outputfile(control, control->msgout); } if (STDIN) control->inFILE = stdin; /* Implement signal handler only once flags are set */ sigemptyset(&handler.sa_mask); handler.sa_flags = 0; handler.sa_handler = &sighandler; sigaction(SIGTERM, &handler, 0); sigaction(SIGINT, &handler, 0); if (!FORCE_REPLACE) { if (STDIN && isatty(fileno((FILE *)stdin))) { print_err("Will not read stdin from a terminal. Use -f to override.\n"); usage(compat); exit (1); } if (!TEST_ONLY && STDOUT && isatty(fileno((FILE *)stdout)) && !compat) { print_err("Will not write stdout to a terminal. Use -f to override.\n"); usage(compat); exit (1); } } if (CHECK_FILE) { if (!DECOMPRESS) { print_err("Can only check file written on decompression.\n"); control->flags &= ~FLAG_CHECK; } else if (STDOUT) { print_err("Can't check file written when writing to stdout. 
Checking disabled.\n"); control->flags &= ~FLAG_CHECK; } } setup_ram(control); show_summary(); gettimeofday(&start_time, NULL); if (unlikely((STDIN || STDOUT) && ENCRYPT)) failure("Unable to work from STDIO while reading password\n"); memcpy(&local_control, &base_control, sizeof(rzip_control)); if (DECOMPRESS || TEST_ONLY) decompress_file(&local_control); else if (INFO) get_fileinfo(&local_control); else compress_file(&local_control); /* compute total time */ gettimeofday(&end_time, NULL); total_time = (end_time.tv_sec + (double)end_time.tv_usec / 1000000) - (start_time.tv_sec + (double)start_time.tv_usec / 1000000); hours = (int)total_time / 3600; minutes = (int)(total_time / 60) % 60; seconds = total_time - hours * 3600 - minutes * 60; if (!INFO) print_output("Total time: %02d:%02d:%05.2f\n", hours, minutes, seconds); if (recurse) goto recursion; } return 0; } lrzip-0.651/man/000077500000000000000000000000001421175057200134445ustar00rootroot00000000000000lrzip-0.651/man/Makefile.am000066400000000000000000000006211421175057200154770ustar00rootroot00000000000000MAINTAINERCLEANFILES = Makefile.in lrunzip.1 lrztar.1 lrzuntar.1 lrz.1 man1_MANS = lrzip.1 lrunzip.1 lrzcat.1 lrztar.1 lrzuntar.1 lrz.1 man5_MANS = lrzip.conf.5 BUILT_SOURCES = lrunzip.1 lrzcat.1 lrztar.1 lrzuntar.1 lrz.1 CLEANFILES = $(BUILT_SOURCES) EXTRA_DIST = lrzip.1 lrunzip.1.pod lrzcat.1.pod lrztar.1.pod lrzuntar.1.pod lrz.1.pod $(man5_MANS) SUFFIXES = .1 .1.pod .1.pod.1: pod2man $< $@ lrzip-0.651/man/lrunzip.1.pod000066400000000000000000000037361421175057200160230ustar00rootroot00000000000000# Copyright # # Copyright (C) 2010-2016 Con Kolivas # Copyright (C) 2009-2009 Jari Aalto # # License # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # # Description # # To learn what TOP LEVEL section to use in manual pages, # see POSIX/Susv standard and "tility Description Defaults" at # http://www.opengroup.org/onlinepubs/009695399/utilities/xcu_chap01.html#tag_01_11 # # This is manual page in Perl POD format. Read more at # http://perldoc.perl.org/perlpod.html or run command: # # perldoc perlpod | less # # To check the syntax: # # podchecker *.pod # # Create manual page with command: # # pod2man PAGE.N.pod > PAGE.N =pod =head1 NAME lrunzip - Uncompress LRZ files =head1 SYNOPSIS lrunzip [options] FILE [... FILE] =head1 DESCRIPTION lrunzip is identical to C used to decompress files. =head1 OPTIONS See lrzip(1). =head1 ENVIRONMENT None. =head1 FILES None. =head1 SEE ALSO lrzip.conf(5), lrzip(1), lrzcat(1), lrztar(1), lrzuntar(1), lrz(1), bzip2(1), gzip(1), lzop(1), rzip(1), zip(1) =head1 AUTHORS Program was written by Con Kolivas. This manual page was written by Jari Aalto (but may be used by others). Released under license GNU GPL version 2 or (at your option) any later version. For more information about license, visit . =cut lrzip-0.651/man/lrz.1.pod000066400000000000000000000116011421175057200151150ustar00rootroot00000000000000#!/usr/bin/perl -w # Copyright # # Copyright (C) 2021 Con Kolivas # # License # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # # Description # # To learn what TOP LEVEL section to use in manual pages, # see POSIX/Susv standard and "Utility Description Defaults" at # http://www.opengroup.org/onlinepubs/009695399/utilities/xcu_chap01.html#tag_01_11 # # This is a manual page in Perl POD format. Read more at # http://perldoc.perl.org/perlpod.html or run command: # # perldoc perlpod | less # # To check the syntax: # # podchecker *.pod # # Create manual page with command: # # pod2man PAGE.N.pod > PAGE.N =pod =encoding utf8 =head1 NAME lrz - gzip compatible command line variant of lrzip =head1 SYNOPSIS B<lrz> [options] I<file> =head1 DESCRIPTION B<lrz> is identical to the B<lrzip> application, however, its command line options and behaviour are made to be as compatible with B<gzip> as possible. =head1 OPTIONS =head2 General options =over 9 =item B<--stdout> =item B<-c> Output to STDOUT. =item B<--check> =item B<-C> Check integrity of file written on decompression. =item B<--decompress> =item B<-d> Decompress. =item B<--encrypt>[=I<password>] =item B<-e> Password protect sha512/aes128 encryption on compression. =item B<--help> =item B<-h> =item B<-?> Show help. =item B<--hash> =item B<-H> Display md5 hash integrity information. =item B<--info> =item B<-i> Show compressed file information. =item B<--license> =item B<-L> Display software version and license. =item B<--progress> =item B<-P> Show compression progress. =item B<--recursive> =item B<-r> Operate recursively on directories. =item B<--test> =item B<-t> Test compressed file integrity. =item B<--verbose> =item B<-v[vv]> Increase verbosity. =item B<--version> =item B<-V> Show version. 
=back =head2 Options affecting output =over 9 =item B<--force> =item B<-f> Force overwrite of any existing files. =item B<--keep> =item B<-k> Don't delete source files on de/compression. =item B<--keep-broken> =item B<-K> Keep broken or damaged output files. =item B<--outfile> I<filename> =item B<-o> I<filename> Specify the output file name and/or path. =item B<--outdir> I<directory> =item B<-O> I<directory> Specify the output directory when B<-o> is not used. =item B<--suffix> I<suffix> =item B<-S> I<suffix> Specify compressed suffix (default '.lrz'). =back =head2 Options affecting compression =over 9 =item B<--bzip2> =item B<-b> Bzip2 compression. =item B<--gzip> =item B<-g> Gzip compression using zlib. =item B<--lzo> =item B<-l> Lzo compression (ultra fast). =item B<--lzma> Lzma compression (default). =item B<--no-compress> =item B<-n> No backend compression - prepare for other compressor. =item B<--zpaq> =item B<-z> Zpaq compression (best, extreme compression, extremely slow). =back =head2 Low level options =over 9 =item B<-1> .. B<-9> =item B<--level> I<level> =item B<-L> I<level> Set lzma/bzip2/gzip compression level (1-9, default 7). =item B<--fast> Alias for B<-1>. =item B<--best> Alias for B<-9>. =item B<--nice-level> I<value> =item B<-N> I<value> Set nice value to I<value> (default 0). =item B<--threads> I<value> =item B<-p> I<value> Set processor count to override number of threads. =item B<--maxram> I<size> =item B<-m> I<size> Set maximum available ram as I<size> * 100 MB. Overrides detected amount of available ram. =item B<--threshold> =item B<-T> Disable LZ4 compressibility testing. =item B<--unlimited> =item B<-U> Use unlimited window size beyond ramsize (potentially much slower). =item B<--window> I<size> =item B<-w> I<size> Set maximum compression window as I<size> * 100 MB. Default chosen by heuristic dependent on ram and chosen compression. 
=back See also lrzip(1) =head1 ENVIRONMENT lrz uses the same environment and configuration files as lrzip(1) =head1 FILES See lrzip(1) =head1 SEE ALSO lrzip.conf(5), lrzip(1), lrunzip(1), lrztar(1), lrzuntar(1), bzip2(1), gzip(1), lzop(1), rzip(1), zip(1) =head1 AUTHORS This manual page was written by Con Kolivas (but may be used by others). Released under license GNU GPL version 2 or (at your option) any later version. For more information about license, visit . =cut lrzip-0.651/man/lrzcat.1.pod000066400000000000000000000036451421175057200156160ustar00rootroot00000000000000# Copyright # # Copyright (C) 2011-2016 Con Kolivas # # License # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # # Description # # To learn what TOP LEVEL section to use in manual pages, # see POSIX/Susv standard and "tility Description Defaults" at # http://www.opengroup.org/onlinepubs/009695399/utilities/xcu_chap01.html#tag_01_11 # # This is manual page in Perl POD format. Read more at # http://perldoc.perl.org/perlpod.html or run command: # # perldoc perlpod | less # # To check the syntax: # # podchecker *.pod # # Create manual page with command: # # pod2man PAGE.N.pod > PAGE.N =pod =head1 NAME lrzcat - Uncompress LRZ files to STDOUT =head1 SYNOPSIS lrzcat [options] FILE [... FILE] =head1 DESCRIPTION lrzcat is identical to C used to decompress files to STDOUT. =head1 OPTIONS See lrzip(1). =head1 ENVIRONMENT None. =head1 FILES None. 
=head1 SEE ALSO lrzip.conf(5), lrzip(1), lrunzip(1), lrztar(1), lrzuntar(1), lrz(1), bzip2(1), gzip(1), lzop(1), rzip(1), zip(1) =head1 AUTHORS This manual page was written by Con Kolivas (but may be used by others). Released under license GNU GPL version 2 or (at your option) any later version. For more information about license, visit . =cut lrzip-0.651/man/lrzip.1000066400000000000000000000366571421175057200147070ustar00rootroot00000000000000.TH "lrzip" "1" "February 2022" "" "" .SH "NAME" lrzip \- a large-file compression program .SH "SYNOPSIS" .PP lrzip [OPTIONS] .br lrzip \-d [OPTIONS] .br lrunzip [OPTIONS] .br lrzcat [OPTIONS] .br lrztar [lrzip options] .br lrztar \-d [lrzip options] .br lrzuntar [lrzip options] .br lrz [lrz options] .br LRZIP=NOCONFIG [lrzip|lrunzip] [OPTIONS] .PP .SH "DESCRIPTION" .PP LRZIP is a file compression program designed to do particularly well on very large files containing long distance redundancy\&. lrztar is a wrapper for LRZIP to simplify compression and decompression of directories. .PP .SH "OPTIONS SUMMARY" .PP Here is a summary of the options to lrzip\&. 
.nf General options: \-c, \-\-check check integrity of file written on decompression \-d, \-\-decompress decompress \-e, \-\-encrypt[=password] password protected sha512/aes128 encryption on compression \-h, \-?, \-\-help show help \-H, \-\-hash display md5 hash integrity information \-i, \-\-info show compressed file information \-q, \-\-quiet don't show compression progress \-Q, \-\-very-quiet don't show any output \-r, \-\-recursive operate recursively on directories \-t, \-\-test test compressed file integrity \-v[v], \-\-verbose Increase verbosity \-V, \-\-version show version Options affecting output: \-D, \-\-delete delete existing files \-f, \-\-force force overwrite of any existing files \-k, \-\-keep-broken keep broken or damaged output files \-o, \-\-outfile filename specify the output file name and/or path \-O, \-\-outdir directory specify the output directory when -o is not used \-S, \-\-suffix suffix specify compressed suffix (default '.lrz') Options affecting compression: \-b, \-\-bzip2 bzip2 compression \-g, \-\-gzip gzip compression using zlib \-l, \-\-lzo lzo compression (ultra fast) \-n, \-\-no-compress no backend compression - prepare for other compressor \-z, \-\-zpaq zpaq compression (best, extreme compression, extremely slow) Low level options: \-L, \-\-level level set lzma/bzip2/gzip compression level (1-9, default 7) \-N, \-\-nice-level value Set nice value to value (default 19) \-p, \-\-threads value Set processor count to override number of threads \-m, \-\-maxram size Set maximum available ram in hundreds of MB overrides detected amount of available ram \-T, \-\-threshold Disable LZ4 compressibility testing \-U, \-\-unlimited Use unlimited window size beyond ramsize (potentially much slower) \-w, \-\-window size maximum compression window in hundreds of MB default chosen by heuristic dependent on ram and chosen compression LRZIP=NOCONFIG environment variable setting can be used to bypass lrzip.conf. 
TMP environment variable will be used for storage of temporary files when needed. TMPDIR may also be stored in lrzip.conf file. If no filenames or "-" is specified, stdin/out will be used. .fi .PP .SH "OPTIONS" .PP .SH "General options" .IP "\fB-c\fP" This option enables integrity checking of the file written to disk on decompression. All decompression is tested internally in lrzip with either crc32 or md5 hash checking depending on the version of the archive already. However the file written to disk may be corrupted for other reasons to do with other userspace problems such as faulty library versions, drivers, hardware failure and so on. Enabling this option will make lrzip perform an md5 hash check on the file that's written to disk. When the archive has the md5 value stored in it, it is compared to this. Otherwise it is compared to the value calculated during decompression. This offers an extra guarantee that the file written is the same as the original archived. .IP .IP "\fB-d\fP" Decompress. If this option is not used then lrzip looks at the name used to launch the program. If it contains the string "lrunzip" then the \-d option is automatically set. If it contains the string "lrzcat" then the \-d \-o \- options are automatically set. .IP .IP "\fB-e\fP" .IP "\fB\-\-encrypt\fP[=\fIpassword\fP]" Encrypt. This option enables high grade password encryption using a combination of multiply sha512 hashed password, random salt and aes128 CBC encryption. Passwords up to 500 characters long are supported, and the encryption mechanism used virtually guarantees that the same file created with the same password will never be the same. Furthermore, the password hashing is increased according to the date the file is encrypted, increasing the number of CPU cycles required for each password attempt in accordance with Moore's law, thus making the difficulty of attempting brute force attacks proportional to the power of modern computers. 
.IP .IP "\fB-h|-?\fP" Print an options summary page .IP .IP "\fB-H\fP" This shows the md5 hash value calculated on compressing or decompressing an lrzip archive. By default all compression has the md5 value calculated and stored in all archives since version 0.560. On decompression, when an md5 value has been found, it will be calculated and used for integrity checking. If the md5 value is not stored in the archive, it will not be calculated unless explicitly specified with this option, or check integrity (see below) has been requested. .IP .IP "\fB-i\fP" This shows information about a compressed file. It shows the compressed size, the decompressed size, the compression ratio, what compression was used and what hash checking will be used for internal integrity checking. Note that the compression mode is detected from the first block only and it will show no compression used if the first block was incompressible, even if later blocks were compressible. If verbose options \-v or \-vv are added, a breakdown of all the internal blocks and progressively more information pertaining to them will also be shown. .IP .IP "\fB-q\fP" If this option is specified then lrzip will not show the percentage progress while compressing. Note that compression happens in bursts with lzma compression which is the default compression. This means that it will progress very rapidly for short periods and then stop for long periods. .IP .IP "\fB-Q\fP" If this option is specified then lrzip will not show any output to the console except for error messages. .IP .IP "\fB-r\fP" If this option is specified, lrzip will recursively enter the directories specified, compressing or decompressing every file individually in the same directory. Note for better compression it is recommended to instead combine files in a tar file rather than compress them separately, either manually or with the lrztar helper. .IP .IP "\fB-t\fP" This tests the compressed file integrity. 
It does this by decompressing it to a temporary file and then deleting it. .IP .IP "\fB-v[v]\fP" Increases verbosity. \-vv will print more messages than \-v. .IP .IP "\fB-V\fP" Print the lrzip version number. .IP .PP .SH "Options affecting output" .PP .IP "\fB-D\fP" If this option is specified then lrzip will delete the source file after successful compression or decompression. When this option is not specified then the source files are not deleted. .IP .IP "\fB-f\fP" If this option is not specified (Default) then lrzip will not overwrite any existing files. If you set this option then lrzip will silently overwrite any files as needed. .IP .IP "\fB-k\fP" This option will keep broken or damaged files instead of deleting them. When compression or decompression is interrupted either by user or error, or a decompressed file fails an integrity check, it is normally deleted by LRZIP. .IP .IP "\fB-o\fP" Set the output file name. If this option is not set then the output file name is chosen based on the input name and the suffix. The \-o option cannot be used if more than one file name is specified on the command line. .IP .IP "\fB-O\fP" Set the output directory for the default filename. This option cannot be combined with \-o. .IP .IP "\fB-S\fP" Set the compression suffix. The default is '.lrz'. .IP .PP .SH "Options affecting compression" .PP .IP "\fB-b\fP" Bzip2 compression. Uses bzip2 compression for the 2nd stage, much like the original rzip does. .IP "\fB-g\fP" Gzip compression. Uses gzip compression for the 2nd stage. Uses libz compress and uncompress functions. .IP .IP "\fB-l\fP" LZO Compression. If this option is set then lrzip will use the ultra fast lzo compression algorithm for the 2nd stage. This mode of compression gives bzip2 like compression at the speed it would normally take to simply copy the file, giving excellent compression/time value. .IP .IP "\fB-n\fP" No 2nd stage compression. 
If this option is set then lrzip will only perform the long distance redundancy 1st stage compression. While this does not compress any faster than LZO compression, it produces a smaller file that then responds better to further compression (e.g. by another application), also reducing the compression time substantially. .IP .IP "\fB-z\fP" ZPAQ compression. Uses ZPAQ compression which is from the PAQ family of compressors known for having some of the highest compression ratios possible but at the cost of being extremely slow on both compress and decompress (4x slower than lzma which is the default). .IP .PP .SH "Low level options" .PP .IP "\fB-L 1\&.\&.9\fP" Set the compression level from 1 to 9. The default is to use level 7, which gives good all round compression. The compression level is also strongly related to how much memory lrzip uses. See the \-w option for details. .IP .IP "\fB-N value\fP" The default nice value is 19. This option can be used to set the priority scheduling for the lrzip backup or decompression. Valid nice values are from \-20 to 19. Note this does NOT speed up or slow down compression. .IP .IP "\fB-p value\fP" Set the processor count used to determine the number of threads to run. Normally lrzip will scale according to the number of CPUs it detects. Using this will override the value in case you wish to use fewer CPUs to either decrease the load on your machine, or to improve compression. Setting it to 1 will maximise compression but will not attempt to use more than one CPU. .IP .IP "\fB-T\fP" Disables the LZ4 compressibility threshold testing when a slower compression back-end is used. LZ4 testing is normally performed for the slower back-end compression of LZMA and ZPAQ. The reasoning is that if it is completely incompressible by LZ4 then it will also be incompressible by them. Thus if a block fails to be compressed by the very fast LZ4, lrzip will not attempt to compress that block with the slower compressor, thereby saving time. 
If this option is enabled, it will bypass the LZ4 testing and attempt to compress each block regardless. .IP .IP "\fB-U \fP" Unlimited window size\&. If this option is set, and the file being compressed does not fit into the available ram, lrzip will use a moving second buffer as a "sliding mmap" which emulates having infinite ram. This will provide the most possible compression in the first rzip stage which can improve the compression of ultra large files when they're bigger than the available ram. However it runs progressively slower the larger the difference between ram and the file size, so is best reserved for when the smallest possible size is desired on a very large file, and the time taken is not important. .IP .IP "\fB-w n\fP" Set the maximum allowable compression window size to n in hundreds of megabytes. This is the amount of memory lrzip will search during its first stage of pre-compression and is the main thing that will determine how much benefit lrzip will provide over ordinary compression with the 2nd stage algorithm. If not set (recommended), the value chosen will be determined by an internal heuristic in lrzip which uses the most memory that is reasonable, without any hard upper limit. It is limited to 2GB on 32bit machines. lrzip will always reduce the window size to the biggest it can be without running out of memory. .IP .PP .SH "INSTALLATION" .PP "make install" or just install lrzip somewhere in your search path. .PP .SH "COMPRESSION ALGORITHM" .PP LRZIP operates in two stages. The first stage finds and encodes large chunks of duplicated data over potentially very long distances in the input file. The second stage is to use a compression algorithm to compress the output of the first stage. The compression algorithm can be chosen to be optimised for extreme size (zpaq), size (lzma - default), speed (lzo), legacy (bzip2 or gzip) or can be omitted entirely doing only the first stage. 
A file compressed with only the first stage will almost always improve both the compressed size and the speed achieved by a subsequent compression program. .PP The key difference between lrzip and other well known compression algorithms is its ability to take advantage of very long distance redundancy. The well known deflate algorithm used in gzip uses a maximum history buffer of 32k. The block sorting algorithm used in bzip2 is limited to 900k of history. The history buffer in lrzip can be of any size, not even limited by available ram. . .PP It is quite common these days to need to compress files that contain long distance redundancies. For example, when compressing a set of home directories several users might have copies of the same file, or of quite similar files. It is also common to have a single file that contains large duplicated chunks over long distances, such as pdf files containing repeated copies of the same image. Most compression programs won't be able to take advantage of this redundancy, and thus might achieve a much lower compression ratio than lrzip can achieve. .IP .PP .SH "FILES" .PP LRZIP recognises a configuration file that contains default settings. This configuration is searched for in the current directory, /etc/lrzip, and $HOME/.lrzip. The configuration filename must be \fBlrzip.conf\fP. .PP .SH "ENVIRONMENT" By default, lrzip will search for and use a configuration file, lrzip.conf. If the user wishes to bypass the file, a startup ENV variable may be set. .br .B LRZIP = .I "NOCONFIG " .B "[lrzip|lrunzip]" [OPTIONS] .br which will force lrzip to ignore the configuration file. .PP .SH "HISTORY - Notes on rzip by Andrew Tridgell" .PP The ideas behind rzip were first implemented in 1998 while I was working on rsync. That version was too slow to be practical, and was replaced by this version in 2003. 
LRZIP was created by the desire to have better compression and/or speed by Con Kolivas on blending the lzma and lzo compression algorithms with the rzip first stage, and extending the compression windows to scale with increasing ram sizes. .PP .SH "BUGS" .PP Nil known. .PP .SH "SEE ALSO" lrzip.conf(5), lrunzip(1), lrzcat(1), lrztar(1), lrzuntar(1), lrz(1), bzip2(1), gzip(1), lzop(1), rzip(1), zip(1) .PP .SH "DIAGNOSTICS" .PP Exit status is normally 0; if an error occurs, exit status is 1, usage errors is 2. .PP .SH "AUTHOR and CREDITS" .br lrzip is being extensively bastardised from rzip by Con Kolivas. .br rzip was written by Andrew Tridgell. .br lzma was written by Igor Pavlov. .br lzo was written by Markus Oberhumer. .br zpaq was written by Matt Mahoney. .br Peter Hyman added informational output, updated LZMA SDK, and added lzma multi-threading capabilities. .PP If you wish to report a problem, or make a suggestion, then please consult the git repository at: https://github.com/ckolivas/lrzip .PP lrzip is released under the GNU General Public License version 2. Please see the file COPYING for license details. lrzip-0.651/man/lrzip.conf.5000066400000000000000000000045031421175057200156200ustar00rootroot00000000000000.TH "lrzip.conf" "5" "January 2009, updated May 2019" "" "" .SH "NAME" lrzip.conf \- Configuration File for lrzip .SH "DESCRIPTION" .PP This file if used, will be read by the lrzip program\&, parsed\&, and options passed to the program\&. Some options may be overridden on the command line\&. Others are fixed\&. .PP The configuration file must be called \fBlrzip\&.conf\fP\&. The lrzip program will search for the file automatically in one of three places\&: .nf $PWD \- Current Directory /etc/lrzip $HOME/\&.lrzip .PP Parameters are set in \fBPARAMETER\&=VALUE\fP fashion where any line beginning with a \fB#\fP or that is blank will be ignored\&. Parameter values are not case sensitive except where specified\&. 
.PP .SH "CONFIG FILE EXAMPLE" .nf # This is a comment. # Compression Window size in 100MB. Normally selected by program. (-w) # WINDOW = 20 # Compression Level 1-9 (7 Default). (-L) # COMPRESSIONLEVEL = 7 # Use -U setting, Unlimited ram. Yes or No # UNLIMITED = NO # Compression Method, rzip, gzip, bzip2, lzo, or lzma (default), or zpaq. (-n -g -b -l --lzma -z) # If specified here, command line options not usable. # COMPRESSIONMETHOD = lzma # Perform LZO Test. Default = YES (-T ) # LZOTEST = NO # Hash Check on decompression, (-c) # HASHCHECK = YES # Show HASH value on Compression even if Verbose is off, YES (-H) # SHOWHASH = YES # Default output directory (-O) # OUTPUTDIRECTORY = location # Verbosity, YES or MAX (v, vv) # VERBOSITY = max # Show Progress as file is parsed, YES or no (NO = -q option) # SHOWPROGRESS = YES # Set Niceness. 19 is default. -20 to 19 is the allowable range (-N) # NICE = 19 # Keep broken or damaged output files, YES (-K) # KEEPBROKEN = YES # Delete source file after compression (-D) # this parameter and value are case sensitive # value must be YES to activate # DELETEFILES = NO # Replace existing lrzip file when compressing (-f) # this parameter and value are case sensitive # value must be YES to activate # REPLACEFILE = YES # Override for Temporary Directory. Only valid when stdin/out or Test is used # TMPDIR = /tmp # Whether to use encryption on compression YES, NO (-e) # ENCRYPT = NO .fi .PP .SH "NOTES" .PP Be careful when using \fBDELETEFILES\fP or \fBREPLACEFILE\fP as no warning will be given and lrzip will simply delete the source or replace the output file! 
.PP .SH "SEE ALSO" lrzip(1) lrzip-0.651/man/lrztar.1.pod000066400000000000000000000043001421175057200156220ustar00rootroot00000000000000# Copyright # # Copyright (C) 2010-2016 Con Kolivas # Copyright (C) 2009-2010 Jari Aalto # # License # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # # Description # # To learn what TOP LEVEL section to use in manual pages, # see POSIX/Susv standard and "Utility Description Defaults" at # http://www.opengroup.org/onlinepubs/009695399/utilities/xcu_chap01.html#tag_01_11 # # This is a manual page in Perl POD format. Read more at # http://perldoc.perl.org/perlpod.html or run command: # # perldoc perlpod | less # # To check the syntax: # # podchecker *.pod # # Create manual page with command: # # pod2man PAGE.N.pod > PAGE.N =pod =head1 NAME lrztar - Directory wrapper for lrzip =head1 SYNOPSIS lrztar [options] DIRECTORY lrztar -d [options] DIRECTORY.tar.lrz lrzuntar [options] DIRECTORY.tar.lrz =head1 DESCRIPTION lrztar is a wrapper for compressing and decompressing whole directories with lrzip(1) to corresponding file C<DIRECTORY.tar.lrz>. lrzuntar is identical to C<lrztar -d>. lrztar takes the same options as lrzip. =head1 OPTIONS See lrzip(1). =head1 ENVIRONMENT None. =head1 FILES None. =head1 SEE ALSO lrzip.conf(5), lrzuntar(1), lrzip(1), lrunzip(1), lrzcat(1), lrz(1), bzip2(1), gzip(1), lzop(1), rzip(1), zip(1) =head1 AUTHORS Program was written by Con Kolivas.
This manual page was written by Jari Aalto (but may be used by others). Released under license GNU GPL version 2 or (at your option) any later version. For more information about license, visit <http://www.gnu.org/copyleft/gpl.html>. =cut lrzip-0.651/man/lrzuntar.1.pod000066400000000000000000000027221421175057200161730ustar00rootroot00000000000000# Copyright # # Copyright (C) 2010-2016 Con Kolivas # # License # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # =pod =head1 NAME lrzuntar - Extract whole tarball from .tar.lrz files =head1 SYNOPSIS lrzuntar [options] DIRECTORY.tar.lrz =head1 DESCRIPTION lrzuntar is identical to C<lrztar -d>, used to decompress lrzip compressed tarballs. =head1 OPTIONS See lrzip(1) =head1 ENVIRONMENT None. =head1 FILES None. =head1 SEE ALSO lrzip.conf(5), lrztar(1), lrzip(1), lrunzip(1), lrzcat(1), lrz(1), bzip2(1), gzip(1), lzop(1), rzip(1), zip(1) =head1 AUTHORS This manual page was written by Con Kolivas (but may be used by others). Released under license GNU GPL version 2 or (at your option) any later version. For more information about license, visit <http://www.gnu.org/copyleft/gpl.html>.
=cut lrzip-0.651/man/pod2man.mk000066400000000000000000000033261421175057200153410ustar00rootroot00000000000000# pod2man.mk -- Makefile portion to convert *.pod files to manual pages # # Copyright information # # Copyright (C) 2008-2010 Jari Aalto # # License # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # # Description # # Convert *.pod files to manual pages. Write this to 'install' # target: # # install: build $(MANPAGE) ifneq (,) This makefile requires GNU Make. endif # This variable *must* be set when called PACKAGE ?= package # Optional variables to set MANSECT ?= 1 PODCENTER = PODDATE = $$(date "+%Y-%m-%d") # Directories MANSRC = MANDEST = $(MANSRC) MANPOD = $(MANSRC)$(PACKAGE).$(MANSECT).pod MANPAGE = $(MANDEST)$(PACKAGE).$(MANSECT) POD2MAN = pod2man POD2MAN_FLAGS = --utf8 makeman: $(MANPAGE) $(MANPAGE): $(MANPOD) # make target - create manual page from a *.pod page podchecker $(MANPOD) LC_ALL= LANG=C $(POD2MAN) $(POD2MAN_FLAGS) \ --center="$(PODCENTER)" \ --date="$(PODDATE)" \ --name="$(PACKAGE)" \ --section="$(MANSECT)" \ $(MANPOD) \ | sed 's,[Pp]erl v[0-9.]\+,$(PACKAGE),' \ > $(MANPAGE) && \ rm -f pod*.tmp # End of of Makefile part lrzip-0.651/md5.c000066400000000000000000000356471421175057200135410ustar00rootroot00000000000000/* Copyright (C) 2012-2013 Con Kolivas Copyright (C) 1995-2011 Ulrich Drepper. Functions to compute MD5 message digest of files or memory blocks. 
according to the definition of MD5 in RFC 1321 from April 1992. Copyright (C) 1995-1997, 1999-2001, 2005-2006, 2008-2011 Free Software Foundation, Inc. This file is part of the GNU C Library. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /* Written by Ulrich Drepper , 1995. */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include #include "lrzip_private.h" #include "md5.h" #if USE_UNLOCKED_IO # include "unlocked-io.h" #endif #ifdef HAVE_ENDIAN_H # include #elif HAVE_SYS_ENDIAN_H # include #endif #ifdef HAVE_ARPA_INET_H # include #endif /* We need to keep the namespace clean so define the MD5 function protected using leading __ . */ # define md5_init_ctx __md5_init_ctx # define md5_process_block __md5_process_block # define md5_process_bytes __md5_process_bytes # define md5_finish_ctx __md5_finish_ctx # define md5_read_ctx __md5_read_ctx # define md5_stream __md5_stream # define md5_buffer __md5_buffer #define BLOCKSIZE 32768 #if BLOCKSIZE % 64 != 0 # error "invalid BLOCKSIZE" #endif /* This array contains the bytes used to pad the buffer to the next 64-byte boundary. (RFC 1321, 3.1: Step 1) */ static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ... */ }; /* Initialize structure containing state of computation. 
(RFC 1321, 3.3: Step 3) */ void md5_init_ctx (struct md5_ctx *ctx) { ctx->A = 0x67452301; ctx->B = 0xefcdab89; ctx->C = 0x98badcfe; ctx->D = 0x10325476; ctx->total[0] = ctx->total[1] = 0; ctx->buflen = 0; } /* Copy the 4 byte value from v into the memory location pointed to by *cp, If your architecture allows unaligned access this is equivalent to * (uint32_t *) cp = v */ static inline void set_uint32 (char *cp, uint32_t v) { memcpy (cp, &v, sizeof v); } /* Put result from CTX in first 16 bytes following RESBUF. The result must be in little endian byte order. */ void * md5_read_ctx (const struct md5_ctx *ctx, void *resbuf) { char *r = resbuf; set_uint32 (r + 0 * sizeof ctx->A, htole32 (ctx->A)); set_uint32 (r + 1 * sizeof ctx->B, htole32 (ctx->B)); set_uint32 (r + 2 * sizeof ctx->C, htole32 (ctx->C)); set_uint32 (r + 3 * sizeof ctx->D, htole32 (ctx->D)); return resbuf; } /* Process the remaining bytes in the internal buffer and the usual prolog according to the standard and write the result to RESBUF. */ void * md5_finish_ctx (struct md5_ctx *ctx, void *resbuf) { /* Take yet unprocessed bytes into account. */ uint32_t bytes = ctx->buflen; size_t size = (bytes < 56) ? 64 / 4 : 64 * 2 / 4; /* Now count remaining bytes. */ ctx->total[0] += bytes; if (ctx->total[0] < bytes) ++ctx->total[1]; /* Put the 64-bit file length in *bits* at the end of the buffer. */ ctx->buffer[size - 2] = htole32 (ctx->total[0] << 3); ctx->buffer[size - 1] = htole32 ((ctx->total[1] << 3) | (ctx->total[0] >> 29)); memcpy (&((char *) ctx->buffer)[bytes], fillbuf, (size - 2) * 4 - bytes); /* Process last bytes. */ md5_process_block (ctx->buffer, size * 4, ctx); return md5_read_ctx (ctx, resbuf); } /* Compute MD5 message digest for bytes read from STREAM. The resulting message digest number will be written into the 16 bytes beginning at RESBLOCK. 
   Returns 0 on success, or 1 if the read buffer cannot be allocated or
   the stream reports an error.  */
int
md5_stream (FILE *stream, void *resblock)
{
  struct md5_ctx ctx;
  size_t sum;

  char *buffer = malloc (BLOCKSIZE + 72);
  if (!buffer)
    return 1;

  /* Initialize the computation context.  */
  md5_init_ctx (&ctx);

  /* Iterate over full file contents.  */
  while (1)
    {
      /* We read the file in blocks of BLOCKSIZE bytes.  One call of the
         computation function processes the whole buffer so that with the
         next round of the loop another block can be read.  */
      size_t n;
      sum = 0;

      /* Read block.  Take care for partial reads.  */
      while (1)
        {
          n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream);

          sum += n;

          if (sum == BLOCKSIZE)
            break;

          if (n == 0)
            {
              /* Check for the error flag IFF N == 0, so that we don't
                 exit the loop after a partial read due to e.g., EAGAIN
                 or EWOULDBLOCK.  */
              if (ferror (stream))
                {
                  free (buffer);
                  return 1;
                }
              goto process_partial_block;
            }

          /* We've read at least one byte, so ignore errors.  But always
             check for EOF, since feof may be true even though N > 0.
             Otherwise, we could end up calling fread after EOF.  */
          if (feof (stream))
            goto process_partial_block;
        }

      /* Process buffer with BLOCKSIZE bytes.  Note that
         BLOCKSIZE % 64 == 0  */
      md5_process_block (buffer, BLOCKSIZE, &ctx);
    }

process_partial_block:

  /* Process any remaining bytes.  SUM is the size of the last, short
     read block and may be any value below BLOCKSIZE, so it goes through
     md5_process_bytes rather than md5_process_block.  */
  if (sum > 0)
    md5_process_bytes (buffer, sum, &ctx);

  /* Construct result in desired memory.  */
  md5_finish_ctx (&ctx, resblock);
  free (buffer);
  return 0;
}

/* Compute MD5 message digest for LEN bytes beginning at BUFFER.  The
   result is always in little endian byte order, so that a byte-wise
   output yields to the wanted ASCII representation of the message
   digest.  Returns RESBLOCK (the value handed back by md5_finish_ctx).  */
void *
md5_buffer (const char *buffer, size_t len, void *resblock)
{
  struct md5_ctx ctx;

  /* Initialize the computation context.  */
  md5_init_ctx (&ctx);

  /* Feed the whole buffer.  md5_process_bytes hashes the complete
     64-byte blocks and keeps the remaining tail in the context.  */
  md5_process_bytes (buffer, len, &ctx);

  /* Put result in desired memory area.  */
  return md5_finish_ctx (&ctx, resblock);
}

/* Update CTX with LEN bytes starting at BUFFER.  LEN need not be a
   multiple of 64: complete 64-byte blocks are hashed immediately and
   whatever is left over is kept in ctx->buffer (its length in
   ctx->buflen) until more data arrives or the context is finished.  */
void
md5_process_bytes (const void *buffer, size_t len, struct md5_ctx *ctx)
{
  /* When we already have some bits in our internal buffer concatenate
     both inputs first.  */
  if (ctx->buflen != 0)
    {
      size_t left_over = ctx->buflen;
      /* Top the internal buffer up to at most 128 bytes.
         NOTE(review): assumes ctx->buffer holds at least 128 bytes; its
         declaration is outside this view -- confirm.  */
      size_t add = 128 - left_over > len ? len : 128 - left_over;

      memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
      ctx->buflen += add;

      if (ctx->buflen > 64)
        {
          md5_process_block (ctx->buffer, ctx->buflen & ~63, ctx);

          ctx->buflen &= 63;
          /* The regions in the following copy operation cannot overlap.  */
          memcpy (ctx->buffer,
                  &((char *) ctx->buffer)[(left_over + add) & ~63],
                  ctx->buflen);
        }

      buffer = (const char *) buffer + add;
      len -= add;
    }

  /* Process available complete blocks.  */
  if (len >= 64)
    {
#if !_STRING_ARCH_unaligned
# define alignof(type) offsetof (struct { char c; type x; }, x)
# define UNALIGNED_P(p) (((size_t) p) % alignof (uint32_t) != 0)
      /* Misaligned input is bounced through ctx->buffer one block at a
         time.  The loop condition is "> 64", so a final full block is
         left for the tail handling below, which hashes it once the
         internal buffer reaches 64 bytes.  */
      if (UNALIGNED_P (buffer))
        while (len > 64)
          {
            md5_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
            buffer = (const char *) buffer + 64;
            len -= 64;
          }
      else
#endif
        {
          /* Hash every complete block straight from the caller's memory.  */
          md5_process_block (buffer, len & ~63, ctx);
          buffer = (const char *) buffer + (len & ~63);
          len &= 63;
        }
    }

  /* Move remaining bytes in internal buffer.  */
  if (len > 0)
    {
      size_t left_over = ctx->buflen;

      memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
      left_over += len;
      if (left_over >= 64)
        {
          md5_process_block (ctx->buffer, 64, ctx);
          left_over -= 64;
          /* Slide the bytes behind the block just hashed (word index 16
             is byte offset 64 when the buffer elements are 32 bits wide)
             back to the front.  */
          memcpy (ctx->buffer, &ctx->buffer[16], left_over);
        }
      ctx->buflen = left_over;
    }
}


/* These are the four functions used in the four steps of the MD5
   algorithm and defined in the RFC 1321.  The first function is a
   little bit optimized (as found in Colin Plumbs public domain
   implementation).  */
/* #define FF(b, c, d) ((b & c) | (~b & d)) */
#define FF(b, c, d) (d ^ (b & (c ^ d)))
#define FG(b, c, d) FF (d, b, c)
#define FH(b, c, d) (b ^ c ^ d)
#define FI(b, c, d) (c ^ (b | ~d))

/* Process LEN bytes of BUFFER, accumulating context into CTX.
   It is assumed that LEN % 64 == 0.
   The byte counter in ctx->total is advanced by LEN and the four state
   words A..D of CTX are updated once per 64-byte block.  */

void
md5_process_block (const void *buffer, size_t len, struct md5_ctx *ctx)
{
  uint32_t correct_words[16];
  const uint32_t *words = buffer;
  size_t nwords = len / sizeof (uint32_t);
  const uint32_t *endp = words + nwords;
  uint32_t A = ctx->A;
  uint32_t B = ctx->B;
  uint32_t C = ctx->C;
  uint32_t D = ctx->D;
  uint32_t lolen = len;

  /* First increment the byte count.  RFC 1321 specifies the possible
     length of the file up to 2^64 bits.  Here we only compute the
     number of bytes.  Do a double word increment.  The high part of LEN
     is obtained with two shifts (31, then 1) instead of a single shift
     by 32, which would not be valid where size_t is only 32 bits wide.  */
  ctx->total[0] += lolen;
  ctx->total[1] += (len >> 31 >> 1) + (ctx->total[0] < lolen);

  /* Process all bytes in the buffer with 64 bytes in each round of
     the loop.  */
  while (words < endp)
    {
      uint32_t *cwp = correct_words;
      uint32_t A_save = A;
      uint32_t B_save = B;
      uint32_t C_save = C;
      uint32_t D_save = D;

      /* First round: using the given function, the context and a constant
         the next context is computed.  Because the algorithms processing
         unit is a 32-bit word and it is determined to work on words in
         little endian byte order we perhaps have to change the byte order
         before the computation.  To reduce the work for the next steps
         we store the swapped words in the array CORRECT_WORDS.  */

#define OP(a, b, c, d, s, T) \
      do \
        { \
          a += FF (b, c, d) + (*cwp++ = htole32 (*words)) + T; \
          ++words; \
          CYCLIC (a, s); \
          a += b; \
        } \
      while (0)

      /* It is unfortunate that C does not provide an operator for
         cyclic rotation.  Hope the C compiler is smart enough.  */
#define CYCLIC(w, s) (w = (w << s) | (w >> (32 - s)))

      /* Before we start, one word to the strange constants.
         They are defined in RFC 1321 as

         T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64

         Here is an equivalent invocation using Perl:

         perl -e 'foreach(1..64){printf "0x%08x\n", int (4294967296 * abs (sin $_))}'
       */

      /* Round 1.  Each step consumes the next input word in order and
         records it in CORRECT_WORDS.  */
      OP (A, B, C, D, 7, 0xd76aa478);
      OP (D, A, B, C, 12, 0xe8c7b756);
      OP (C, D, A, B, 17, 0x242070db);
      OP (B, C, D, A, 22, 0xc1bdceee);
      OP (A, B, C, D, 7, 0xf57c0faf);
      OP (D, A, B, C, 12, 0x4787c62a);
      OP (C, D, A, B, 17, 0xa8304613);
      OP (B, C, D, A, 22, 0xfd469501);
      OP (A, B, C, D, 7, 0x698098d8);
      OP (D, A, B, C, 12, 0x8b44f7af);
      OP (C, D, A, B, 17, 0xffff5bb1);
      OP (B, C, D, A, 22, 0x895cd7be);
      OP (A, B, C, D, 7, 0x6b901122);
      OP (D, A, B, C, 12, 0xfd987193);
      OP (C, D, A, B, 17, 0xa679438e);
      OP (B, C, D, A, 22, 0x49b40821);

      /* For the second to fourth round we have the possibly swapped words
         in CORRECT_WORDS.  Redefine the macro to take an additional first
         argument specifying the function to use.  The new argument K is
         the index of the word in CORRECT_WORDS used by that step.  */
#undef OP
#define OP(f, a, b, c, d, k, s, T) \
      do \
        { \
          a += f (b, c, d) + correct_words[k] + T; \
          CYCLIC (a, s); \
          a += b; \
        } \
      while (0)

      /* Round 2.  */
      OP (FG, A, B, C, D, 1, 5, 0xf61e2562);
      OP (FG, D, A, B, C, 6, 9, 0xc040b340);
      OP (FG, C, D, A, B, 11, 14, 0x265e5a51);
      OP (FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
      OP (FG, A, B, C, D, 5, 5, 0xd62f105d);
      OP (FG, D, A, B, C, 10, 9, 0x02441453);
      OP (FG, C, D, A, B, 15, 14, 0xd8a1e681);
      OP (FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
      OP (FG, A, B, C, D, 9, 5, 0x21e1cde6);
      OP (FG, D, A, B, C, 14, 9, 0xc33707d6);
      OP (FG, C, D, A, B, 3, 14, 0xf4d50d87);
      OP (FG, B, C, D, A, 8, 20, 0x455a14ed);
      OP (FG, A, B, C, D, 13, 5, 0xa9e3e905);
      OP (FG, D, A, B, C, 2, 9, 0xfcefa3f8);
      OP (FG, C, D, A, B, 7, 14, 0x676f02d9);
      OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a);

      /* Round 3.  */
      OP (FH, A, B, C, D, 5, 4, 0xfffa3942);
      OP (FH, D, A, B, C, 8, 11, 0x8771f681);
      OP (FH, C, D, A, B, 11, 16, 0x6d9d6122);
      OP (FH, B, C, D, A, 14, 23, 0xfde5380c);
      OP (FH, A, B, C, D, 1, 4, 0xa4beea44);
      OP (FH, D, A, B, C, 4, 11, 0x4bdecfa9);
      OP (FH, C, D, A, B, 7, 16, 0xf6bb4b60);
      OP (FH, B, C, D, A, 10, 23, 0xbebfbc70);
      OP (FH, A, B, C, D, 13, 4, 0x289b7ec6);
      OP (FH, D, A, B, C, 0, 11, 0xeaa127fa);
      OP (FH, C, D, A, B, 3, 16, 0xd4ef3085);
      OP (FH, B, C, D, A, 6, 23, 0x04881d05);
      OP (FH, A, B, C, D, 9, 4, 0xd9d4d039);
      OP (FH, D, A, B, C, 12, 11, 0xe6db99e5);
      OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8);
      OP (FH, B, C, D, A, 2, 23, 0xc4ac5665);

      /* Round 4.  */
      OP (FI, A, B, C, D, 0, 6, 0xf4292244);
      OP (FI, D, A, B, C, 7, 10, 0x432aff97);
      OP (FI, C, D, A, B, 14, 15, 0xab9423a7);
      OP (FI, B, C, D, A, 5, 21, 0xfc93a039);
      OP (FI, A, B, C, D, 12, 6, 0x655b59c3);
      OP (FI, D, A, B, C, 3, 10, 0x8f0ccc92);
      OP (FI, C, D, A, B, 10, 15, 0xffeff47d);
      OP (FI, B, C, D, A, 1, 21, 0x85845dd1);
      OP (FI, A, B, C, D, 8, 6, 0x6fa87e4f);
      OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
      OP (FI, C, D, A, B, 6, 15, 0xa3014314);
      OP (FI, B, C, D, A, 13, 21, 0x4e0811a1);
      OP (FI, A, B, C, D, 4, 6, 0xf7537e82);
      OP (FI, D, A, B, C, 11, 10, 0xbd3af235);
      OP (FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
      OP (FI, B, C, D, A, 9, 21, 0xeb86d391);

      /* Add the starting values of the context.  */
      A += A_save;
      B += B_save;
      C += C_save;
      D += D_save;
    }

  /* Put checksum in context given as argument.  */
  ctx->A = A;
  ctx->B = B;
  ctx->C = C;
  ctx->D = D;
}
lrzip-0.651/md5.h000066400000000000000000000100001421175057200135160ustar00rootroot00000000000000/* Copyright (C) 2011 Con Kolivas
   Copyright (C) 1995-2011 Ulrich Drepper.
   Declaration of functions and data types used for MD5 sum computing
   library functions.
   Copyright (C) 1995-1997, 1999-2001, 2004-2006, 2008-2011 Free Software
   Foundation, Inc.
   This file is part of the GNU C Library.
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _MD5_H #define _MD5_H 1 #include #include "lrzip_private.h" #define MD5_DIGEST_SIZE 16 #define MD5_BLOCK_SIZE 64 #ifndef __GNUC_PREREQ # if defined __GNUC__ && defined __GNUC_MINOR__ # define __GNUC_PREREQ(maj, min) \ ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) # else # define __GNUC_PREREQ(maj, min) 0 # endif #endif #ifndef __THROW # if defined __cplusplus && __GNUC_PREREQ (2,8) # define __THROW throw () # else # define __THROW # endif #endif #ifndef _LIBC # define __md5_buffer md5_buffer # define __md5_finish_ctx md5_finish_ctx # define __md5_init_ctx md5_init_ctx # define __md5_process_block md5_process_block # define __md5_process_bytes md5_process_bytes # define __md5_read_ctx md5_read_ctx # define __md5_stream md5_stream #endif # ifdef __cplusplus extern "C" { # endif /* * The following three functions are build up the low level used in * the functions `md5_stream' and `md5_buffer'. */ /* Initialize structure containing state of computation. (RFC 1321, 3.3: Step 3) */ extern void __md5_init_ctx (struct md5_ctx *ctx) __THROW; /* Starting with the result of former calls of this function (or the initialization function update the context for the next LEN bytes starting at BUFFER. It is necessary that LEN is a multiple of 64!!! 
*/ extern void __md5_process_block (const void *buffer, size_t len, struct md5_ctx *ctx) __THROW; /* Starting with the result of former calls of this function (or the initialization function update the context for the next LEN bytes starting at BUFFER. It is NOT required that LEN is a multiple of 64. */ extern void __md5_process_bytes (const void *buffer, size_t len, struct md5_ctx *ctx) __THROW; /* Process the remaining bytes in the buffer and put result from CTX in first 16 bytes following RESBUF. The result is always in little endian byte order, so that a byte-wise output yields to the wanted ASCII representation of the message digest. */ extern void *__md5_finish_ctx (struct md5_ctx *ctx, void *resbuf) __THROW; /* Put result from CTX in first 16 bytes following RESBUF. The result is always in little endian byte order, so that a byte-wise output yields to the wanted ASCII representation of the message digest. */ extern void *__md5_read_ctx (const struct md5_ctx *ctx, void *resbuf) __THROW; /* Compute MD5 message digest for bytes read from STREAM. The resulting message digest number will be written into the 16 bytes beginning at RESBLOCK. */ extern int __md5_stream (FILE *stream, void *resblock) __THROW; /* Compute MD5 message digest for LEN bytes beginning at BUFFER. The result is always in little endian byte order, so that a byte-wise output yields to the wanted ASCII representation of the message digest. 
*/ extern void *__md5_buffer (const char *buffer, size_t len, void *resblock) __THROW; # ifdef __cplusplus } # endif #endif /* md5.h */ lrzip-0.651/regressiontest.good000066400000000000000000000013361421175057200166260ustar00rootroot00000000000000Test basic use Test decompression in read-only dir 1000 1000 3893 this should be silent man page for lrz should exist 0 compress stdin to stdout Respect $TMPDIR 1000 1000 3893 Decompress in read only dir 1000 1000 3893 Test -cd 1000 1000 3893 Test -cfd should not remove testfile.lrz 1000 1000 3893 testfile.lrz Test -1c 1002 1002 3975 Test -r t10.lrz t1.lrz t2.lrz t3.lrz t4.lrz t5.lrz t6.lrz t7.lrz t8.lrz t9.lrz Test tar compatibility t/ t/t8 t/t7 t/t3 t/t5 t/t2 t/t6 t/t10 t/t4 t/t9 t/t1 11 test compress of 1 GB data with parallel --pipe --compress 1073741824 test compress of 1 GB with sort --compress-program 1073741825 test should not lrz -dc removes file OK testfile.lrz lrzip-0.651/regressiontest.sh000066400000000000000000000053401421175057200163070ustar00rootroot00000000000000#!/bin/bash # Regression test. # # Copyright (C) 2016 # Ole Tange and Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, see # or write to the Free Software Foundation, Inc., 51 Franklin St, # Fifth Floor, Boston, MA 02110-1301 USA bash > regressiontest.out 2>&1 <<'_EOS' rm -f testfile.lrz seq 1000 > testfile echo 'Test basic use' lrz testfile echo 'Test decompression in read-only dir' mkdir -p ro cp testfile.lrz ro chmod 500 ro cd ro lrz -dc testfile.lrz | wc cd .. echo 'this should be silent' lrz -d testfile.lrz echo 'man page for lrz should exist' man lrz >/dev/null echo $? echo 'compress stdin to stdout' cat testfile | lrz | cat > testfile.lrz echo 'Respect $TMPDIR' mkdir -p t chmod 111 t cd t TMPDIR=.. lrz -d < ../testfile.lrz | wc cd .. rm -rf t echo 'Decompress in read only dir' mkdir -p t chmod 111 t cd t lrz -d < ../testfile.lrz | wc cd .. rm -rf t echo 'Test -cd' mkdir -p t chmod 111 t cd t lrz -cd ../testfile.lrz | wc cd .. rm -rf t echo 'Test -cfd should not remove testfile.lrz' mkdir -p t chmod 111 t cd t lrz -cfd ../testfile.lrz | wc cd .. 
rm -rf t ls testfile.lrz echo 'Test -1c' lrz -1c testfile | wc echo 'Test -r' mkdir t touch t/t{1..10} lrz -r t ls t rm -r t echo 'Test tar compatibility' mkdir t touch t/t{1..10} tar --use-compress-program lrz -cvf testfile.tar.lrz t tar --use-compress-program lrz -tvf testfile.tar.lrz | wc -l rm -r t echo 'test compress of 1 GB data with parallel --pipe --compress' yes "`echo {1..100}`" | head -c 1G | parallel --pipe --block 100m --compress-program lrz cat | wc -c echo 'test compress of 1 GB with sort --compress-program' yes "`echo {1..100}`" | head -c 1G | sort --compress-program lrz | wc -c echo 'test should not lrz -dc removes file' rm testfile.lrz echo OK > testfile lrz testfile lrz -dc testfile.lrz ls testfile.lrz _EOS diff regressiontest.good regressiontest.out lrzip-0.651/runzip.c000066400000000000000000000335311421175057200143710ustar00rootroot00000000000000/* Copyright (C) 2006-2016,2018,2021-2022 Con Kolivas Copyright (C) 1998-2003 Andrew Tridgell This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ /* rzip decompression algorithm */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include #ifdef HAVE_SYS_STAT_H # include #endif #ifdef HAVE_SYS_TIME_H # include #endif #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_ENDIAN_H # include #elif HAVE_SYS_ENDIAN_H # include #endif #ifdef HAVE_ARPA_INET_H # include #endif #include "md5.h" #include "runzip.h" #include "stream.h" #include "util.h" #include "lrzip_core.h" /* needed for CRC routines */ #include "lzma/C/7zCrc.h" static inline uchar read_u8(rzip_control *control, void *ss, int stream, bool *err) { uchar b; if (unlikely(read_stream(control, ss, stream, &b, 1) != 1)) { *err = true; fatal_return(("Stream read u8 failed\n"), 0); } return b; } static inline u32 read_u32(rzip_control *control, void *ss, int stream, bool *err) { u32 ret; if (unlikely(read_stream(control, ss, stream, (uchar *)&ret, 4) != 4)) { *err = true; fatal_return(("Stream read u32 failed\n"), 0); } ret = le32toh(ret); return ret; } /* Read a variable length of chars dependant on how big the chunk was */ static inline i64 read_vchars(rzip_control *control, void *ss, int stream, int length) { i64 s = 0; if (unlikely(read_stream(control, ss, stream, (uchar *)&s, length) != length)) fatal_return(("Stream read of %d bytes failed\n", length), -1); s = le64toh(s); return s; } static i64 seekcur_fdout(rzip_control *control) { if (!TMP_OUTBUF) return lseek(control->fd_out, 0, SEEK_CUR); return (control->out_relofs + control->out_ofs); } static i64 seekto_fdhist(rzip_control *control, i64 pos) { if (!TMP_OUTBUF) return lseek(control->fd_hist, pos, SEEK_SET); control->hist_ofs = pos - control->out_relofs; if (control->hist_ofs > control->out_len) control->out_len = control->hist_ofs; if (unlikely(control->hist_ofs < 0 || control->hist_ofs > control->out_maxlen)) { print_err("Trying to seek outside tmpoutbuf to %lld in seekto_fdhist\n", control->hist_ofs); return -1; } return pos; } static i64 seekcur_fdin(rzip_control *control) { if (!TMP_INBUF) 
return lseek(control->fd_in, 0, SEEK_CUR); return control->in_ofs; } static i64 seekto_fdin(rzip_control *control, i64 pos) { if (!TMP_INBUF) return lseek(control->fd_in, pos, SEEK_SET); if (unlikely(pos > control->in_len || pos < 0)) { print_err("Trying to seek outside tmpinbuf to %lld in seekto_fdin\n", pos); return -1; } control->in_ofs = pos; return 0; } static i64 seekto_fdinend(rzip_control *control) { int tmpchar; if (!TMP_INBUF) return lseek(control->fd_in, 0, SEEK_END); while ((tmpchar = getchar()) != EOF) { control->tmp_inbuf[control->in_len++] = (char)tmpchar; if (unlikely(control->in_len > control->in_maxlen)) failure_return(("Trying to read greater than max_len\n"), -1); } control->in_ofs = control->in_len; return control->in_ofs; } static i64 read_header(rzip_control *control, void *ss, uchar *head) { bool err = false; *head = read_u8(control, ss, 0, &err); if (err) return -1; return read_vchars(control, ss, 0, control->chunk_bytes); } static i64 unzip_literal(rzip_control *control, void *ss, i64 len, uint32 *cksum) { i64 stream_read; uchar *buf; if (unlikely(len < 0)) failure_return(("len %lld is negative in unzip_literal!\n",len), -1); buf = (uchar *)malloc(len); if (unlikely(!buf)) fatal_return(("Failed to malloc literal buffer of size %lld\n", len), -1); stream_read = read_stream(control, ss, 1, buf, len); if (unlikely(stream_read == -1 )) { dealloc(buf); fatal_return(("Failed to read_stream in unzip_literal\n"), -1); } if (unlikely(write_1g(control, buf, (size_t)stream_read) != (ssize_t)stream_read)) { dealloc(buf); fatal_return(("Failed to write literal buffer of size %lld\n", stream_read), -1); } if (!HAS_MD5) *cksum = CrcUpdate(*cksum, buf, stream_read); if (!NO_MD5) md5_process_bytes(buf, stream_read, &control->ctx); dealloc(buf); return stream_read; } static i64 read_fdhist(rzip_control *control, void *buf, i64 len) { if (!TMP_OUTBUF) return read_1g(control, control->fd_hist, buf, len); if (unlikely(len + control->hist_ofs > 
control->out_maxlen)) { print_err("Trying to read beyond end of tmpoutbuf in read_fdhist\n"); return -1; } memcpy(buf, control->tmp_outbuf + control->hist_ofs, len); return len; } static i64 unzip_match(rzip_control *control, void *ss, i64 len, uint32 *cksum, int chunk_bytes) { i64 offset, n, total, cur_pos; uchar *buf; if (unlikely(len < 0)) failure_return(("len %lld is negative in unzip_match!\n",len), -1); total = 0; cur_pos = seekcur_fdout(control); if (unlikely(cur_pos == -1)) fatal_return(("Seek failed on out file in unzip_match.\n"), -1); /* Note the offset is in a different format v0.40+ */ offset = read_vchars(control, ss, 0, chunk_bytes); if (unlikely(offset == -1)) return -1; if (unlikely(seekto_fdhist(control, cur_pos - offset) == -1)) fatal_return(("Seek failed by %d from %d on history file in unzip_match\n", offset, cur_pos), -1); n = MIN(len, offset); if (unlikely(n < 1)) fatal_return(("Failed fd history in unzip_match due to corrupt archive\n"), -1); buf = (uchar *)malloc(n); if (unlikely(!buf)) fatal_return(("Failed to malloc match buffer of size %lld\n", len), -1); if (unlikely(read_fdhist(control, buf, (size_t)n) != (ssize_t)n)) { dealloc(buf); fatal_return(("Failed to read %d bytes in unzip_match\n", n), -1); } while (len) { n = MIN(len, offset); if (unlikely(n < 1)) fatal_return(("Failed fd history in unzip_match due to corrupt archive\n"), -1); if (unlikely(write_1g(control, buf, (size_t)n) != (ssize_t)n)) { dealloc(buf); fatal_return(("Failed to write %d bytes in unzip_match\n", n), -1); } if (!HAS_MD5) *cksum = CrcUpdate(*cksum, buf, n); if (!NO_MD5) md5_process_bytes(buf, n, &control->ctx); len -= n; total += n; } dealloc(buf); return total; } /* decompress a section of an open file. 
Call fatal_return(() on error return the number of bytes that have been retrieved */ static i64 runzip_chunk(rzip_control *control, int fd_in, i64 expected_size, i64 tally) { uint32 good_cksum, cksum = 0; i64 len, ofs, total = 0; int l = -1, p = 0; char chunk_bytes; struct stat st; uchar head; void *ss; bool err = false; /* for display of progress */ unsigned long divisor[] = {1,1024,1048576,1073741824U}; char *suffix[] = {"","KB","MB","GB"}; double prog_done, prog_tsize; int divisor_index; if (expected_size > (i64)10737418240ULL) /* > 10GB */ divisor_index = 3; else if (expected_size > 10485760) /* > 10MB */ divisor_index = 2; else if (expected_size > 10240) /* > 10KB */ divisor_index = 1; else divisor_index = 0; prog_tsize = (long double)expected_size / (long double)divisor[divisor_index]; /* Determine the chunk_byte width size. Versions < 0.4 used 4 * bytes for all offsets, version 0.4 used 8 bytes. Versions 0.5+ use * a variable number of bytes depending on chunk size.*/ if (control->major_version == 0 && control->minor_version < 4) chunk_bytes = 4; else if (control->major_version == 0 && control->minor_version == 4) chunk_bytes = 8; else { print_maxverbose("Reading chunk_bytes at %lld\n", get_readseek(control, fd_in)); /* Read in the stored chunk byte width from the file */ if (unlikely(read_1g(control, fd_in, &chunk_bytes, 1) != 1)) fatal_return(("Failed to read chunk_bytes size in runzip_chunk\n"), -1); if (unlikely(chunk_bytes < 1 || chunk_bytes > 8)) failure_return(("chunk_bytes %d is invalid in runzip_chunk\n", chunk_bytes), -1); } if (!tally && expected_size) print_maxverbose("Expected size: %lld\n", expected_size); print_maxverbose("Chunk byte width: %d\n", chunk_bytes); ofs = seekcur_fdin(control); if (unlikely(ofs == -1)) fatal_return(("Failed to seek input file in runzip_fd\n"), -1); if (fstat(fd_in, &st) || st.st_size - ofs == 0) return 0; ss = open_stream_in(control, fd_in, NUM_STREAMS, chunk_bytes); if (unlikely(!ss)) failure_return(("Failed to 
open_stream_in in runzip_chunk\n"), -1); /* All chunks were unnecessarily encoded 8 bytes wide version 0.4x */ if (control->major_version == 0 && control->minor_version == 4) control->chunk_bytes = 8; else control->chunk_bytes = 2; while ((len = read_header(control, ss, &head)) || head) { i64 u; if (unlikely(len == -1)) return -1; switch (head) { case 0: u = unzip_literal(control, ss, len, &cksum); if (unlikely(u == -1)) { close_stream_in(control, ss); return -1; } total += u; break; default: u = unzip_match(control, ss, len, &cksum, chunk_bytes); if (unlikely(u == -1)) { close_stream_in(control, ss); return -1; } total += u; break; } if (expected_size) { p = 100 * ((double)(tally + total) / (double)expected_size); if (p / 10 != l / 10) { prog_done = (double)(tally + total) / (double)divisor[divisor_index]; print_progress("%3d%% %9.2f / %9.2f %s\r", p, prog_done, prog_tsize, suffix[divisor_index] ); l = p; } } } if (!HAS_MD5) { good_cksum = read_u32(control, ss, 0, &err); if (unlikely(err)) { close_stream_in(control, ss); return -1; } if (unlikely(good_cksum != cksum)) { close_stream_in(control, ss); failure_return(("Bad checksum: 0x%08x - expected: 0x%08x\n", cksum, good_cksum), -1); } print_maxverbose("Checksum for block: 0x%08x\n", cksum); } if (unlikely(close_stream_in(control, ss))) fatal("Failed to close stream!\n"); return total; } /* Decompress an open file. 
Call fatal_return(() on error return the number of bytes that have been retrieved */ i64 runzip_fd(rzip_control *control, int fd_in, int fd_hist, i64 expected_size) { uchar md5_stored[MD5_DIGEST_SIZE]; struct timeval start,end; i64 total = 0, u; double tdiff; if (!NO_MD5) md5_init_ctx (&control->ctx); gettimeofday(&start,NULL); do { u = runzip_chunk(control, fd_in, expected_size, total); if (u < 1) { if (u < 0 || total < expected_size) { print_err("Failed to runzip_chunk in runzip_fd\n"); return -1; } } total += u; if (unlikely(!flush_tmpout(control))) { print_err("Failed to flush_tmpout in runzip_fd\n"); return -1; } if (TMP_INBUF) clear_tmpinbuf(control); else if (STDIN && !DECOMPRESS) { if (unlikely(!clear_tmpinfile(control))) { print_err("Failed to clear_tmpinfile in runzip_fd\n"); return -1; } } } while (total < expected_size || (!expected_size && !control->eof)); gettimeofday(&end,NULL); if (!ENCRYPT) { tdiff = end.tv_sec - start.tv_sec; if (!tdiff) tdiff = 1; print_output("\nAverage DeCompression Speed: %6.3fMB/s\n", (total / 1024 / 1024) / tdiff); } if (!NO_MD5) { int i,j; md5_finish_ctx (&control->ctx, control->md5_resblock); if (HAS_MD5) { i64 fdinend = seekto_fdinend(control); if (unlikely(fdinend == -1)) failure_return(("Failed to seekto_fdinend in rzip_fd\n"), -1); if (unlikely(seekto_fdin(control, fdinend - MD5_DIGEST_SIZE) == -1)) failure_return(("Failed to seekto_fdin in rzip_fd\n"), -1); if (unlikely(read_1g(control, fd_in, md5_stored, MD5_DIGEST_SIZE) != MD5_DIGEST_SIZE)) fatal_return(("Failed to read md5 data in runzip_fd\n"), -1); if (ENCRYPT) if (unlikely(!lrz_decrypt(control, md5_stored, MD5_DIGEST_SIZE, control->salt_pass))) return -1; for (i = 0; i < MD5_DIGEST_SIZE; i++) if (md5_stored[i] != control->md5_resblock[i]) { print_output("MD5 CHECK FAILED.\nStored:"); for (j = 0; j < MD5_DIGEST_SIZE; j++) print_output("%02x", md5_stored[j] & 0xFF); print_output("\nOutput file:"); for (j = 0; j < MD5_DIGEST_SIZE; j++) print_output("%02x", 
control->md5_resblock[j] & 0xFF); failure_return(("\n"), -1); } } if (HASH_CHECK || MAX_VERBOSE) { print_output("MD5: "); for (i = 0; i < MD5_DIGEST_SIZE; i++) print_output("%02x", control->md5_resblock[i] & 0xFF); print_output("\n"); } if (CHECK_FILE) { FILE *md5_fstream; int i, j; if (TMP_OUTBUF) close_tmpoutbuf(control); memcpy(md5_stored, control->md5_resblock, MD5_DIGEST_SIZE); if (unlikely(seekto_fdhist(control, 0) == -1)) fatal_return(("Failed to seekto_fdhist in runzip_fd\n"), -1); if (unlikely((md5_fstream = fdopen(fd_hist, "r")) == NULL)) fatal_return(("Failed to fdopen fd_hist in runzip_fd\n"), -1); if (unlikely(md5_stream(md5_fstream, control->md5_resblock))) fatal_return(("Failed to md5_stream in runzip_fd\n"), -1); /* We don't close the file here as it's closed in main */ for (i = 0; i < MD5_DIGEST_SIZE; i++) if (md5_stored[i] != control->md5_resblock[i]) { print_output("MD5 CHECK FAILED.\nStored:"); for (j = 0; j < MD5_DIGEST_SIZE; j++) print_output("%02x", md5_stored[j] & 0xFF); print_output("\nOutput file:"); for (j = 0; j < MD5_DIGEST_SIZE; j++) print_output("%02x", control->md5_resblock[j] & 0xFF); failure_return(("\n"), -1); } print_output("MD5 integrity of written file matches archive\n"); if (!HAS_MD5) print_output("Note this lrzip archive did not have a stored md5 value.\n" "The archive decompression was validated with crc32 and the md5 hash was " "calculated on decompression\n"); } } return total; } lrzip-0.651/runzip.h000066400000000000000000000016321421175057200143730ustar00rootroot00000000000000/* Copyright (C) 2006-2011,2022 Con Kolivas Copyright (C) 2011 Peter Hyman Copyright (C) 1998-2003 Andrew Tridgell This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef RUNZIP_H #define RUNZIP_H #include "lrzip_private.h" i64 runzip_fd(rzip_control *control, int fd_in, int fd_hist, i64 expected_size); #endif lrzip-0.651/rzip.c000066400000000000000000001062711421175057200140300ustar00rootroot00000000000000/* Copyright (C) 2006-2016,2018,2022 Con Kolivas Copyright (C) 1998 Andrew Tridgell Modified to use flat hash, memory limit and variable hash culling by Rusty Russell copyright (C) 2003. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ /* rzip compression algorithm */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #ifdef HAVE_SYS_MMAN_H # include #endif #include #ifdef HAVE_SYS_STAT_H # include #endif #ifdef HAVE_SYS_TIME_H # include #endif #ifdef HAVE_UNISTD_H # include #endif #include #include #ifdef HAVE_ERRNO_H # include #endif #ifdef HAVE_ENDIAN_H # include #elif HAVE_SYS_ENDIAN_H # include #endif #ifdef HAVE_ARPA_INET_H # include #endif #include #include "md5.h" #include "stream.h" #include "util.h" #include "lrzip_core.h" /* needed for CRC routines */ #include "lzma/C/7zCrc.h" #ifndef MAP_ANONYMOUS # define MAP_ANONYMOUS MAP_ANON #endif #define CHUNK_MULTIPLE (100 * 1024 * 1024) #define CKSUM_CHUNK 1024*1024 #define GREAT_MATCH 1024 #define MINIMUM_MATCH 31 /* Hash table works as follows. We start by throwing tags at every * offset into the table. As it fills, we start eliminating tags * which don't have lower bits set to one (ie. first we eliminate all * even tags, then all tags divisible by four, etc.). This ensures * that on average, all parts of the file are covered by the hash, if * sparsely. */ /* All zero means empty. We might miss the first chunk this way. */ struct hash_entry { i64 offset; tag t; }; /* Levels control hashtable size and bzip2 level. 
*/ static struct level { unsigned long mb_used; unsigned initial_freq; unsigned max_chain_len; } levels[10] = { { 1, 4, 1 }, { 2, 4, 2 }, { 4, 4, 2 }, { 8, 4, 2 }, { 16, 4, 3 }, { 32, 4, 4 }, { 32, 2, 6 }, { 64, 1, 16 }, /* More MB makes sense, but need bigger test files */ { 64, 1, 32 }, { 64, 1, 128 }, }; static void remap_low_sb(rzip_control *control, struct sliding_buffer *sb) { i64 new_offset; new_offset = sb->offset_search; round_to_page(&new_offset); print_maxverbose("Sliding main buffer to offset %lld\n", new_offset); if (unlikely(munmap(sb->buf_low, sb->size_low))) failure("Failed to munmap in remap_low_sb\n"); if (new_offset + sb->size_low > sb->orig_size) sb->size_low = sb->orig_size - new_offset; sb->offset_low = new_offset; sb->buf_low = (uchar *)mmap(sb->buf_low, sb->size_low, PROT_READ, MAP_SHARED, sb->fd, sb->orig_offset + sb->offset_low); if (unlikely(sb->buf_low == MAP_FAILED)) failure("Failed to re mmap in remap_low_sb\n"); } static inline void remap_high_sb(rzip_control *control, struct sliding_buffer *sb, i64 p) { if (unlikely(munmap(sb->buf_high, sb->size_high))) failure("Failed to munmap in remap_high_sb\n"); sb->size_high = sb->high_length; /* In case we shrunk it when we hit the end of the file */ sb->offset_high = p; /* Make sure offset is rounded to page size of total offset */ sb->offset_high -= (sb->offset_high + sb->orig_offset) % control->page_size; if (unlikely(sb->offset_high + sb->size_high > sb->orig_size)) sb->size_high = sb->orig_size - sb->offset_high; sb->buf_high = (uchar *)mmap(sb->buf_high, sb->size_high, PROT_READ, MAP_SHARED, sb->fd, sb->orig_offset + sb->offset_high); if (unlikely(sb->buf_high == MAP_FAILED)) failure("Failed to re mmap in remap_high_sb\n"); } /* We use a "sliding mmap" to effectively read more than we can fit into the * compression window. 
This is done by using a maximally sized lower mmap at
 * the beginning of the block which slides up once the hash search moves beyond
 * it, and a small high mmap block (one page, see init_sliding_mmap) that
 * slides up and down as is required for any offsets outside the range of the
 * lower one. This is much slower than mmap but makes it possible to have
 * unlimited sized compression windows.
 * We use a pointer to the function we actually want to use and only enable
 * the sliding mmap version if we need sliding mmap functionality as this is
 * a hot function during the rzip phase */
static uchar *sliding_get_sb(rzip_control *control, i64 p)
{
	struct sliding_buffer *sb = &control->sb;
	i64 base = sb->offset_low;

	/* Served from the low window? */
	if (p >= base && p < base + sb->size_low)
		return sb->buf_low + (p - base);

	/* Served from the high window as currently mapped? */
	base = sb->offset_high;
	if (p >= base && p < base + sb->size_high)
		return sb->buf_high + (p - base);

	/* Neither window covers p: move the high window over it. Read
	 * sb->offset_high afresh afterwards since the remap changes it. */
	remap_high_sb(control, &control->sb, p);
	return sb->buf_high + (p - sb->offset_high);
}

/* The length of the contiguous range of the sliding buffer,
 * starting from the offset P.
*/
/* Returns the number of bytes from p to the end of whichever window (low
 * first, then high) currently contains p. If p is in neither window this
 * reports a fatal error and returns 0. */
static inline i64 sliding_get_sb_range(rzip_control *control, i64 p)
{
	struct sliding_buffer *sb = &control->sb;
	i64 sbo, sbs;

	sbo = sb->offset_low;
	sbs = sb->size_low;
	if (p >= sbo && p < sbo + sbs)
		return (sbs - (p - sbo));
	sbo = sb->offset_high;
	sbs = sb->size_high;
	if (likely(p >= sbo && p < (sbo + sbs)))
		return (sbs - (p - sbo));

	fatal_return(("sliding_get_sb_range: the pointer is out of range\n"), 0);
}

/* Since the sliding get_sb only allows us to access one byte at a time, we
 * do the same as we did with get_sb with the memcpy since one memcpy is much
 * faster than numerous memcpys 1 byte at a time */
/* Non-sliding copy: len bytes straight out of the low mapping at offset. */
static void single_mcpy(rzip_control *control, unsigned char *buf, i64 offset, i64 len)
{
	memcpy(buf, control->sb.buf_low + offset, len);
}

/* Sliding copy: copies len bytes starting at offset in pieces, each piece
 * limited to what the window holding the current position can supply. */
static void sliding_mcpy(rzip_control *control, unsigned char *buf, i64 offset, i64 len)
{
	i64 n = 0;

	while (n < len) {
		/* sliding_get_sb() may remap the high window, so the range
		 * is queried only after the pointer has been fetched */
		uchar *srcbuf = sliding_get_sb(control, offset + n);
		i64 m = MIN(sliding_get_sb_range(control, offset + n), len - n);

		memcpy(buf + n, srcbuf, m);
		n += m;
	}
}

/* All put_u8/u32/vchars go to stream 0 */
/* Write the single byte b. */
static inline void put_u8(rzip_control *control, void *ss, uchar b)
{
	write_stream(control, ss, 0, &b, 1);
}

/* Write s as 4 little endian bytes. */
static inline void put_u32(rzip_control *control, void *ss, uint32_t s)
{
	s = htole32(s);
	write_stream(control, ss, 0, (uchar *)&s, 4);
}

/* Put a variable length of bytes dependent on how big the chunk is */
/* Writes the first `length` bytes of the little endian form of s. */
static void put_vchars(rzip_control *control, void *ss, i64 s, i64 length)
{
	s = htole64(s);
	write_stream(control, ss, 0, (uchar *)&s, length);
}

/* Write a record header: the type byte followed by a 2 byte length. */
static void put_header(rzip_control *control, void *ss, uchar head, i64 len)
{
	put_u8(control, ss, head);
	put_vchars(control, ss, len, 2);
}

/* Emit match records (type 1) for len bytes at p that repeat the data at
 * offset. The header length field is 2 bytes, so the match is split into
 * pieces of at most 0xFFFF bytes; each carries the distance p - offset in
 * st->chunk_bytes bytes. Updates the match statistics. */
static inline void put_match(rzip_control *control, struct rzip_state *st, i64 p, i64 offset, i64 len)
{
	do {
		i64 ofs;
		i64 n = len;

		if (n > 0xFFFF)
			n = 0xFFFF;

		ofs = (p - offset);
		put_header(control, st->ss, 1, n);
		put_vchars(control, st->ss, ofs, st->chunk_bytes);

		st->stats.matches++;
		st->stats.match_bytes += n;
		len -= n;
		p += n;
		offset += n;
	} while (len);
}

/* Append len bytes of input, starting at input offset p, to the given
 * stream's buffer using the active do_mcpy routine. The buffer is flushed
 * each time it becomes full. Nothing is returned. */
static inline void write_sbstream(rzip_control *control, void *ss, int stream, i64 p, i64 len)
{
	struct stream_info *sinfo = ss;

	while (len) {
		/* Copy no more than the free space left in the buffer */
		i64 n = MIN(sinfo->bufsize - sinfo->s[stream].buflen, len);

		control->do_mcpy(control, sinfo->s[stream].buf + sinfo->s[stream].buflen, p, n);

		sinfo->s[stream].buflen += n;
		p += n;
		len -= n;

		if (sinfo->s[stream].buflen == sinfo->bufsize)
			flush_buffer(control, sinfo, stream);
	}
}

/* Emit literal records (type 0) for the input range [last, p): a header on
 * stream 0 and the bytes themselves on stream 1, in pieces of at most 0xFFFF
 * bytes. Called with last == p it writes a single zero length header, which
 * hash_search uses to terminate the chunk. */
static void put_literal(rzip_control *control, struct rzip_state *st, i64 last, i64 p)
{
	do {
		i64 len = p - last;

		if (len > 0xFFFF)
			len = 0xFFFF;

		st->stats.literals++;
		st->stats.literal_bytes += len;

		put_header(control, st->ss, 0, len);

		if (len)
			write_sbstream(control, st->ss, 1, last, len);
		last += len;
	} while (p > last);
}

/* Could give false positive on offset 0. Who cares. */
/* A slot counts as empty when both its offset and its tag are zero. */
static inline bool empty_hash(struct hash_entry *he)
{
	return !(he->offset | he->t);
}

/* Home slot of a tag: its low st->hash_bits bits. */
static i64 primary_hash(struct rzip_state *st, tag t)
{
	return t & ((1 << st->hash_bits) - 1);
}

/* Extend a low-bit mask by one more set bit. */
static inline tag increase_mask(tag tag_mask)
{
	/* Get more precise. */
	return (tag_mask << 1) | 1;
}

/* True if t does not satisfy the next stricter mask, i.e. it is among the
 * entries the current cleaning sweep removes. */
static inline bool minimum_bitness(struct rzip_state *st, tag t)
{
	tag better_than_min = increase_mask(st->minimum_tag_mask);

	if ((t & better_than_min) != better_than_min)
		return true;
	return false;
}

/* Is a going to be cleaned before b? ie. does a have fewer low bits
 * set than b? */
/* Implemented by inverting both tags and comparing the position of the
 * lowest set bit of the results with ffsll(). */
static inline bool lesser_bitness(tag a, tag b)
{
	a ^= 0xffffffffffffffff;
	b ^= 0xffffffffffffffff;
	return (ffsll(a) < ffsll(b));
}

/* If hash bucket is taken, we spill into next bucket(s). Secondary hashing
   works better in theory, but modern caches make this 20% faster. */
/* Stores (t, offset) in the first usable slot at or after the home slot.
 * hash_count is decremented whenever an occupied slot is overwritten; the
 * caller (hash_search) increments it before calling. victim_round is static
 * and therefore shared by all calls. */
static void insert_hash(struct rzip_state *st, tag t, i64 offset)
{
	i64 h, victim_h = 0, round = 0;
	/* If we need to kill one, this will be it. */
	static i64 victim_round = 0;
	struct hash_entry *he;

	h = primary_hash(st, t);
	he = &st->hash_table[h];
	while (!empty_hash(he)) {
		/* If this due for cleaning anyway, just replace it:
		   rehashing might move it behind tag_clean_ptr. */
		if (minimum_bitness(st, he->t)) {
			st->hash_count--;
			break;
		}
		/* If we are better than current occupant, we can't
		   jump over it: it will be cleaned before us, and
		   no one would then find us in the hash table. Rehash
		   it, then take its place. */
		if (lesser_bitness(he->t, t)) {
			insert_hash(st, he->t, he->offset);
			break;
		}

		/* If we have lots of identical patterns, we end up
		   with lots of the same hash number. Discard random. */
		if (he->t == t) {
			if (round == victim_round)
				victim_h = h;
			if (++round == st->level->max_chain_len) {
				h = victim_h;
				he = &st->hash_table[h];
				st->hash_count--;
				victim_round++;
				if (victim_round == st->level->max_chain_len)
					victim_round = 0;
				break;
			}
		}

		/* Linear probe with wrap-around */
		h++;
		h &= ((1 << st->hash_bits) - 1);
		he = &st->hash_table[h];
	}

	he->t = t;
	he->offset = offset;
}

/* Eliminate one hash entry with minimum number of lower bits set.
   Returns tag requirement for any new entries. */
/* The sweep position is kept in st->tag_clean_ptr between calls. When a
 * sweep reaches the end of the table the minimum mask is tightened and the
 * sweep restarts from slot 0. */
static inline tag clean_one_from_hash(rzip_control *control, struct rzip_state *st)
{
	struct hash_entry *he;
	tag better_than_min;

again:
	better_than_min = increase_mask(st->minimum_tag_mask);
	if (!st->tag_clean_ptr)
		print_maxverbose("Starting sweep for mask %u\n", (unsigned int)st->minimum_tag_mask);

	for (; st->tag_clean_ptr < (1U << st->hash_bits); st->tag_clean_ptr++) {
		he = &st->hash_table[st->tag_clean_ptr];
		if (empty_hash(he))
			continue;
		if ((he->t & better_than_min) != better_than_min) {
			he->offset = 0;
			he->t = 0;
			st->hash_count--;
			return better_than_min;
		}
	}

	/* We hit the end: everything in hash satisfies the better mask. */
	st->minimum_tag_mask = better_than_min;
	st->tag_clean_ptr = 0;
	goto again;
}

/* Roll the tag forward to position p: XOR out the byte that left the
 * MINIMUM_MATCH window (p - 1) and XOR in the one that entered it.
 * Non-sliding variant reading the low mapping directly. */
static void single_next_tag(rzip_control *control, struct rzip_state *st, i64 p, tag *t)
{
	uchar u;

	u = control->sb.buf_low[p - 1];
	*t ^= st->hash_index[u];
	u = control->sb.buf_low[p + MINIMUM_MATCH - 1];
	*t ^= st->hash_index[u];
}

/* Same as single_next_tag but fetching bytes through sliding_get_sb(). */
static void sliding_next_tag(rzip_control *control, struct rzip_state *st, i64 p, tag *t)
{
	uchar *u;

	u = sliding_get_sb(control, p - 1);
	*t ^= st->hash_index[*u];
	u = sliding_get_sb(control, p + MINIMUM_MATCH - 1);
	*t ^= st->hash_index[*u];
}

/* Compute the tag of the MINIMUM_MATCH bytes starting at p from scratch:
 * the XOR of hash_index[] over those bytes. Non-sliding variant. */
static tag single_full_tag(rzip_control *control, struct rzip_state *st, i64 p)
{
	tag ret = 0;
	int i;
	uchar u;

	for (i = 0; i < MINIMUM_MATCH; i++) {
		u = control->sb.buf_low[p + i];
		ret ^= st->hash_index[u];
	}
	return ret;
}

/* Same as single_full_tag but fetching bytes through sliding_get_sb(). */
static tag sliding_full_tag(rzip_control *control, struct rzip_state *st, i64 p)
{
	tag ret = 0;
	int i;
	uchar *u;

	for (i = 0; i < MINIMUM_MATCH; i++) {
		u = sliding_get_sb(control, p + i);
		ret ^= st->hash_index[*u];
	}
	return ret;
}

/* Measure the match between the data at p0 and the earlier data at op.
 * The match is extended forwards up to end, then backwards from p0 but not
 * before st->last_match. *rev receives the number of bytes gained backwards.
 * Returns the total length, or 0 if op is not before p0 or the total is
 * shorter than MINIMUM_MATCH. Non-sliding variant. */
static i64 single_match_len(rzip_control *control, struct rzip_state *st, i64 p0, i64 op, i64 end, i64 *rev)
{
	i64 p, len;

	if (op >= p0)
		return 0;

	p = p0;
	while (p < end && control->sb.buf_low[p] == control->sb.buf_low[op]) {
		p++;
		op++;
	}
	len = p - p0;

	/* Rewind both positions to the start for the backward scan */
	p = p0;
	op -= len;

	end = MAX(0, st->last_match);

	while (p > end && op > 0 && control->sb.buf_low[op - 1] == control->sb.buf_low[p - 1]) {
		op--;
		p--;
	}

	len += *rev = p0 - p;
	if (len < MINIMUM_MATCH)
		return 0;

	return len;
}

/* Same as single_match_len but fetching bytes through sliding_get_sb(). */
static i64 sliding_match_len(rzip_control *control, struct rzip_state *st, i64 p0, i64 op, i64 end, i64 *rev)
{
	i64 p, len;

	if (op >= p0)
		return 0;

	p = p0;
	while (p < end && *sliding_get_sb(control, p) == *sliding_get_sb(control, op)) {
		p++;
		op++;
	}
	len = p - p0;

	/* Rewind both positions to the start for the backward scan */
	p = p0;
	op -= len;

	end = MAX(0, st->last_match);

	while (p > end && op > 0 && *sliding_get_sb(control, op - 1) == *sliding_get_sb(control, p - 1)) {
		op--;
		p--;
	}

	len += *rev = p0 - p;
	if (len < MINIMUM_MATCH)
		return 0;

	return len;
}

/* Probe the hash table from the home slot of t until an empty slot is found
 * and return the longest match among entries carrying the same tag.
 * On a hit *offset receives the start of the earlier data (already moved
 * back by the reverse extension) and *reverse the reverse extension itself.
 * *offset is left untouched when nothing matches. Updates the tag hit and
 * miss statistics. */
static inline i64 find_best_match(rzip_control *control, struct rzip_state *st, tag t, i64 p, i64 end, i64 *offset, i64 *reverse)
{
	struct hash_entry *he;
	i64 length = 0;
	i64 rev;
	i64 h;

	rev = 0;
	*reverse = 0;

	/* Could optimise: if lesser goodness, can stop search.  But
	 * chains are usually short anyway. */
	h = primary_hash(st, t);
	he = &st->hash_table[h];
	while (!empty_hash(he)) {
		i64 mlen;

		if (t == he->t) {
			mlen = control->match_len(control, st, p, he->offset, end, &rev);

			if (mlen) {
				if (mlen > length) {
					length = mlen;
					(*offset) = he->offset - rev;
					(*reverse) = rev;
				}
				st->stats.tag_hits++;
			} else
				st->stats.tag_misses++;
		}

		h++;
		h &= ((1 << st->hash_bits) - 1);
		he = &st->hash_table[h];
	}

	return length;
}

/* Diagnostic output: counts the occupied slots and how many of them sit in
 * their home slot, and warns if the count disagrees with st->hash_count. */
static void show_distrib(rzip_control *control, struct rzip_state *st)
{
	struct hash_entry *he;
	i64 primary = 0;
	i64 total = 0;
	i64 i;

	for (i = 0; i < (1U << st->hash_bits); i++) {
		he = &st->hash_table[i];
		if (empty_hash(he))
			continue;
		total++;
		if (primary_hash(st, he->t) == i)
			primary++;
	}

	if (total != st->hash_count)
		print_err("WARNING: hash_count says total %lld\n", st->hash_count);

	if (!total)
		print_output("0 total hashes\n");
	else {
		print_output("%lld total hashes -- %lld in primary bucket (%-2.3f%%)\n", total, primary,
		       primary * 100.0 / total);
	}
}

/* Perform all checksumming in a separate thread to speed up the hash search.
*/ static void *cksumthread(void *data) { rzip_control *control = (rzip_control *)data; pthread_detach(pthread_self()); *control->checksum.cksum = CrcUpdate(*control->checksum.cksum, control->checksum.buf, control->checksum.len); if (!NO_MD5) md5_process_bytes(control->checksum.buf, control->checksum.len, &control->ctx); dealloc(control->checksum.buf); cksem_post(control, &control->cksumsem); return NULL; } static inline void cksum_update(rzip_control *control) { pthread_t thread; create_pthread(control, &thread, NULL, cksumthread, control); } static inline void hash_search(rzip_control *control, struct rzip_state *st, double pct_base, double pct_multiple) { i64 cksum_limit = 0, p, end, cksum_chunks, cksum_remains, i; tag t = 0, tag_mask = (1 << st->level->initial_freq) - 1; struct sliding_buffer *sb = &control->sb; int lastpct = 0, last_chunkpct = 0; struct { i64 p; i64 ofs; i64 len; } current; if (st->hash_table) memset(st->hash_table, 0, sizeof(st->hash_table[0]) * (1<hash_bits)); else { i64 hashsize = st->level->mb_used * (1024 * 1024 / sizeof(st->hash_table[0])); for (st->hash_bits = 0; (1U << st->hash_bits) < hashsize; st->hash_bits++); print_maxverbose("hashsize = %lld. bits = %lld. %luMB\n", hashsize, st->hash_bits, st->level->mb_used); /* 66% full at max. 
*/ st->hash_limit = (1 << st->hash_bits) / 3 * 2; st->hash_table = calloc(sizeof(st->hash_table[0]), (1 << st->hash_bits)); if (unlikely(!st->hash_table)) failure("Failed to allocate hash table in hash_search\n"); } st->minimum_tag_mask = tag_mask; st->tag_clean_ptr = 0; st->cksum = 0; st->hash_count = 0; p = 0; end = st->chunk_size - MINIMUM_MATCH; st->last_match = p; current.len = 0; current.p = p; current.ofs = 0; if (likely(end > 0)) t = control->full_tag(control, st, p); while (p < end) { i64 reverse, mlen, offset; sb->offset_search = ++p; if (unlikely(sb->offset_search > sb->offset_low + sb->size_low)) remap_low_sb(control, &control->sb); if (unlikely(p % 128 == 0 && st->chunk_size)) { i64 chunk_pct; int pct; pct = pct_base + (pct_multiple * (100.0 * p) / st->chunk_size ); chunk_pct = p * 100 / end; if (pct != lastpct || chunk_pct != last_chunkpct) { if (!STDIN || st->stdin_eof) print_progress("Total: %2d%% ", pct); print_progress("Chunk: %2d%%\r", chunk_pct); if (control->info_cb) control->info_cb(control->info_data, (!STDIN || st->stdin_eof) ? pct : -1, chunk_pct); lastpct = pct; last_chunkpct = chunk_pct; } } control->next_tag(control, st, p, &t); /* Don't look for a match if there are no tags with this number of bits in the hash table. */ if ((t & st->minimum_tag_mask) != st->minimum_tag_mask) continue; offset = 0; mlen = find_best_match(control, st, t, p, end, &offset, &reverse); /* Only insert occasionally into hash. 
*/ if ((t & tag_mask) == tag_mask) { st->stats.inserts++; st->hash_count++; insert_hash(st, t, p); if (st->hash_count > st->hash_limit) tag_mask = clean_one_from_hash(control, st); } if (mlen > current.len) { current.p = p - reverse; current.len = mlen; current.ofs = offset; } if ((current.len >= GREAT_MATCH || p >= current.p + MINIMUM_MATCH) && current.len >= MINIMUM_MATCH) { if (st->last_match < current.p) put_literal(control, st, st->last_match, current.p); put_match(control, st, current.p, current.ofs, current.len); st->last_match = current.p + current.len; current.p = p = st->last_match; current.len = 0; t = control->full_tag(control, st, p); } if (p > cksum_limit) { /* We lock the mutex here and unlock it in the * cksumthread. This lock protects all the data in * control->checksum. */ cksem_wait(control, &control->cksumsem); control->checksum.len = MIN(st->chunk_size - p, control->page_size); control->checksum.buf = malloc(control->checksum.len); if (unlikely(!control->checksum.buf)) failure("Failed to malloc ckbuf in hash_search\n"); control->do_mcpy(control, control->checksum.buf, cksum_limit, control->checksum.len); control->checksum.cksum = &st->cksum; cksum_limit += control->checksum.len; cksum_update(control); } } if (MAX_VERBOSE) show_distrib(control, st); if (st->last_match < st->chunk_size) put_literal(control, st, st->last_match, st->chunk_size); if (st->chunk_size > cksum_limit) { i64 cksum_len = control->maxram; void *buf; while (42) { round_to_page(&cksum_len); buf = malloc(cksum_len); if (buf) { print_maxverbose("Malloced %"PRId64" for checksum ckbuf\n", cksum_len); break; } cksum_len = cksum_len / 3 * 2; if (cksum_len < control->page_size) failure("Failed to malloc any ram for checksum ckbuf\n"); } /* Compute checksum. 
If the entire chunk is longer than maxram, * do it "per-partes" */ cksem_wait(control, &control->cksumsem); control->checksum.buf = buf; control->checksum.len = st->chunk_size - cksum_limit; cksum_chunks = control->checksum.len / cksum_len; cksum_remains = control->checksum.len % cksum_len; for (i = 0; i < cksum_chunks; i++) { control->do_mcpy(control, control->checksum.buf, cksum_limit, cksum_len); cksum_limit += cksum_len; st->cksum = CrcUpdate(st->cksum, control->checksum.buf, cksum_len); if (!NO_MD5) md5_process_bytes(control->checksum.buf, cksum_len, &control->ctx); } /* Process end of the checksum buffer */ control->do_mcpy(control, control->checksum.buf, cksum_limit, cksum_remains); st->cksum = CrcUpdate(st->cksum, control->checksum.buf, cksum_remains); if (!NO_MD5) md5_process_bytes(control->checksum.buf, cksum_remains, &control->ctx); dealloc(control->checksum.buf); cksem_post(control, &control->cksumsem); } else { cksem_wait(control, &control->cksumsem); cksem_post(control, &control->cksumsem); } put_literal(control, st, 0, 0); put_u32(control, st->ss, st->cksum); } static inline void init_hash_indexes(struct rzip_state *st) { int i; for (i = 0; i < 256; i++) st->hash_index[i] = ((random() << 16) ^ random()); } #if !defined(__linux) # define mremap fake_mremap static inline void *fake_mremap(void *old_address, size_t old_size, size_t new_size, int flags __UNUSED__) { if (new_size > old_size) { fprintf(stderr, "fake_mremap: This should only be used to shrink things. I'm not bothering with this.\n"); exit(1); } else { /* new_size occupies N pages; old_size occupies M > N pages; we want to unmap the M - N pages at the end. 
note the idiom: ceiling(n/k) = (n+k-1) div k */
		size_t kept_n = (new_size + PAGE_SIZE - 1) / PAGE_SIZE;
		int ret = munmap(old_address + (kept_n * PAGE_SIZE), old_size - (kept_n * PAGE_SIZE));

		if (ret < 0)
			return MAP_FAILED;

		return old_address;
	}
}
#endif

/* stdin is not file backed so we have to emulate the mmap by mapping
 * anonymous ram and reading stdin into it. It means the maximum ram
 * we can use will be less but we will already have determined this in
 * rzip_chunk */
/* Reads up to st->chunk_size bytes from control->inFILE into buf, at most
 * one_g per read() call. On end of input the mapping is shrunk to the bytes
 * actually read (or to one page for empty input), chunk_size and mmap_size
 * are adjusted and the eof flags are set. control->st_size grows by the
 * number of bytes read. */
static inline void mmap_stdin(rzip_control *control, uchar *buf,
			      struct rzip_state *st)
{
	i64 len = st->chunk_size;
	uchar *offset_buf = buf;
	ssize_t ret;
	i64 total;

	total = 0;
	while (len > 0) {
		ret = MIN(len, one_g);
		ret = read(fileno(control->inFILE), offset_buf, (size_t)ret);
		if (unlikely(ret < 0))
			failure("Failed to read in mmap_stdin\n");
		total += ret;
		if (ret == 0) {
			/* Should be EOF */
			print_maxverbose("Shrinking chunk to %lld\n", total);
			/* NOTE(review): the remapped address is only stored in
			 * the local buf; presumably a shrink with flags 0
			 * leaves the mapping in place - confirm. */
			if (likely(total)) {
				buf = (uchar *)mremap(buf, st->chunk_size, total, 0);
				st->mmap_size = st->chunk_size = total;
			} else {
				/* Empty file */
				buf = (uchar *)mremap(buf, st->chunk_size, control->page_size, 0);
				st->mmap_size = control->page_size;
				st->chunk_size = 0;
			}
			if (unlikely(buf == MAP_FAILED))
				failure("Failed to remap to smaller buf in mmap_stdin\n");
			control->eof = st->stdin_eof = 1;
			break;
		}
		offset_buf += ret;
		len -= ret;
	}
	control->st_size += total;
}

/* Reset the sliding buffer bookkeeping for a new chunk. For file input a
 * one page high window is mapped at `offset`; with STDIN no high window is
 * created. The low window itself is mapped by the caller; only its size
 * (st->mmap_size) and the chunk size are recorded here. */
static inline void
init_sliding_mmap(rzip_control *control, struct rzip_state *st, int fd_in,
		  i64 offset)
{
	struct sliding_buffer *sb = &control->sb;

	/* Initialise the high buffer. One page size is fastest to manipulate */
	if (!STDIN) {
		sb->high_length = control->page_size;
		sb->buf_high = (uchar *)mmap(NULL, sb->high_length, PROT_READ, MAP_SHARED, fd_in, offset);
		if (unlikely(sb->buf_high == MAP_FAILED))
			failure("Unable to mmap buf_high in init_sliding_mmap\n");
		sb->size_high = sb->high_length;
		sb->offset_high = 0;
	}
	sb->offset_low = 0;
	sb->offset_search = 0;
	sb->size_low = st->mmap_size;
	sb->orig_size = st->chunk_size;
	sb->fd = fd_in;
}

/* Record st->ss in a newly allocated list node and make it st->head.
 * NOTE(review): the node's prev link is taken from st->sslist, while
 * clear_sslist() walks st->head via prev. Nothing visible here updates
 * st->sslist - confirm it is kept in step with st->head elsewhere, otherwise
 * only the most recent node is reachable when the list is cleared. */
static void add_to_sslist(rzip_control *control, struct rzip_state *st)
{
	struct node *node = calloc(sizeof(struct node), 1);

	if (unlikely(!node))
		failure("Failed to calloc struct node in add_to_sslist\n");
	node->data = st->ss;
	node->prev = st->sslist;
	st->head = node;
}

/* compress a chunk of an open file. Assumes that the file is able to
   be mmap'd and is seekable */
/* Sets up the sliding buffer, opens the output streams, runs hash_search()
 * over the chunk, unmaps the low (and, for file input, the high) window and
 * closes the streams. The stream info is then queued with add_to_sslist()
 * for release later. */
static inline void
rzip_chunk(rzip_control *control, struct rzip_state *st, int fd_in, int fd_out,
	   i64 offset, double pct_base, double pct_multiple)
{
	struct sliding_buffer *sb = &control->sb;

	init_sliding_mmap(control, st, fd_in, offset);

	st->ss = open_stream_out(control, fd_out, NUM_STREAMS, st->chunk_size, st->chunk_bytes);
	if (unlikely(!st->ss))
		failure("Failed to open streams in rzip_chunk\n");

	print_verbose("Beginning rzip pre-processing phase\n");
	hash_search(control, st, pct_base, pct_multiple);

	/* unmap buffer before closing and reallocating streams */
	if (unlikely(munmap(sb->buf_low, sb->size_low))) {
		close_stream_out(control, st->ss);
		failure("Failed to munmap in rzip_chunk\n");
	}
	if (!STDIN) {
		if (unlikely(munmap(sb->buf_high, sb->size_high))) {
			close_stream_out(control, st->ss);
			failure("Failed to munmap in rzip_chunk\n");
		}
	}

	if (unlikely(close_stream_out(control, st->ss)))
		failure("Failed to flush/close streams in rzip_chunk\n");

	/* Save the sinfo data to a list to be safely released after all
	 * threads have been shut down. */
	add_to_sslist(control, st);
}

/* Free every node reachable from st->head through the prev links, together
 * with the stream_info it refers to and that stream_info's `s` array. */
static void clear_sslist(struct rzip_state *st)
{
	while (st->head) {
		struct node *node = st->head;
		struct stream_info *sinfo = node->data;

		dealloc(sinfo->s);
		dealloc(sinfo);
		st->head = node->prev;
		dealloc(node);
	}
}

/* compress a whole file chunks at a time */
void rzip_fd(rzip_control *control, int fd_in, int fd_out)
{
	struct sliding_buffer *sb = &control->sb;

	/* add timers for ETA estimates
	 * Base it off the file size and number of iterations required
	 * depending on compression window size
	 * Track elapsed time and estimated time to go
	 * If file size < compression window, can't do
	 */
	struct timeval current, start, last;
	i64 len = 0, last_chunk = 0;
	int pass = 0, passes, j;
	double chunkmbs, tdiff;
	struct rzip_state *st;
	struct statvfs fbuf;
	struct stat s, s2;
	i64 free_space;

	init_mutex(control, &control->control_lock);
	if (!NO_MD5)
		md5_init_ctx(&control->ctx);
	cksem_init(control, &control->cksumsem);
	cksem_post(control, &control->cksumsem);

	st = calloc(sizeof(*st), 1);
	if (unlikely(!st))
		failure("Failed to allocate control state in rzip_fd\n");

	if (LZO_COMPRESS) {
		if (unlikely(lzo_init() != LZO_E_OK)) {
			dealloc(st);
			failure("lzo_init() failed\n");
		}
	}

	if (unlikely(fstat(fd_in, &s))) {
		dealloc(st);
		failure("Failed to stat fd_in in rzip_fd\n");
	}

	if (!STDIN) {
		len = control->st_size = s.st_size;
		print_verbose("File size: %lld\n", len);
	} else
		control->st_size = 0;

	if (!STDOUT) {
		/* Check if there's enough free space on the device chosen to fit the
		 * compressed file, based on the compressed file being as large as the
		 * uncompressed file.
*/ if (unlikely(fstatvfs(fd_out, &fbuf))) { dealloc(st); failure("Failed to fstatvfs in compress_file\n"); } free_space = (i64)fbuf.f_bsize * (i64)fbuf.f_bavail; if (free_space < control->st_size) { if (FORCE_REPLACE) print_output("Warning, possibly inadequate free space detected, but attempting to compress due to -f option being used.\n"); else { dealloc(st); failure("Possibly inadequate free space to compress file, use -f to override.\n"); } } } /* Optimal use of ram involves using no more than 2/3 of it, so we * allocate 1/3 of it to the main buffer and use a sliding mmap * buffer to work on 2/3 ram size, leaving enough ram for the * compression backends */ control->max_mmap = control->maxram; round_to_page(&control->max_mmap); /* Set maximum chunk size to 2/3 of ram if not unlimited or specified * by a control window. When it's smaller than the file size, round it * to page size for efficiency. */ if (UNLIMITED) control->max_chunk = control->st_size; else if (control->window) control->max_chunk = control->window * CHUNK_MULTIPLE; else control->max_chunk = control->ramsize / 3 * 2; control->max_mmap = MIN(control->max_mmap, control->max_chunk); if (control->max_chunk < control->st_size) round_to_page(&control->max_chunk); if (!STDIN) st->chunk_size = MIN(control->max_chunk, len); else st->chunk_size = control->max_mmap; if (st->chunk_size < len) round_to_page(&st->chunk_size); st->level = &levels[control->compression_level]; st->fd_in = fd_in; st->fd_out = fd_out; st->stdin_eof = 0; init_hash_indexes(st); passes = 0; /* set timers and chunk counter */ last.tv_sec = last.tv_usec = 0; gettimeofday(&start, NULL); prepare_streamout_threads(control); control->do_mcpy = single_mcpy; control->next_tag = &single_next_tag; control->full_tag = &single_full_tag; control->match_len = &single_match_len; while (!pass || len > 0 || (STDIN && !st->stdin_eof)) { double pct_base, pct_multiple; i64 offset = s.st_size - len; int bits = 8; st->chunk_size = control->max_chunk; 
st->mmap_size = control->max_mmap; if (!STDIN) { st->chunk_size = MIN(st->chunk_size, len); if (likely(st->chunk_size)) st->mmap_size = MIN(st->mmap_size, len); else st->mmap_size = control->page_size; } retry: if (STDIN) { /* NOTE the buf is saved here for STDIN mode */ sb->buf_low = mmap(NULL, st->mmap_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); /* Better to shrink the window to the largest size that works than fail */ if (sb->buf_low == MAP_FAILED) { if (unlikely(errno != ENOMEM)) { close_streamout_threads(control); dealloc(st->hash_table); dealloc(st); failure("Failed to mmap %s\n", control->infile); } st->mmap_size = st->mmap_size / 10 * 9; round_to_page(&st->mmap_size); if (unlikely(!st->mmap_size)) { close_streamout_threads(control); dealloc(st->hash_table); dealloc(st); failure("Unable to mmap any ram\n"); } goto retry; } st->chunk_size = st->mmap_size; mmap_stdin(control, sb->buf_low, st); } else { /* NOTE The buf is saved here for !STDIN mode */ sb->buf_low = (uchar *)mmap(sb->buf_low, st->mmap_size, PROT_READ, MAP_SHARED, fd_in, offset); if (sb->buf_low == MAP_FAILED) { if (unlikely(errno != ENOMEM)) { close_streamout_threads(control); dealloc(st->hash_table); dealloc(st); failure("Failed to mmap %s\n", control->infile); } st->mmap_size = st->mmap_size / 10 * 9; round_to_page(&st->mmap_size); if (unlikely(!st->mmap_size)) { close_streamout_threads(control); dealloc(st->hash_table); dealloc(st); failure("Unable to mmap any ram\n"); } goto retry; } if (st->mmap_size < st->chunk_size) { print_maxverbose("Enabling sliding mmap mode and using mmap of %lld bytes with window of %lld bytes\n", st->mmap_size, st->chunk_size); control->do_mcpy = &sliding_mcpy; control->next_tag = &sliding_next_tag; control->full_tag = &sliding_full_tag; control->match_len = &sliding_match_len; } } print_maxverbose("Succeeded in testing %lld sized mmap for rzip pre-processing\n", st->mmap_size); if (st->chunk_size > control->ramsize) 
print_verbose("Compression window is larger than ram, will proceed with unlimited mode possibly much slower\n"); if (!passes && !STDIN && st->chunk_size) { passes = s.st_size / st->chunk_size + !!(s.st_size % st->chunk_size); if (passes == 1) print_verbose("Will take 1 pass\n"); else print_verbose("Will take %d passes\n", passes); } sb->orig_offset = offset; print_maxverbose("Chunk size: %lld\n", st->chunk_size); /* Determine the chunk byte width to write to the file * This allows archives of different chunk sizes to have * optimal byte width entries. When working with stdin we * won't know in advance how big it is so it will always be * rounded up to the window size. */ while (st->chunk_size >> bits > 0) bits++; st->chunk_bytes = bits / 8; if (bits % 8) st->chunk_bytes++; print_maxverbose("Byte width: %d\n", st->chunk_bytes); if (STDIN) pct_base = (100.0 * -len) / control->st_size; else pct_base = (100.0 * (control->st_size - len)) / control->st_size; pct_multiple = ((double)st->chunk_size) / control->st_size; pass++; if (st->stdin_eof) passes = pass; gettimeofday(¤t, NULL); /* this will count only when size > window */ if (last.tv_sec > 0 && pct_base > 100) { unsigned int eta_hours, eta_minutes, eta_seconds, elapsed_time, finish_time, elapsed_hours, elapsed_minutes, elapsed_seconds, diff_seconds; elapsed_time = current.tv_sec - start.tv_sec; finish_time = elapsed_time / (pct_base / 100.0); elapsed_hours = elapsed_time / 3600; elapsed_minutes = (elapsed_time / 60) % 60; elapsed_seconds = elapsed_time % 60; diff_seconds = finish_time - elapsed_time; eta_hours = diff_seconds / 3600; eta_minutes = (diff_seconds / 60) % 60; eta_seconds = diff_seconds % 60; chunkmbs = (last_chunk / 1024 / 1024) / (double)(current.tv_sec-last.tv_sec); if (!STDIN || st->stdin_eof) print_verbose("\nPass %d / %d -- Elapsed Time: %02d:%02d:%02d. ETA: %02d:%02d:%02d. 
Compress Speed: %3.3fMB/s.\n", pass, passes, elapsed_hours, elapsed_minutes, elapsed_seconds, eta_hours, eta_minutes, eta_seconds, chunkmbs); else print_verbose("\nPass %d -- Elapsed Time: %02d:%02d:%02d. Compress Speed: %3.3fMB/s.\n", pass, elapsed_hours, elapsed_minutes, elapsed_seconds, chunkmbs); } last.tv_sec = current.tv_sec; last.tv_usec = current.tv_usec; if (st->chunk_size == len) control->eof = 1; rzip_chunk(control, st, fd_in, fd_out, offset, pct_base, pct_multiple); /* st->chunk_size may be shrunk in rzip_chunk */ last_chunk = st->chunk_size; len -= st->chunk_size; if (unlikely(len > 0 && control->eof)) { close_streamout_threads(control); dealloc(st->hash_table); dealloc(st); failure("Wrote EOF to file yet chunk_size was shrunk, corrupting archive.\n"); } } if (likely(st->hash_table)) dealloc(st->hash_table); if (unlikely(!close_streamout_threads(control))) { dealloc(st); failure("Failed to close_streamout_threads in rzip_fd\n"); } if (!NO_MD5) { /* Temporary workaround till someone fixes apple md5 */ md5_finish_ctx(&control->ctx, control->md5_resblock); if (HASH_CHECK || MAX_VERBOSE) { print_output("MD5: "); for (j = 0; j < MD5_DIGEST_SIZE; j++) print_output("%02x", control->md5_resblock[j] & 0xFF); print_output("\n"); } /* When encrypting data, we encrypt the MD5 value as well */ if (ENCRYPT) if (unlikely(!lrz_encrypt(control, control->md5_resblock, MD5_DIGEST_SIZE, control->salt_pass))) { dealloc(st); failure("Failed to lrz_encrypt in rzip_fd\n"); } if (unlikely(write_1g(control, control->md5_resblock, MD5_DIGEST_SIZE) != MD5_DIGEST_SIZE)) { dealloc(st); failure("Failed to write md5 in rzip_fd\n"); } } if (unlikely(!flush_tmpout(control))) { dealloc(st); failure("Failed to flush_tmpout in rzip_fd\n"); } gettimeofday(¤t, NULL); if (STDIN) s.st_size = control->st_size; tdiff = current.tv_sec - start.tv_sec; if (!tdiff) tdiff = 1; chunkmbs = (s.st_size / 1024 / 1024) / tdiff; fstat(fd_out, &s2); print_maxverbose("matches=%u match_bytes=%u\n", (unsigned 
int)st->stats.matches, (unsigned int)st->stats.match_bytes); print_maxverbose("literals=%u literal_bytes=%u\n", (unsigned int)st->stats.literals, (unsigned int)st->stats.literal_bytes); print_maxverbose("true_tag_positives=%u false_tag_positives=%u\n", (unsigned int)st->stats.tag_hits, (unsigned int)st->stats.tag_misses); print_maxverbose("inserts=%u match %.3f\n", (unsigned int)st->stats.inserts, (1.0 + st->stats.match_bytes) / st->stats.literal_bytes); if (!STDIN) print_output("%s - ", control->infile); print_output("Compression Ratio: %.3f. Average Compression Speed: %6.3fMB/s.\n", 1.0 * s.st_size / s2.st_size, chunkmbs); clear_sslist(st); dealloc(st); } lrzip-0.651/rzip.h000066400000000000000000000015731421175057200140340ustar00rootroot00000000000000/* Copyright (C) 2006-2016,2022 Con Kolivas Copyright (C) 2011 Peter Hyman Copyright (C) 1998 Andrew Tridgell This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef RZIP_H #define RZIP_H #include "lrzip_private.h" void rzip_fd(rzip_control *control, int fd_in, int fd_out); #endif lrzip-0.651/sha4.c000066400000000000000000000243101421175057200136740ustar00rootroot00000000000000/* * FIPS-180-2 compliant SHA-384/512 implementation * * Copyright (C) 2011, Con Kolivas * Copyright (C) 2006-2010, Brainspark B.V. * * This file is part of PolarSSL (http://www.polarssl.org) * Lead Maintainer: Paul Bakker * * All rights reserved. 
/*
 * 64-bit integer manipulation macros (big endian)
 *
 * GET_UINT64_BE(n,b,i): load the eight bytes b[i] .. b[i+7] into n, with
 *                       b[i] as the most significant byte.
 * PUT_UINT64_BE(n,b,i): store n into b[i] .. b[i+7], most significant
 *                       byte first.
 *
 * Both expand to a do { } while( 0 ) statement so that they behave as a
 * single statement everywhere, including an unbraced if/else. The arguments
 * are evaluated several times; do not pass expressions with side effects.
 */
#ifndef GET_UINT64_BE
#define GET_UINT64_BE(n,b,i)                            \
do {                                                    \
    (n) = ( (unsigned int64) (b)[(i)    ] << 56 )       \
        | ( (unsigned int64) (b)[(i) + 1] << 48 )       \
        | ( (unsigned int64) (b)[(i) + 2] << 40 )       \
        | ( (unsigned int64) (b)[(i) + 3] << 32 )       \
        | ( (unsigned int64) (b)[(i) + 4] << 24 )       \
        | ( (unsigned int64) (b)[(i) + 5] << 16 )       \
        | ( (unsigned int64) (b)[(i) + 6] <<  8 )       \
        | ( (unsigned int64) (b)[(i) + 7]       );      \
} while( 0 )
#endif

#ifndef PUT_UINT64_BE
#define PUT_UINT64_BE(n,b,i)                            \
do {                                                    \
    (b)[(i)    ] = (unsigned char) ( (n) >> 56 );       \
    (b)[(i) + 1] = (unsigned char) ( (n) >> 48 );       \
    (b)[(i) + 2] = (unsigned char) ( (n) >> 40 );       \
    (b)[(i) + 3] = (unsigned char) ( (n) >> 32 );       \
    (b)[(i) + 4] = (unsigned char) ( (n) >> 24 );       \
    (b)[(i) + 5] = (unsigned char) ( (n) >> 16 );       \
    (b)[(i) + 6] = (unsigned char) ( (n) >>  8 );       \
    (b)[(i) + 7] = (unsigned char) ( (n)       );       \
} while( 0 )
#endif
/*
 * SHA-512 context setup
 *
 * Resets the processed-byte counters, loads the eight initial state words
 * for the selected variant (SHA-512 when is384 == 0, SHA-384 otherwise) and
 * records the variant in the context.
 */
void sha4_starts( sha4_context *ctx, int is384 )
{
    /* Initial state words, SHA-512 */
    static const unsigned int64 init512[8] =
    {
        UL64(0x6A09E667F3BCC908), UL64(0xBB67AE8584CAA73B),
        UL64(0x3C6EF372FE94F82B), UL64(0xA54FF53A5F1D36F1),
        UL64(0x510E527FADE682D1), UL64(0x9B05688C2B3E6C1F),
        UL64(0x1F83D9ABFB41BD6B), UL64(0x5BE0CD19137E2179)
    };
    /* Initial state words, SHA-384 */
    static const unsigned int64 init384[8] =
    {
        UL64(0xCBBB9D5DC1059ED8), UL64(0x629A292A367CD507),
        UL64(0x9159015A3070DD17), UL64(0x152FECD8F70E5939),
        UL64(0x67332667FFC00B31), UL64(0x8EB44A8768581511),
        UL64(0xDB0C2E0D64F98FA7), UL64(0x47B5481DBEFA4FA4)
    };
    const unsigned int64 *init = ( is384 == 0 ) ? init512 : init384;
    int w;

    ctx->total[0] = 0;
    ctx->total[1] = 0;

    for( w = 0; w < 8; w++ )
        ctx->state[w] = init[w];

    ctx->is384 = is384;
}
G, H, A, B, C, D, W[i], K[i] ); i++; P( D, E, F, G, H, A, B, C, W[i], K[i] ); i++; P( C, D, E, F, G, H, A, B, W[i], K[i] ); i++; P( B, C, D, E, F, G, H, A, W[i], K[i] ); i++; } while( i < 80 ); ctx->state[0] += A; ctx->state[1] += B; ctx->state[2] += C; ctx->state[3] += D; ctx->state[4] += E; ctx->state[5] += F; ctx->state[6] += G; ctx->state[7] += H; } /* * SHA-512 process buffer */ void sha4_update( sha4_context *ctx, const unsigned char *input, int ilen ) { int fill; unsigned int64 left; if( ilen <= 0 ) return; left = ctx->total[0] & 0x7F; fill = (int)( 128 - left ); ctx->total[0] += ilen; if( ctx->total[0] < (unsigned int64) ilen ) ctx->total[1]++; if( left && ilen >= fill ) { memcpy( (void *) (ctx->buffer + left), (void *) input, fill ); sha4_process( ctx, ctx->buffer ); input += fill; ilen -= fill; left = 0; } while( ilen >= 128 ) { sha4_process( ctx, input ); input += 128; ilen -= 128; } if( ilen > 0 ) { memcpy( (void *) (ctx->buffer + left), (void *) input, ilen ); } } static const unsigned char sha4_padding[128] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; /* * SHA-512 final digest */ void sha4_finish( sha4_context *ctx, unsigned char output[64] ) { int last, padn; unsigned int64 high, low; unsigned char msglen[16]; high = ( ctx->total[0] >> 61 ) | ( ctx->total[1] << 3 ); low = ( ctx->total[0] << 3 ); PUT_UINT64_BE( high, msglen, 0 ); PUT_UINT64_BE( low, msglen, 8 ); last = (int)( ctx->total[0] & 0x7F ); padn = ( last < 112 ) ? 
( 112 - last ) : ( 240 - last ); sha4_update( ctx, (unsigned char *) sha4_padding, padn ); sha4_update( ctx, msglen, 16 ); PUT_UINT64_BE( ctx->state[0], output, 0 ); PUT_UINT64_BE( ctx->state[1], output, 8 ); PUT_UINT64_BE( ctx->state[2], output, 16 ); PUT_UINT64_BE( ctx->state[3], output, 24 ); PUT_UINT64_BE( ctx->state[4], output, 32 ); PUT_UINT64_BE( ctx->state[5], output, 40 ); if( ctx->is384 == 0 ) { PUT_UINT64_BE( ctx->state[6], output, 48 ); PUT_UINT64_BE( ctx->state[7], output, 56 ); } } /* * output = SHA-512( input buffer ) */ void sha4( const unsigned char *input, int ilen, unsigned char output[64], int is384 ) { sha4_context ctx; sha4_starts( &ctx, is384 ); sha4_update( &ctx, input, ilen ); sha4_finish( &ctx, output ); memset( &ctx, 0, sizeof( sha4_context ) ); } lrzip-0.651/sha4.h000066400000000000000000000053721421175057200137100ustar00rootroot00000000000000/** * \file sha4.h * * Copyright (C) 2011, Con Kolivas * Copyright (C) 2006-2010, Brainspark B.V. * * This file is part of PolarSSL (http://www.polarssl.org) * Lead Maintainer: Paul Bakker * * All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
/**
 * \brief          SHA-512 context structure
 *
 * Holds the running state of one SHA-384/SHA-512 computation. It is
 * initialised by sha4_starts(), advanced by sha4_update() and consumed by
 * sha4_finish().
 */
typedef struct
{
    unsigned int64 total[2];    /*!< number of bytes processed
                                     ([0] low word, [1] carry/high word) */
    unsigned int64 state[8];    /*!< intermediate digest state  */
    unsigned char buffer[128];  /*!< data block being processed */

    unsigned char ipad[128];    /*!< HMAC: inner padding        */
    unsigned char opad[128];    /*!< HMAC: outer padding        */
    int is384;                  /*!< 0 => SHA-512, else SHA-384 */
}
sha4_context;
Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ /* multiplex N streams into a file - the streams are passed through different compressors */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #ifdef HAVE_SYS_TIME_H # include #endif #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_RESOURCE_H # include #endif #ifdef HAVE_UNISTD_H # include #endif #include #include #include #include #include #include #include #ifdef HAVE_ERRNO_H # include #endif #ifdef HAVE_ENDIAN_H # include #elif HAVE_SYS_ENDIAN_H # include #endif #ifdef HAVE_ARPA_INET_H # include #endif /* LZMA C Wrapper */ #include "lzma/C/LzmaLib.h" #include "util.h" #include "lrzip_core.h" #define STREAM_BUFSIZE (1024 * 1024 * 10) static struct compress_thread { uchar *s_buf; /* Uncompressed buffer -> Compressed buffer */ uchar c_type; /* Compression type */ i64 s_len; /* Data length uncompressed */ i64 c_len; /* Data length compressed */ cksem_t cksem; /* This thread's semaphore */ struct stream_info *sinfo; int streamno; uchar salt[SALT_LEN]; } *cthreads; typedef struct stream_thread_struct { int i; rzip_control *control; struct stream_info *sinfo; } stream_thread_struct; static long output_thread; static pthread_mutex_t output_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t output_cond = PTHREAD_COND_INITIALIZER; bool init_mutex(rzip_control *control, pthread_mutex_t *mutex) { if (unlikely(pthread_mutex_init(mutex, NULL))) fatal_return(("Failed to pthread_mutex_init\n"), false); return true; } bool unlock_mutex(rzip_control *control, pthread_mutex_t *mutex) { if (unlikely(pthread_mutex_unlock(mutex))) fatal_return(("Failed to 
pthread_mutex_unlock\n"), false); return true; } bool lock_mutex(rzip_control *control, pthread_mutex_t *mutex) { if (unlikely(pthread_mutex_lock(mutex))) fatal_return(("Failed to pthread_mutex_lock\n"), false); return true; } static bool cond_wait(rzip_control *control, pthread_cond_t *cond, pthread_mutex_t *mutex) { if (unlikely(pthread_cond_wait(cond, mutex))) fatal_return(("Failed to pthread_cond_wait\n"), false); return true; } static bool cond_broadcast(rzip_control *control, pthread_cond_t *cond) { if (unlikely(pthread_cond_broadcast(cond))) fatal_return(("Failed to pthread_cond_broadcast\n"), false); return true; } bool create_pthread(rzip_control *control, pthread_t *thread, pthread_attr_t * attr, void * (*start_routine)(void *), void *arg) { if (unlikely(pthread_create(thread, attr, start_routine, arg))) fatal_return(("Failed to pthread_create\n"), false); return true; } bool detach_pthread(rzip_control *control, pthread_t *thread) { if (unlikely(pthread_detach(*thread))) fatal_return(("Failed to pthread_detach\n"), false); return true; } bool join_pthread(rzip_control *control, pthread_t th, void **thread_return) { if (pthread_join(th, thread_return)) fatal_return(("Failed to pthread_join\n"), false); return true; } /* just to keep things clean, declare function here * but move body to the end since it's a work function */ static int lz4_compresses(rzip_control *control, uchar *s_buf, i64 s_len); /* ***** COMPRESSION FUNCTIONS ***** ZPAQ, BZIP, GZIP, LZMA, LZO try to compress a buffer. If compression fails for whatever reason then leave uncompressed. 
/* Try to compress cthread->s_buf with ZPAQ.
 *
 * The block is first probed with lz4_compresses(); if that reports it as not
 * compressible nothing is done. On a successful, smaller result the thread's
 * buffer is replaced by the compressed one and c_len / c_type are updated.
 * Otherwise the block is left untouched (CTYPE_NONE).
 *
 * Returns 0 on success or when the block is left uncompressed, -1 when the
 * output buffer cannot be allocated. */
static int zpaq_compress_buf(rzip_control *control, struct compress_thread *cthread, long thread)
{
	i64 packed_len = 0, packed_size;
	uchar *packed;

	if (!lz4_compresses(control, cthread->s_buf, cthread->s_len))
		return 0;

	/* Output buffer: input length plus 10000 bytes, page rounded */
	packed_size = round_up_page(control, cthread->s_len + 10000);
	packed = malloc(packed_size);
	if (!packed) {
		print_err("Unable to allocate c_buf in zpaq_compress_buf\n");
		return -1;
	}

	zpaq_compress(packed, &packed_len, cthread->s_buf, cthread->s_len,
		      control->compression_level / 4 + 1, control->msgout,
		      SHOW_PROGRESS ? true: false, thread);

	if (likely(packed_len < cthread->c_len)) {
		/* Smaller: hand the compressed buffer over to the thread */
		cthread->c_len = packed_len;
		dealloc(cthread->s_buf);
		cthread->s_buf = packed;
		cthread->c_type = CTYPE_ZPAQ;
		return 0;
	}

	/* Incompressible, leave as CTYPE_NONE */
	print_maxverbose("Incompressible block\n");
	dealloc(packed);
	return 0;
}

/* Try to compress cthread->s_buf with bzip2.
 *
 * Same contract as the other back ends: the block is probed with
 * lz4_compresses() first, and the thread's buffer is only replaced when the
 * compressed result is smaller than the current c_len.
 *
 * Returns 0 on success or when the block is left uncompressed, -1 on
 * allocation failure or an unexpected bzip2 error. */
static int bzip2_compress_buf(rzip_control *control, struct compress_thread *cthread)
{
	u32 packed_len = round_up_page(control, cthread->s_len);
	uchar *packed;
	int bz_ret;

	if (!lz4_compresses(control, cthread->s_buf, cthread->s_len))
		return 0;

	packed = malloc(packed_len);
	if (!packed) {
		print_err("Unable to allocate c_buf in bzip2_compress_buf\n");
		return -1;
	}

	bz_ret = BZ2_bzBuffToBuffCompress((char *)packed, &packed_len,
					  (char *)cthread->s_buf, cthread->s_len,
					  control->compression_level, 0,
					  control->compression_level * 10);

	/* Anything other than success or "output did not fit" is an error */
	if (bz_ret != BZ_OUTBUFF_FULL && bz_ret != BZ_OK) {
		dealloc(packed);
		print_maxverbose("BZ2 compress failed\n");
		return -1;
	}

	/* If the compressed data is not smaller than the original data,
	 * leave the block as CTYPE_NONE */
	if (bz_ret == BZ_OUTBUFF_FULL || packed_len >= cthread->c_len) {
		print_maxverbose("Incompressible block\n");
		dealloc(packed);
		return 0;
	}

	cthread->c_len = packed_len;
	dealloc(cthread->s_buf);
	cthread->s_buf = packed;
	cthread->c_type = CTYPE_BZIP2;
	return 0;
}
static int gzip_compress_buf(rzip_control *control, struct compress_thread *cthread) { unsigned long dlen = round_up_page(control, cthread->s_len); uchar *c_buf; int gzip_ret; c_buf = malloc(dlen); if (!c_buf) { print_err("Unable to allocate c_buf in gzip_compress_buf\n"); return -1; } gzip_ret = compress2(c_buf, &dlen, cthread->s_buf, cthread->s_len, control->compression_level); /* if compressed data is bigger then original data leave as * CTYPE_NONE */ if (gzip_ret == Z_BUF_ERROR) { print_maxverbose("Incompressible block\n"); /* Incompressible, leave as CTYPE_NONE */ dealloc(c_buf); return 0; } if (unlikely(gzip_ret != Z_OK)) { dealloc(c_buf); print_maxverbose("compress2 failed\n"); return -1; } if (unlikely((i64)dlen >= cthread->c_len)) { print_maxverbose("Incompressible block\n"); /* Incompressible, leave as CTYPE_NONE */ dealloc(c_buf); return 0; } cthread->c_len = dlen; dealloc(cthread->s_buf); cthread->s_buf = c_buf; cthread->c_type = CTYPE_GZIP; return 0; } static int lzma_compress_buf(rzip_control *control, struct compress_thread *cthread) { unsigned char lzma_properties[5]; /* lzma properties, encoded */ int lzma_level, lzma_ret; size_t prop_size = 5; /* return value for lzma_properties */ uchar *c_buf; size_t dlen; if (!lz4_compresses(control, cthread->s_buf, cthread->s_len)) return 0; /* only 7 levels with lzma, scale them */ lzma_level = control->compression_level * 7 / 9; if (!lzma_level) lzma_level = 1; print_maxverbose("Starting lzma back end compression thread...\n"); retry: dlen = round_up_page(control, cthread->s_len); c_buf = malloc(dlen); if (!c_buf) { print_err("Unable to allocate c_buf in lzma_compress_buf\n"); return -1; } /* with LZMA SDK 4.63, we pass compression level and threads only * and receive properties in lzma_properties */ lzma_ret = LzmaCompress(c_buf, &dlen, cthread->s_buf, (size_t)cthread->s_len, lzma_properties, &prop_size, lzma_level, 0, /* dict size. 
set default, choose by level */ -1, -1, -1, -1, /* lc, lp, pb, fb */ control->threads > 1 ? 2: 1); /* LZMA spec has threads = 1 or 2 only. */ if (lzma_ret != SZ_OK) { switch (lzma_ret) { case SZ_ERROR_MEM: break; case SZ_ERROR_PARAM: print_err("LZMA Parameter ERROR: %d. This should not happen.\n", SZ_ERROR_PARAM); break; case SZ_ERROR_OUTPUT_EOF: print_maxverbose("Harmless LZMA Output Buffer Overflow error: %d. Incompressible block.\n", SZ_ERROR_OUTPUT_EOF); break; case SZ_ERROR_THREAD: print_err("LZMA Multi Thread ERROR: %d. This should not happen.\n", SZ_ERROR_THREAD); break; default: print_err("Unidentified LZMA ERROR: %d. This should not happen.\n", lzma_ret); break; } /* can pass -1 if not compressible! Thanks Lasse Collin */ dealloc(c_buf); if (lzma_ret == SZ_ERROR_MEM) { if (lzma_level > 1) { lzma_level--; print_verbose("LZMA Warning: %d. Can't allocate enough RAM for compression window, trying smaller.\n", SZ_ERROR_MEM); goto retry; } /* lzma compress can be fragile on 32 bit. If it fails, * fall back to bzip2 compression so the block doesn't * remain uncompressed */ print_verbose("Unable to allocate enough RAM for any sized compression window, falling back to bzip2 compression.\n"); return bzip2_compress_buf(control, cthread); } else if (lzma_ret == SZ_ERROR_OUTPUT_EOF) return 0; return -1; } if (unlikely((i64)dlen >= cthread->c_len)) { /* Incompressible, leave as CTYPE_NONE */ print_maxverbose("Incompressible block\n"); dealloc(c_buf); return 0; } /* Make sure multiple threads don't race on writing lzma_properties */ lock_mutex(control, &control->control_lock); if (!control->lzma_prop_set) { memcpy(control->lzma_properties, lzma_properties, 5); control->lzma_prop_set = true; /* Reset the magic written flag so we write it again if we * get lzma properties and haven't written them yet. 
*/ if (TMP_OUTBUF) control->magic_written = 0; } unlock_mutex(control, &control->control_lock); cthread->c_len = dlen; dealloc(cthread->s_buf); cthread->s_buf = c_buf; cthread->c_type = CTYPE_LZMA; return 0; } static int lzo_compress_buf(rzip_control *control, struct compress_thread *cthread) { lzo_uint in_len = cthread->s_len; lzo_uint dlen = round_up_page(control, in_len + in_len / 16 + 64 + 3); lzo_bytep wrkmem; uchar *c_buf; int ret = -1; wrkmem = (lzo_bytep) calloc(1, LZO1X_1_MEM_COMPRESS); if (unlikely(wrkmem == NULL)) { print_maxverbose("Failed to malloc wkmem\n"); return ret; } c_buf = malloc(dlen); if (!c_buf) { print_err("Unable to allocate c_buf in lzo_compress_buf"); goto out_free; } /* lzo1x_1_compress does not return anything but LZO_OK so we ignore * the return value */ lzo1x_1_compress(cthread->s_buf, in_len, c_buf, &dlen, wrkmem); ret = 0; if (dlen >= in_len){ /* Incompressible, leave as CTYPE_NONE */ print_maxverbose("Incompressible block\n"); dealloc(c_buf); goto out_free; } cthread->c_len = dlen; dealloc(cthread->s_buf); cthread->s_buf = c_buf; cthread->c_type = CTYPE_LZO; out_free: dealloc(wrkmem); return ret; } /* ***** DECOMPRESSION FUNCTIONS ***** ZPAQ, BZIP, GZIP, LZMA, LZO try to decompress a buffer. Return 0 on success and -1 on failure. */ static int zpaq_decompress_buf(rzip_control *control __UNUSED__, struct uncomp_thread *ucthread, long thread) { i64 dlen = ucthread->u_len; uchar *c_buf; int ret = 0; c_buf = ucthread->s_buf; ucthread->s_buf = malloc(round_up_page(control, dlen)); if (unlikely(!ucthread->s_buf)) { print_err("Failed to allocate %ld bytes for decompression\n", dlen); ret = -1; goto out; } dlen = 0; zpaq_decompress(ucthread->s_buf, &dlen, c_buf, ucthread->c_len, control->msgout, SHOW_PROGRESS ? true: false, thread); if (unlikely(dlen != ucthread->u_len)) { print_err("Inconsistent length after decompression. 
Got %ld bytes, expected %lld\n", dlen, ucthread->u_len); ret = -1; } else dealloc(c_buf); out: if (ret == -1) { dealloc(ucthread->s_buf); ucthread->s_buf = c_buf; } return ret; } static int bzip2_decompress_buf(rzip_control *control __UNUSED__, struct uncomp_thread *ucthread) { u32 dlen = ucthread->u_len; int ret = 0, bzerr; uchar *c_buf; c_buf = ucthread->s_buf; ucthread->s_buf = malloc(round_up_page(control, dlen)); if (unlikely(!ucthread->s_buf)) { print_err("Failed to allocate %d bytes for decompression\n", dlen); ret = -1; goto out; } bzerr = BZ2_bzBuffToBuffDecompress((char*)ucthread->s_buf, &dlen, (char*)c_buf, ucthread->c_len, 0, 0); if (unlikely(bzerr != BZ_OK)) { print_err("Failed to decompress buffer - bzerr=%d\n", bzerr); ret = -1; goto out; } if (unlikely(dlen != ucthread->u_len)) { print_err("Inconsistent length after decompression. Got %d bytes, expected %lld\n", dlen, ucthread->u_len); ret = -1; } else dealloc(c_buf); out: if (ret == -1) { dealloc(ucthread->s_buf); ucthread->s_buf = c_buf; } return ret; } static int gzip_decompress_buf(rzip_control *control __UNUSED__, struct uncomp_thread *ucthread) { unsigned long dlen = ucthread->u_len; int ret = 0, gzerr; uchar *c_buf; c_buf = ucthread->s_buf; ucthread->s_buf = malloc(round_up_page(control, dlen)); if (unlikely(!ucthread->s_buf)) { print_err("Failed to allocate %ld bytes for decompression\n", dlen); ret = -1; goto out; } gzerr = uncompress(ucthread->s_buf, &dlen, c_buf, ucthread->c_len); if (unlikely(gzerr != Z_OK)) { print_err("Failed to decompress buffer - gzerr=%d\n", gzerr); ret = -1; goto out; } if (unlikely((i64)dlen != ucthread->u_len)) { print_err("Inconsistent length after decompression. 
Got %ld bytes, expected %lld\n", dlen, ucthread->u_len); ret = -1; } else dealloc(c_buf); out: if (ret == -1) { dealloc(ucthread->s_buf); ucthread->s_buf = c_buf; } return ret; } static int lzma_decompress_buf(rzip_control *control, struct uncomp_thread *ucthread) { size_t dlen = ucthread->u_len; int ret = 0, lzmaerr; uchar *c_buf; SizeT c_len = ucthread->c_len; c_buf = ucthread->s_buf; ucthread->s_buf = malloc(round_up_page(control, dlen)); if (unlikely(!ucthread->s_buf)) { print_err("Failed to allocate %lld bytes for decompression\n", (i64)dlen); ret = -1; goto out; } /* With LZMA SDK 4.63 we pass control->lzma_properties * which is needed for proper uncompress */ lzmaerr = LzmaUncompress(ucthread->s_buf, &dlen, c_buf, &c_len, control->lzma_properties, 5); if (unlikely(lzmaerr)) { print_err("Failed to decompress buffer - lzmaerr=%d\n", lzmaerr); ret = -1; goto out; } if (unlikely((i64)dlen != ucthread->u_len)) { print_err("Inconsistent length after decompression. Got %lld bytes, expected %lld\n", (i64)dlen, ucthread->u_len); ret = -1; } else dealloc(c_buf); out: if (ret == -1) { dealloc(ucthread->s_buf); ucthread->s_buf = c_buf; } return ret; } static int lzo_decompress_buf(rzip_control *control __UNUSED__, struct uncomp_thread *ucthread) { lzo_uint dlen = ucthread->u_len; int ret = 0, lzerr; uchar *c_buf; c_buf = ucthread->s_buf; ucthread->s_buf = malloc(round_up_page(control, dlen)); if (unlikely(!ucthread->s_buf)) { print_err("Failed to allocate %lu bytes for decompression\n", (unsigned long)dlen); ret = -1; goto out; } lzerr = lzo1x_decompress_safe((uchar*)c_buf, ucthread->c_len, (uchar*)ucthread->s_buf, &dlen, NULL); if (unlikely(lzerr != LZO_E_OK)) { print_err("Failed to decompress buffer - lzerr=%d\n", lzerr); ret = -1; goto out; } if (unlikely((i64)dlen != ucthread->u_len)) { print_err("Inconsistent length after decompression. 
Got %lu bytes, expected %lld\n", (unsigned long)dlen, ucthread->u_len); ret = -1; } else dealloc(c_buf); out: if (ret == -1) { dealloc(ucthread->s_buf); ucthread->s_buf = c_buf; } return ret; } /* WORK FUNCTIONS */ /* Look at whether we're writing to a ram location or physical files and write * the data accordingly. */ ssize_t put_fdout(rzip_control *control, void *offset_buf, ssize_t ret) { if (!TMP_OUTBUF) return write(control->fd_out, offset_buf, (size_t)ret); if (unlikely(control->out_ofs + ret > control->out_maxlen)) { /* The data won't fit in a temporary output buffer so we have * to fall back to temporary files. */ print_verbose("Unable to %scompress entirely in ram, will use physical files\n", DECOMPRESS ? "de" : ""); if (unlikely(control->fd_out == -1)) { failure("Was unable to %scompress entirely in ram and no temporary file creation was possible\n", DECOMPRESS ? "de" : ""); } /* Copy tmp_outbuf to tmpoutfile before deallocation */ if (unlikely(!write_fdout(control, control->tmp_outbuf, control->out_len))) { print_err("Unable to write_fdout tmpoutbuf in put_fdout\n"); return -1; } /* Deallocate now unused tmpoutbuf and unset tmp_outbuf flag */ close_tmpoutbuf(control); return write(control->fd_out, offset_buf, (size_t)ret); } memcpy(control->tmp_outbuf + control->out_ofs, offset_buf, ret); control->out_ofs += ret; if (likely(control->out_ofs > control->out_len)) control->out_len = control->out_ofs; return ret; } /* This is a custom version of write() which writes in 1GB chunks to avoid the overflows at the >= 2GB mark thanks to 32bit fuckage. 
*/ ssize_t write_1g(rzip_control *control, void *buf, i64 len) { uchar *offset_buf = buf; ssize_t ret; i64 total; total = 0; while (len > 0) { if (BITS32) ret = MIN(len, one_g); else ret = len; ret = put_fdout(control, offset_buf, (size_t)ret); if (unlikely(ret <= 0)) return ret; len -= ret; offset_buf += ret; total += ret; } return total; } /* Should be called only if we know the buffer will be large enough, otherwise * we must dump_stdin first */ static bool read_fdin(struct rzip_control *control, i64 len) { int tmpchar; i64 i; for (i = 0; i < len; i++) { tmpchar = getchar(); if (unlikely(tmpchar == EOF)) failure_return(("Reached end of file on STDIN prematurely on read_fdin, asked for %lld got %lld\n", len, i), false); control->tmp_inbuf[control->in_ofs + i] = (char)tmpchar; } control->in_len = control->in_ofs + len; return true; } /* Dump STDIN into a temporary file */ static int dump_stdin(rzip_control *control) { if (unlikely(!write_fdin(control))) return -1; if (unlikely(!read_tmpinfile(control, control->fd_in))) return -1; close_tmpinbuf(control); return 0; } /* Ditto for read */ ssize_t read_1g(rzip_control *control, int fd, void *buf, i64 len) { uchar *offset_buf = buf; ssize_t ret; i64 total; if (TMP_INBUF && fd == control->fd_in) { /* We're decompressing from STDIN */ if (unlikely(control->in_ofs + len > control->in_maxlen)) { /* We're unable to fit it all into the temp buffer */ if (dump_stdin(control)) { failure_return(("Inadequate ram to %scompress from STDIN and unable to create in tmpfile", DECOMPRESS ? 
"de" : ""), -1); } goto read_fd; } if (control->in_ofs + len > control->in_len) { if (unlikely(!read_fdin(control, control->in_ofs + len - control->in_len))) return false; } memcpy(buf, control->tmp_inbuf + control->in_ofs, len); control->in_ofs += len; return len; } if (TMP_OUTBUF && fd == control->fd_out) { if (unlikely(control->out_ofs + len > control->out_maxlen)) failure_return(("Trying to read beyond out_ofs in tmpoutbuf\n"), -1); memcpy(buf, control->tmp_outbuf + control->out_ofs, len); control->out_ofs += len; return len; } read_fd: total = 0; while (len > 0) { if (BITS32) ret = MIN(len, one_g); else ret = len; ret = read(fd, offset_buf, (size_t)ret); if (unlikely(ret <= 0)) return ret; len -= ret; offset_buf += ret; total += ret; } return total; } /* write to a file, return 0 on success and -1 on failure */ static int write_buf(rzip_control *control, uchar *p, i64 len) { ssize_t ret; ret = write_1g(control, p, (size_t)len); if (unlikely(ret == -1)) { print_err("Write of length %lld failed - %s\n", len, strerror(errno)); return -1; } if (unlikely(ret != (ssize_t)len)) { print_err("Partial write!? asked for %lld bytes but got %lld\n", len, (i64)ret); return -1; } return 0; } /* write a byte */ static inline int write_u8(rzip_control *control, uchar v) { return write_buf(control, &v, 1); } static inline int write_val(rzip_control *control, i64 v, int len) { v = htole64(v); return write_buf(control, (uchar *)&v, len); } static int read_buf(rzip_control *control, int f, uchar *p, i64 len) { ssize_t ret; ret = read_1g(control, f, p, (size_t)len); if (unlikely(ret == -1)) { print_err("Read of length %lld failed - %s\n", len, strerror(errno)); return -1; } if (unlikely(ret != (ssize_t)len)) { print_err("Partial read!? 
asked for %lld bytes but got %lld\n", len, (i64)ret); return -1; } return 0; } static inline int read_u8(rzip_control *control, int f, uchar *v) { return read_buf(control, f, v, 1); } static inline int read_u32(rzip_control *control, int f, u32 *v) { int ret = read_buf(control, f, (uchar *)v, 4); *v = le32toh(*v); return ret; } static inline int read_val(rzip_control *control, int f, i64 *v, int len) { int ret; /* We only partially read all 8 bytes so have to zero v here */ *v = 0; ret = read_buf(control, f, (uchar *)v, len); return ret; } static int fd_seekto(rzip_control *control, struct stream_info *sinfo, i64 spos, i64 pos) { if (unlikely(lseek(sinfo->fd, spos, SEEK_SET) != spos)) { print_err("Failed to seek to %lld in stream\n", pos); return -1; } return 0; } /* seek to a position within a set of streams - return -1 on failure */ static int seekto(rzip_control *control, struct stream_info *sinfo, i64 pos) { i64 spos = pos + sinfo->initial_pos; if (TMP_OUTBUF) { spos -= control->out_relofs; control->out_ofs = spos; if (unlikely(spos > control->out_len || spos < 0)) { print_err("Trying to seek to %lld outside tmp outbuf in seekto\n", spos); return -1; } return 0; } return fd_seekto(control, sinfo, spos, pos); } static int read_seekto(rzip_control *control, struct stream_info *sinfo, i64 pos) { i64 spos = pos + sinfo->initial_pos; if (TMP_INBUF) { if (spos > control->in_len) { i64 len = spos - control->in_len; if (control->in_ofs + len > control->in_maxlen) { if (unlikely(dump_stdin(control))) return -1; goto fd_seek; } else { if (unlikely(!read_fdin(control, len))) return -1; } } control->in_ofs = spos; if (unlikely(spos < 0)) { print_err("Trying to seek to %lld outside tmp inbuf in read_seekto\n", spos); return -1; } return 0; } fd_seek: return fd_seekto(control, sinfo, spos, pos); } static i64 get_seek(rzip_control *control, int fd) { i64 ret; if (TMP_OUTBUF) return control->out_relofs + control->out_ofs; ret = lseek(fd, 0, SEEK_CUR); if (unlikely(ret == -1)) 
fatal_return(("Failed to lseek in get_seek\n"), -1); return ret; } i64 get_readseek(rzip_control *control, int fd) { i64 ret; if (TMP_INBUF) return control->in_ofs; ret = lseek(fd, 0, SEEK_CUR); if (unlikely(ret == -1)) fatal_return(("Failed to lseek in get_seek\n"), -1); return ret; } bool prepare_streamout_threads(rzip_control *control) { pthread_t *threads; int i; /* As we serialise the generation of threads during the rzip * pre-processing stage, it's faster to have one more thread available * to keep all CPUs busy. There is no point splitting up the chunks * into multiple threads if there will be no compression back end. */ if (control->threads > 1) ++control->threads; if (NO_COMPRESS) control->threads = 1; threads = control->pthreads = calloc(sizeof(pthread_t), control->threads); if (unlikely(!threads)) fatal_return(("Unable to calloc threads in prepare_streamout_threads\n"), false); cthreads = calloc(sizeof(struct compress_thread), control->threads); if (unlikely(!cthreads)) { dealloc(threads); fatal_return(("Unable to calloc cthreads in prepare_streamout_threads\n"), false); } for (i = 0; i < control->threads; i++) { cksem_init(control, &cthreads[i].cksem); cksem_post(control, &cthreads[i].cksem); } return true; } bool close_streamout_threads(rzip_control *control) { int i, close_thread = output_thread; /* Wait for the threads in the correct order in case they end up * serialised */ for (i = 0; i < control->threads; i++) { cksem_wait(control, &cthreads[close_thread].cksem); if (++close_thread == control->threads) close_thread = 0; } dealloc(cthreads); dealloc(control->pthreads); return true; } /* open a set of output streams, compressing with the given compression level and algorithm */ void *open_stream_out(rzip_control *control, int f, unsigned int n, i64 chunk_limit, char cbytes) { struct stream_info *sinfo; unsigned int i, testbufs; bool threadlimit = false; i64 testsize, limit; uchar *testmalloc; sinfo = calloc(sizeof(struct stream_info), 1); if 
(unlikely(!sinfo)) return NULL; if (chunk_limit < control->page_size) chunk_limit = control->page_size; sinfo->bufsize = sinfo->size = limit = chunk_limit; sinfo->chunk_bytes = cbytes; sinfo->num_streams = n; sinfo->fd = f; sinfo->s = calloc(sizeof(struct stream), n); if (unlikely(!sinfo->s)) { dealloc(sinfo); return NULL; } /* Find the largest we can make the window based on ability to malloc * ram. We need 2 buffers for each compression thread and the overhead * of each compression back end. No 2nd buf is required when there is * no back end compression. We limit the total regardless to 1/3 ram * for when the OS lies due to heavy overcommit. */ if (NO_COMPRESS) testbufs = 1; else testbufs = 2; testsize = (limit * testbufs) + (control->overhead * control->threads); if (testsize > control->usable_ram) limit = (control->usable_ram - (control->overhead * control->threads)) / testbufs; /* If we don't have enough ram for the number of threads, decrease the * number of threads till we do, or only have one thread. 
*/ while (limit < STREAM_BUFSIZE && limit < chunk_limit) { if (control->threads > 1) { --control->threads; threadlimit = true; } else break; limit = (control->usable_ram - (control->overhead * control->threads)) / testbufs; limit = MIN(limit, chunk_limit); } if (threadlimit) { print_output("Minimising number of threads to %d to limit memory usage\n", control->threads); } if (BITS32) { limit = MIN(limit, one_g); if (limit + (control->overhead * control->threads) > one_g) limit = one_g - (control->overhead * control->threads); } /* Use a nominal minimum size should we fail all previous shrinking */ if (limit < STREAM_BUFSIZE) { limit = MAX(limit, STREAM_BUFSIZE); print_output("Warning, low memory for chosen compression settings\n"); } limit = MIN(limit, chunk_limit); retest_malloc: testsize = limit + (control->overhead * control->threads); testmalloc = malloc(testsize); if (!testmalloc) { limit = limit / 10 * 9; if (limit < 100000000) { /* If we can't even allocate 100MB then we'll never * succeed */ print_err("Unable to allocate enough memory for operation\n"); dealloc(sinfo->s); dealloc(sinfo); return NULL; } goto retest_malloc; } if (!NO_COMPRESS) { char *testmalloc2 = malloc(limit); if (!testmalloc2) { dealloc(testmalloc); limit = limit / 10 * 9; goto retest_malloc; } dealloc(testmalloc2); } dealloc(testmalloc); print_maxverbose("Succeeded in testing %lld sized malloc for back end compression\n", testsize); /* Make the bufsize no smaller than STREAM_BUFSIZE. 
Round up the * bufsize to fit X threads into it */ sinfo->bufsize = MIN(limit, MAX((limit + control->threads - 1) / control->threads, STREAM_BUFSIZE)); if (control->threads > 1) print_maxverbose("Using up to %d threads to compress up to %lld bytes each.\n", control->threads, sinfo->bufsize); else print_maxverbose("Using only 1 thread to compress up to %lld bytes\n", sinfo->bufsize); for (i = 0; i < n; i++) { sinfo->s[i].buf = calloc(sinfo->bufsize , 1); if (unlikely(!sinfo->s[i].buf)) { fatal("Unable to malloc buffer of size %lld in open_stream_out\n", sinfo->bufsize); dealloc(sinfo->s); dealloc(sinfo); return NULL; } } return (void *)sinfo; } /* The block headers are all encrypted so we read the data and salt associated * with them, decrypt the data, then return the decrypted version of the * values */ static bool decrypt_header(rzip_control *control, uchar *head, uchar *c_type, i64 *c_len, i64 *u_len, i64 *last_head) { uchar *buf = head + SALT_LEN; memcpy(buf, c_type, 1); memcpy(buf + 1, c_len, 8); memcpy(buf + 9, u_len, 8); memcpy(buf + 17, last_head, 8); if (unlikely(!lrz_decrypt(control, buf, 25, head))) return false; memcpy(c_type, buf, 1); memcpy(c_len, buf + 1, 8); memcpy(u_len, buf + 9, 8); memcpy(last_head, buf + 17, 8); return true; } /* prepare a set of n streams for reading on file descriptor f */ void *open_stream_in(rzip_control *control, int f, int n, char chunk_bytes) { struct uncomp_thread *ucthreads; struct stream_info *sinfo; int total_threads, i; pthread_t *threads; i64 header_length; sinfo = calloc(sizeof(struct stream_info), 1); if (unlikely(!sinfo)) return NULL; /* We have one thread dedicated to stream 0, and one more thread than * CPUs to keep them busy, unless we're running single-threaded. 
*/ if (control->threads > 1) total_threads = control->threads + 2; else total_threads = control->threads + 1; threads = control->pthreads = calloc(sizeof(pthread_t), total_threads); if (unlikely(!threads)) return NULL; sinfo->ucthreads = ucthreads = calloc(sizeof(struct uncomp_thread), total_threads); if (unlikely(!ucthreads)) { dealloc(sinfo); dealloc(threads); fatal_return(("Unable to calloc ucthreads in open_stream_in\n"), NULL); } sinfo->num_streams = n; sinfo->fd = f; sinfo->chunk_bytes = chunk_bytes; sinfo->s = calloc(sizeof(struct stream), n); if (unlikely(!sinfo->s)) { dealloc(sinfo); dealloc(threads); dealloc(ucthreads); return NULL; } sinfo->s[0].total_threads = 1; sinfo->s[1].total_threads = total_threads - 1; if (control->major_version == 0 && control->minor_version > 5) { /* Read in flag that tells us if there are more chunks after * this. Ignored if we know the final file size */ print_maxverbose("Reading eof flag at %lld\n", get_readseek(control, f)); if (unlikely(read_u8(control, f, &control->eof))) { print_err("Failed to read eof flag in open_stream_in\n"); goto failed; } print_maxverbose("EOF: %d\n", control->eof); /* Read in the expected chunk size */ if (!ENCRYPT) { print_maxverbose("Reading expected chunksize at %lld\n", get_readseek(control, f)); if (unlikely(read_val(control, f, &sinfo->size, sinfo->chunk_bytes))) { print_err("Failed to read in chunk size in open_stream_in\n"); goto failed; } sinfo->size = le64toh(sinfo->size); print_maxverbose("Chunk size: %lld\n", sinfo->size); control->st_size += sinfo->size; if (unlikely(sinfo->chunk_bytes < 1 || sinfo->chunk_bytes > 8 || sinfo->size < 0)) { print_err("Invalid chunk data size %d bytes %lld\n", sinfo->size, sinfo->chunk_bytes); goto failed; } } } sinfo->initial_pos = get_readseek(control, f); if (unlikely(sinfo->initial_pos == -1)) goto failed; for (i = 0; i < n; i++) { uchar c, enc_head[25 + SALT_LEN]; i64 v1, v2; sinfo->s[i].base_thread = i; sinfo->s[i].uthread_no = 
sinfo->s[i].base_thread; sinfo->s[i].unext_thread = sinfo->s[i].base_thread; if (unlikely(ENCRYPT && read_buf(control, f, enc_head, SALT_LEN))) goto failed; again: if (unlikely(read_u8(control, f, &c))) goto failed; /* Compatibility crap for versions < 0.40 */ if (control->major_version == 0 && control->minor_version < 4) { u32 v132, v232, last_head32; if (unlikely(read_u32(control, f, &v132))) goto failed; if (unlikely(read_u32(control, f, &v232))) goto failed; if (unlikely(read_u32(control, f, &last_head32))) goto failed; v1 = v132; v2 = v232; sinfo->s[i].last_head = last_head32; header_length = 13; } else { int read_len; print_maxverbose("Reading stream %d header at %lld\n", i, get_readseek(control, f)); if ((control->major_version == 0 && control->minor_version < 6) || ENCRYPT) read_len = 8; else read_len = sinfo->chunk_bytes; if (unlikely(read_val(control, f, &v1, read_len))) goto failed; if (unlikely(read_val(control, f, &v2, read_len))) goto failed; if (unlikely(read_val(control, f, &sinfo->s[i].last_head, read_len))) goto failed; header_length = 1 + (read_len * 3); } sinfo->total_read += header_length; if (ENCRYPT) { if (unlikely(!decrypt_header(control, enc_head, &c, &v1, &v2, &sinfo->s[i].last_head))) goto failed; sinfo->total_read += SALT_LEN; } v1 = le64toh(v1); v2 = le64toh(v2); sinfo->s[i].last_head = le64toh(sinfo->s[i].last_head); if (unlikely(c == CTYPE_NONE && v1 == 0 && v2 == 0 && sinfo->s[i].last_head == 0 && i == 0)) { print_err("Enabling stream close workaround\n"); sinfo->initial_pos += header_length; goto again; } if (unlikely(c != CTYPE_NONE)) { print_err("Unexpected initial tag %d in streams\n", c); if (ENCRYPT) print_err("Wrong password?\n"); goto failed; } if (unlikely(v1)) { print_err("Unexpected initial c_len %lld in streams %lld\n", v1, v2); goto failed; } if (unlikely(v2)) { print_err("Unexpected initial u_len %lld in streams\n", v2); goto failed; } } return (void *)sinfo; failed: dealloc(sinfo->s); dealloc(sinfo); dealloc(threads); 
dealloc(ucthreads); return NULL; } #define MIN_SIZE (ENCRYPT ? CBC_LEN : 0) /* Once the final data has all been written to the block header, we go back * and write SALT_LEN bytes of salt before it, and encrypt the header in place * by reading what has been written, encrypting it, and writing back over it. * This is very convoluted depending on whether a last_head value is written * to this block or not. See the callers of this function */ static bool rewrite_encrypted(rzip_control *control, struct stream_info *sinfo, i64 ofs) { uchar *buf, *head; i64 cur_ofs; cur_ofs = get_seek(control, sinfo->fd) - sinfo->initial_pos; if (unlikely(cur_ofs == -1)) return false; head = malloc(25 + SALT_LEN); if (unlikely(!head)) fatal_return(("Failed to malloc head in rewrite_encrypted\n"), false); buf = head + SALT_LEN; if (unlikely(!get_rand(control, head, SALT_LEN))) goto error; if (unlikely(seekto(control, sinfo, ofs - SALT_LEN))) failure_goto(("Failed to seekto buf ofs in rewrite_encrypted\n"), error); if (unlikely(write_buf(control, head, SALT_LEN))) failure_goto(("Failed to write_buf head in rewrite_encrypted\n"), error); if (unlikely(read_buf(control, sinfo->fd, buf, 25))) failure_goto(("Failed to read_buf buf in rewrite_encrypted\n"), error); if (unlikely(!lrz_encrypt(control, buf, 25, head))) goto error; if (unlikely(seekto(control, sinfo, ofs))) failure_goto(("Failed to seek back to ofs in rewrite_encrypted\n"), error); if (unlikely(write_buf(control, buf, 25))) failure_goto(("Failed to write_buf encrypted buf in rewrite_encrypted\n"), error); dealloc(head); seekto(control, sinfo, cur_ofs); return true; error: dealloc(head); return false; } /* Enter with s_buf allocated,s_buf points to the compressed data after the * backend compression and is then freed here */ static void *compthread(void *data) { stream_thread_struct *s = data; rzip_control *control = s->control; long i = s->i; struct compress_thread *cti; struct stream_info *ctis; int waited = 0, ret = 0; i64 
padded_len; int write_len; /* Make sure this thread doesn't already exist */ dealloc(data); cti = &cthreads[i]; ctis = cti->sinfo; if (unlikely(setpriority(PRIO_PROCESS, 0, control->nice_val) == -1)) { print_err("Warning, unable to set thread nice value %d...Resetting to %d\n", control->nice_val, control->current_priority); setpriority(PRIO_PROCESS, 0, (control->nice_val=control->current_priority)); } cti->c_type = CTYPE_NONE; cti->c_len = cti->s_len; /* Flushing writes to disk frees up any dirty ram, improving chances * of succeeding in allocating more ram */ fsync(ctis->fd); /* This is a cludge in case we are compressing to stdout and our first * stream is not compressed, but subsequent ones are compressed by * lzma and we can no longer seek back to the beginning of the file * to write the lzma properties which are effectively always starting * with 93. */ if (TMP_OUTBUF && LZMA_COMPRESS) control->lzma_properties[0] = 93; retry: /* Very small buffers have issues to do with minimum amounts of ram * allocatable to a buffer combined with the MINIMUM_MATCH of rzip * being 31 bytes so don't bother trying to compress anything less * than 64 bytes. */ if (!NO_COMPRESS && cti->c_len >= 64) { if (LZMA_COMPRESS) ret = lzma_compress_buf(control, cti); else if (LZO_COMPRESS) ret = lzo_compress_buf(control, cti); else if (BZIP2_COMPRESS) ret = bzip2_compress_buf(control, cti); else if (ZLIB_COMPRESS) ret = gzip_compress_buf(control, cti); else if (ZPAQ_COMPRESS) ret = zpaq_compress_buf(control, cti, i); else failure_goto(("Dunno wtf compression to use!\n"), error); } padded_len = cti->c_len; if (!ret && padded_len < MIN_SIZE) { /* We need to pad out each block to at least be CBC_LEN bytes * long or encryption cannot work. 
We pad it with random * data */ padded_len = MIN_SIZE; cti->s_buf = realloc(cti->s_buf, MIN_SIZE); if (unlikely(!cti->s_buf)) fatal_goto(("Failed to realloc s_buf in compthread\n"), error); if (unlikely(!get_rand(control, cti->s_buf + cti->c_len, MIN_SIZE - cti->c_len))) goto error; } /* If compression fails for whatever reason multithreaded, then wait * for the previous thread to finish, serialising the work to decrease * the memory requirements, increasing the chance of success */ if (unlikely(ret && waited)) failure_goto(("Failed to compress in compthread\n"), error); if (!waited) { lock_mutex(control, &output_lock); while (output_thread != i) cond_wait(control, &output_cond, &output_lock); unlock_mutex(control, &output_lock); waited = 1; } if (unlikely(ret)) { print_maxverbose("Unable to compress in parallel, waiting for previous thread to complete before trying again\n"); goto retry; } /* Need to be big enough to fill one CBC_LEN */ if (ENCRYPT) write_len = 8; else write_len = ctis->chunk_bytes; if (!ctis->chunks++) { int j; if (STDOUT) { lock_mutex(control, &control->control_lock); if (!control->magic_written) write_magic(control); unlock_mutex(control, &control->control_lock); } print_maxverbose("Writing initial chunk bytes value %d at %lld\n", ctis->chunk_bytes, get_seek(control, ctis->fd)); /* Write chunk bytes of this block */ write_u8(control, ctis->chunk_bytes); /* Write whether this is the last chunk, followed by the size * of this chunk */ print_maxverbose("Writing EOF flag as %d\n", control->eof); write_u8(control, control->eof); if (!ENCRYPT) write_val(control, ctis->size, ctis->chunk_bytes); /* First chunk of this stream, write headers */ ctis->initial_pos = get_seek(control, ctis->fd); if (unlikely(ctis->initial_pos == -1)) goto error; print_maxverbose("Writing initial header at %lld\n", ctis->initial_pos); for (j = 0; j < ctis->num_streams; j++) { /* If encrypting, we leave SALT_LEN room to write in salt * later */ if (ENCRYPT) { if 
(unlikely(write_val(control, 0, SALT_LEN))) fatal_goto(("Failed to write_buf blank salt in compthread %d\n", i), error); ctis->cur_pos += SALT_LEN; } ctis->s[j].last_head = ctis->cur_pos + 1 + (write_len * 2); write_u8(control, CTYPE_NONE); write_val(control, 0, write_len); write_val(control, 0, write_len); write_val(control, 0, write_len); ctis->cur_pos += 1 + (write_len * 3); } } print_maxverbose("Compthread %ld seeking to %lld to store length %d\n", i, ctis->s[cti->streamno].last_head, write_len); if (unlikely(seekto(control, ctis, ctis->s[cti->streamno].last_head))) fatal_goto(("Failed to seekto in compthread %d\n", i), error); if (unlikely(write_val(control, ctis->cur_pos, write_len))) fatal_goto(("Failed to write_val cur_pos in compthread %d\n", i), error); if (ENCRYPT) rewrite_encrypted(control, ctis, ctis->s[cti->streamno].last_head - 17); ctis->s[cti->streamno].last_head = ctis->cur_pos + 1 + (write_len * 2) + (ENCRYPT ? SALT_LEN : 0); print_maxverbose("Compthread %ld seeking to %lld to write header\n", i, ctis->cur_pos); if (unlikely(seekto(control, ctis, ctis->cur_pos))) fatal_goto(("Failed to seekto cur_pos in compthread %d\n", i), error); print_maxverbose("Thread %ld writing %lld compressed bytes from stream %d\n", i, padded_len, cti->streamno); if (ENCRYPT) { if (unlikely(write_val(control, 0, SALT_LEN))) fatal_goto(("Failed to write_buf header salt in compthread %d\n", i), error); ctis->cur_pos += SALT_LEN; ctis->s[cti->streamno].last_headofs = ctis->cur_pos; } /* We store the actual c_len even though we might pad it out */ if (unlikely(write_u8(control, cti->c_type) || write_val(control, cti->c_len, write_len) || write_val(control, cti->s_len, write_len) || write_val(control, 0, write_len))) { fatal_goto(("Failed write in compthread %d\n", i), error); } ctis->cur_pos += 1 + (write_len * 3); if (ENCRYPT) { if (unlikely(!get_rand(control, cti->salt, SALT_LEN))) goto error; if (unlikely(write_buf(control, cti->salt, SALT_LEN))) fatal_goto(("Failed to 
write_buf block salt in compthread %d\n", i), error); if (unlikely(!lrz_encrypt(control, cti->s_buf, padded_len, cti->salt))) goto error; ctis->cur_pos += SALT_LEN; } print_maxverbose("Compthread %ld writing data at %lld\n", i, ctis->cur_pos); if (unlikely(write_buf(control, cti->s_buf, padded_len))) fatal_goto(("Failed to write_buf s_buf in compthread %d\n", i), error); ctis->cur_pos += padded_len; dealloc(cti->s_buf); lock_mutex(control, &output_lock); if (++output_thread == control->threads) output_thread = 0; cond_broadcast(control, &output_cond); unlock_mutex(control, &output_lock); error: cksem_post(control, &cti->cksem); return NULL; } static void clear_buffer(rzip_control *control, struct stream_info *sinfo, int streamno, int newbuf) { pthread_t *threads = control->pthreads; stream_thread_struct *s; static int i = 0; /* Make sure this thread doesn't already exist */ cksem_wait(control, &cthreads[i].cksem); cthreads[i].sinfo = sinfo; cthreads[i].streamno = streamno; cthreads[i].s_buf = sinfo->s[streamno].buf; cthreads[i].s_len = sinfo->s[streamno].buflen; print_maxverbose("Starting thread %ld to compress %lld bytes from stream %d\n", i, cthreads[i].s_len, streamno); s = malloc(sizeof(stream_thread_struct)); if (unlikely(!s)) { cksem_post(control, &cthreads[i].cksem); failure("Unable to malloc in clear_buffer"); } s->i = i; s->control = control; if (unlikely((!create_pthread(control, &threads[i], NULL, compthread, s)) || (!detach_pthread(control, &threads[i])))) failure("Unable to create compthread in clear_buffer"); if (newbuf) { /* The stream buffer has been given to the thread, allocate a * new one. 
*/ sinfo->s[streamno].buf = malloc(sinfo->bufsize); if (unlikely(!sinfo->s[streamno].buf)) failure("Unable to malloc buffer of size %lld in flush_buffer\n", sinfo->bufsize); sinfo->s[streamno].buflen = 0; } if (++i == control->threads) i = 0; } /* flush out any data in a stream buffer */ void flush_buffer(rzip_control *control, struct stream_info *sinfo, int streamno) { clear_buffer(control, sinfo, streamno, 1); } static void *ucompthread(void *data) { stream_thread_struct *sts = data; rzip_control *control = sts->control; int waited = 0, ret = 0, i = sts->i; struct uncomp_thread *uci = &sts->sinfo->ucthreads[i]; dealloc(data); if (unlikely(setpriority(PRIO_PROCESS, 0, control->nice_val) == -1)) { print_err("Warning, unable to set thread nice value %d...Resetting to %d\n", control->nice_val, control->current_priority); setpriority(PRIO_PROCESS, 0, (control->nice_val=control->current_priority)); } retry: if (uci->c_type != CTYPE_NONE) { switch (uci->c_type) { case CTYPE_LZMA: ret = lzma_decompress_buf(control, uci); break; case CTYPE_LZO: ret = lzo_decompress_buf(control, uci); break; case CTYPE_BZIP2: ret = bzip2_decompress_buf(control, uci); break; case CTYPE_GZIP: ret = gzip_decompress_buf(control, uci); break; case CTYPE_ZPAQ: ret = zpaq_decompress_buf(control, uci, i); break; default: failure_return(("Dunno wtf decompression type to use!\n"), NULL); break; } } /* As per compression, serialise the decompression if it fails in * parallel */ if (unlikely(ret)) { if (unlikely(waited)) failure_return(("Failed to decompress in ucompthread\n"), (void*)1); print_maxverbose("Unable to decompress in parallel, waiting for previous thread to complete before trying again\n"); /* We do not strictly need to wait for this, so it's used when * decompression fails due to inadequate memory to try again * serialised. 
*/ lock_mutex(control, &output_lock); while (output_thread != i) cond_wait(control, &output_cond, &output_lock); unlock_mutex(control, &output_lock); waited = 1; goto retry; } print_maxverbose("Thread %ld decompressed %lld bytes from stream %d\n", i, uci->u_len, uci->streamno); return NULL; } /* fill a buffer from a stream - return -1 on failure */ static int fill_buffer(rzip_control *control, struct stream_info *sinfo, struct stream *s, int streamno) { i64 u_len, c_len, last_head, padded_len, header_length, max_len; uchar enc_head[25 + SALT_LEN], blocksalt[SALT_LEN]; struct uncomp_thread *ucthreads = sinfo->ucthreads; pthread_t *threads = control->pthreads; stream_thread_struct *sts; uchar c_type, *s_buf; void *thr_return; dealloc(s->buf); if (s->eos) goto out; fill_another: if (unlikely(ucthreads[s->uthread_no].busy)) failure_return(("Trying to start a busy thread, this shouldn't happen!\n"), -1); if (unlikely(read_seekto(control, sinfo, s->last_head))) return -1; if (ENCRYPT) { if (unlikely(read_buf(control, sinfo->fd, enc_head, SALT_LEN))) return -1; sinfo->total_read += SALT_LEN; } if (unlikely(read_u8(control, sinfo->fd, &c_type))) return -1; /* Compatibility crap for versions < 0.4 */ if (control->major_version == 0 && control->minor_version < 4) { u32 c_len32, u_len32, last_head32; if (unlikely(read_u32(control, sinfo->fd, &c_len32))) return -1; if (unlikely(read_u32(control, sinfo->fd, &u_len32))) return -1; if (unlikely(read_u32(control, sinfo->fd, &last_head32))) return -1; c_len = c_len32; u_len = u_len32; last_head = last_head32; header_length = 13; } else { int read_len; print_maxverbose("Reading ucomp header at %lld\n", get_readseek(control, sinfo->fd)); if ((control->major_version == 0 && control->minor_version < 6) || ENCRYPT) read_len = 8; else read_len = sinfo->chunk_bytes; if (unlikely(read_val(control, sinfo->fd, &c_len, read_len))) return -1; if (unlikely(read_val(control, sinfo->fd, &u_len, read_len))) return -1; if 
(unlikely(read_val(control, sinfo->fd, &last_head, read_len))) return -1; header_length = 1 + (read_len * 3); } sinfo->total_read += header_length; if (ENCRYPT) { if (unlikely(!decrypt_header(control, enc_head, &c_type, &c_len, &u_len, &last_head))) return -1; if (unlikely(read_buf(control, sinfo->fd, blocksalt, SALT_LEN))) return -1; sinfo->total_read += SALT_LEN; } c_len = le64toh(c_len); u_len = le64toh(u_len); last_head = le64toh(last_head); print_maxverbose("Fill_buffer stream %d c_len %lld u_len %lld last_head %lld\n", streamno, c_len, u_len, last_head); /* It is possible for there to be an empty match block at the end of * incompressible data */ if (unlikely(c_len == 0 && u_len == 0 && streamno == 1 && last_head == 0)) { print_maxverbose("Skipping empty match block\n"); goto skip_empty; } /* Check for invalid data and that the last_head is actually moving * forward correctly. */ if (unlikely(c_len < 1 || u_len < 1 || last_head < 0 || (last_head && last_head <= s->last_head))) { fatal_return(("Invalid data compressed len %lld uncompressed %lld last_head %lld\n", c_len, u_len, last_head), -1); } padded_len = MAX(c_len, MIN_SIZE); sinfo->total_read += padded_len; fsync(control->fd_out); if (unlikely(u_len > control->maxram)) print_output("Warning, attempting to malloc very large buffer for this environment of size %lld\n", u_len); max_len = MAX(u_len, MIN_SIZE); max_len = MAX(max_len, c_len); s_buf = malloc(max_len); if (unlikely(!s_buf)) fatal_return(("Unable to malloc buffer of size %lld in fill_buffer\n", u_len), -1); sinfo->ram_alloced += u_len; if (unlikely(read_buf(control, sinfo->fd, s_buf, padded_len))) { dealloc(s_buf); return -1; } if (unlikely(ENCRYPT && !lrz_decrypt(control, s_buf, padded_len, blocksalt))) { dealloc(s_buf); return -1; } ucthreads[s->uthread_no].s_buf = s_buf; ucthreads[s->uthread_no].c_len = c_len; ucthreads[s->uthread_no].u_len = u_len; ucthreads[s->uthread_no].c_type = c_type; ucthreads[s->uthread_no].streamno = streamno; 
s->last_head = last_head; /* List this thread as busy */ ucthreads[s->uthread_no].busy = 1; print_maxverbose("Starting thread %ld to decompress %lld bytes from stream %d\n", s->uthread_no, padded_len, streamno); sts = malloc(sizeof(stream_thread_struct)); if (unlikely(!sts)) fatal_return(("Unable to malloc in fill_buffer"), -1); sts->i = s->uthread_no; sts->control = control; sts->sinfo = sinfo; if (unlikely(!create_pthread(control, &threads[s->uthread_no], NULL, ucompthread, sts))) { dealloc(sts); return -1; } if (++s->uthread_no == s->base_thread + s->total_threads) s->uthread_no = s->base_thread; skip_empty: /* Reached the end of this stream, no more data to read in, otherwise * see if the next thread is free to grab more data. We also check that * we're not going to be allocating too much ram to generate all these * threads. */ if (!last_head) s->eos = 1; else if (s->uthread_no != s->unext_thread && !ucthreads[s->uthread_no].busy && sinfo->ram_alloced < control->maxram) goto fill_another; out: lock_mutex(control, &output_lock); output_thread = s->unext_thread; cond_broadcast(control, &output_cond); unlock_mutex(control, &output_lock); /* join_pthread here will make it wait till the data is ready */ thr_return = NULL; if (unlikely(!join_pthread(control, threads[s->unext_thread], &thr_return) || !!thr_return)) return -1; ucthreads[s->unext_thread].busy = 0; print_maxverbose("Taking decompressed data from thread %ld\n", s->unext_thread); s->buf = ucthreads[s->unext_thread].s_buf; ucthreads[s->unext_thread].s_buf = NULL; s->buflen = ucthreads[s->unext_thread].u_len; sinfo->ram_alloced -= s->buflen; s->bufp = 0; if (++s->unext_thread == s->base_thread + s->total_threads) s->unext_thread = s->base_thread; return 0; } /* write some data to a stream. 
Return -1 on failure */ void write_stream(rzip_control *control, void *ss, int streamno, uchar *p, i64 len) { struct stream_info *sinfo = ss; while (len) { i64 n; n = MIN(sinfo->bufsize - sinfo->s[streamno].buflen, len); memcpy(sinfo->s[streamno].buf + sinfo->s[streamno].buflen, p, n); sinfo->s[streamno].buflen += n; p += n; len -= n; /* Flush the buffer every sinfo->bufsize into one thread */ if (sinfo->s[streamno].buflen == sinfo->bufsize) flush_buffer(control, sinfo, streamno); } } /* read some data from a stream. Return number of bytes read, or -1 on failure */ i64 read_stream(rzip_control *control, void *ss, int streamno, uchar *p, i64 len) { struct stream_info *sinfo = ss; struct stream *s = &sinfo->s[streamno]; i64 ret = 0; while (len) { i64 n; n = MIN(s->buflen - s->bufp, len); if (n > 0) { if (unlikely(!s->buf)) failure_return(("Stream ran out prematurely, likely corrupt archive\n"), -1); memcpy(p, s->buf + s->bufp, n); s->bufp += n; p += n; len -= n; ret += n; } if (len && s->bufp == s->buflen) { if (unlikely(fill_buffer(control, sinfo, s, streamno))) return -1; if (s->bufp == s->buflen) break; } } return ret; } /* flush and close down a stream. return -1 on failure */ int close_stream_out(rzip_control *control, void *ss) { struct stream_info *sinfo = ss; int i; for (i = 0; i < sinfo->num_streams; i++) clear_buffer(control, sinfo, i, 0); if (ENCRYPT) { /* Last two compressed blocks do not have an offset written * to them so we have to go back and encrypt them now, but we * must wait till the threads return. 
*/ int close_thread = output_thread; for (i = 0; i < control->threads; i++) { cksem_wait(control, &cthreads[close_thread].cksem); cksem_post(control, &cthreads[close_thread].cksem); if (++close_thread == control->threads) close_thread = 0; } for (i = 0; i < sinfo->num_streams; i++) rewrite_encrypted(control, sinfo, sinfo->s[i].last_headofs); } /* Note that sinfo->s and sinfo are not released here but after compression * has completed as they cannot be freed immediately because their values * are read after the next stream has started. */ return 0; } /* Add to an runzip list to safely deallocate memory after all threads have * returned. */ static void add_to_rulist(rzip_control *control, struct stream_info *sinfo) { struct runzip_node *node = calloc(sizeof(struct runzip_node), 1); if (unlikely(!node)) failure("Failed to calloc struct node in add_rulist\n"); node->sinfo = sinfo; node->pthreads = control->pthreads; lock_mutex(control, &control->control_lock); node->prev = control->ruhead; control->ruhead = node; unlock_mutex(control, &control->control_lock); } /* close down an input stream */ int close_stream_in(rzip_control *control, void *ss) { struct stream_info *sinfo = ss; int i; print_maxverbose("Closing stream at %lld, want to seek to %lld\n", get_readseek(control, control->fd_in), sinfo->initial_pos + sinfo->total_read); if (unlikely(read_seekto(control, sinfo, sinfo->total_read))) return -1; for (i = 0; i < sinfo->num_streams; i++) dealloc(sinfo->s[i].buf); output_thread = 0; /* We cannot safely release the sinfo and pthread data here till all * threads are shut down. */ add_to_rulist(control, sinfo); return 0; } /* As others are slow and lz4 very fast, it is worth doing a quick lz4 pass to see if there is any compression at all with lz4 first. It is unlikely that others will be able to compress if lz4 is unable to drop a single byte so do not compress any block that is incompressible by lz4. 
*/ static int lz4_compresses(rzip_control *control, uchar *s_buf, i64 s_len) { int dlen, test_len; char *c_buf = NULL, *test_buf = (char *)s_buf; int ret = 0; int workcounter = 0; /* count # of passes */ int best_dlen = INT_MAX; /* save best compression estimate */ if (!LZ4_TEST) return 1; dlen = MIN(s_len, STREAM_BUFSIZE); test_len = MIN(dlen, STREAM_BUFSIZE >> 8); c_buf = malloc(dlen); if (unlikely(!c_buf)) fatal_return(("Unable to allocate c_buf in lz4_compresses\n"), 0); /* Test progressively larger blocks at a time and as soon as anything compressible is found, jump out as a success */ do { int lz4_ret; workcounter++; lz4_ret = LZ4_compress_default((const char *)test_buf, c_buf, test_len, dlen); if (!lz4_ret) // Bigger than dlen lz4_ret = test_len; if (lz4_ret < best_dlen) best_dlen = lz4_ret; if (lz4_ret < test_len) { ret = 1; break; } /* expand test length */ test_len <<= 1; } while (test_len <= dlen); if (!ret) print_maxverbose("lz4 testing FAILED for chunk %ld. %d Passes\n", workcounter); else { print_maxverbose("lz4 testing OK for chunk %ld. Compressed size = %5.2F%% of chunk, %d Passes\n", s_len, 100 * ((double) best_dlen / (double) test_len), workcounter); } dealloc(c_buf); return ret; } lrzip-0.651/stream.h000066400000000000000000000041311421175057200143340ustar00rootroot00000000000000/* Copyright (C) 2006-2016 Con Kolivas Copyright (C) 2011 Peter Hyman Copyright (C) 1998-2003 Andrew Tridgell This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. 
   If not, see <http://www.gnu.org/licenses/>.
*/

/* Public declarations for the stream layer: thread and mutex wrappers,
 * chunked read/write helpers and the stream open/read/write/close API. */
#ifndef LRZIP_STREAM_H
#define LRZIP_STREAM_H

#include "lrzip_private.h"
/* NOTE(review): the header name is missing on the next line (stripped in
 * this copy); presumably <pthread.h> given the pthread_* types used below -
 * confirm against the pristine source. */
#include

bool create_pthread(rzip_control *control, pthread_t *thread, pthread_attr_t *attr,
	void * (*start_routine)(void *), void *arg);
/* NOTE(review): fill_buffer() in stream.c calls
 * join_pthread(control, thread, &ret) with three arguments, which does not
 * match this two-argument prototype - confirm against the definition. */
bool join_pthread(pthread_t th, void **thread_return);
bool init_mutex(rzip_control *control, pthread_mutex_t *mutex);
bool unlock_mutex(rzip_control *control, pthread_mutex_t *mutex);
bool lock_mutex(rzip_control *control, pthread_mutex_t *mutex);
ssize_t write_1g(rzip_control *control, void *buf, i64 len);
ssize_t read_1g(rzip_control *control, int fd, void *buf, i64 len);
i64 get_readseek(rzip_control *control, int fd);
bool prepare_streamout_threads(rzip_control *control);
bool close_streamout_threads(rzip_control *control);
void *open_stream_out(rzip_control *control, int f, unsigned int n, i64 chunk_limit, char cbytes);
void *open_stream_in(rzip_control *control, int f, int n, char cbytes);
/* Hands the stream's current buffer to a compression thread */
void flush_buffer(rzip_control *control, struct stream_info *sinfo, int stream);
/* ss is the struct stream_info pointer returned by open_stream_out/in */
void write_stream(rzip_control *control, void *ss, int streamno, uchar *p, i64 len);
/* Returns the number of bytes read, or -1 on failure */
i64 read_stream(rzip_control *control, void *ss, int streamno, uchar *p, i64 len);
int close_stream_out(rzip_control *control, void *ss);
int close_stream_in(rzip_control *control, void *ss);
ssize_t put_fdout(rzip_control *control, void *offset_buf, ssize_t ret);

#endif
lrzip-0.651/util.c000066400000000000000000000335311421175057200140170ustar00rootroot00000000000000/*
   Copyright (C) 2006-2016,2021-2022 Con Kolivas
   Copyright (C) 2011 Serge Belyshev
   Copyright (C) 2008, 2011 Peter Hyman
   Copyright (C) 1998 Andrew Tridgell

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ /* Utilities used in rzip tridge, June 1996 */ /* * Realloc removed * Functions added * read_config() * Peter Hyman, December 2008 */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include #ifdef HAVE_UNISTD_H # include #endif #include #ifdef _SC_PAGE_SIZE # define PAGE_SIZE (sysconf(_SC_PAGE_SIZE)) #else # define PAGE_SIZE (4096) #endif #include #include #include #include #include "lrzip_private.h" #include "util.h" #include "sha4.h" #include "aes.h" #ifdef HAVE_CTYPE_H # include #endif /* Macros for testing parameters */ #define isparameter( parmstring, value ) (!strcasecmp( parmstring, value )) #define iscaseparameter( parmvalue, value ) (!strcmp( parmvalue, value )) void register_infile(rzip_control *control, const char *name, char delete) { control->util_infile = name; control->delete_infile = delete; } void register_outfile(rzip_control *control, const char *name, char delete) { control->util_outfile = name; control->delete_outfile = delete; } void register_outputfile(rzip_control *control, FILE *f) { control->outputfile = f; } void unlink_files(rzip_control *control) { /* Delete temporary files generated for testing or faking stdio */ if (control->util_outfile && control->delete_outfile) unlink(control->util_outfile); if (control->util_infile && control->delete_infile) unlink(control->util_infile); } void fatal_exit(rzip_control *control) { struct termios termios_p; /* Make sure we haven't died after disabling stdin echo */ tcgetattr(fileno(stdin), &termios_p); termios_p.c_lflag |= ECHO; tcsetattr(fileno(stdin), 0, &termios_p); unlink_files(control); if (!STDOUT && !TEST_ONLY && control->outfile) { if (!KEEP_BROKEN) { 
print_verbose("Deleting broken file %s\n", control->outfile); unlink(control->outfile); } else print_verbose("Keeping broken file %s as requested\n", control->outfile); } fprintf(control->outputfile, "Fatal error - exiting\n"); fflush(control->outputfile); exit(1); } void setup_overhead(rzip_control *control) { /* Work out the compression overhead per compression thread for the * compression back-ends that need a lot of ram */ if (LZMA_COMPRESS) { int level = control->compression_level * 7 / 9; if (!level) level = 1; i64 dictsize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26))); control->overhead = (dictsize * 23 / 2) + (6 * 1024 * 1024) + 16384; /* LZMA spec shows memory requirements as 6MB, not 4MB and state size * where default is 16KB */ } else if (ZPAQ_COMPRESS) control->overhead = 112 * 1024 * 1024; } void setup_ram(rzip_control *control) { /* Use less ram when using STDOUT to store the temporary output file. */ if (STDOUT && ((STDIN && DECOMPRESS) || !(DECOMPRESS || TEST_ONLY))) control->maxram = control->ramsize / 6; else control->maxram = control->ramsize / 3; if (BITS32) { /* Decrease usable ram size on 32 bits due to kernel / * userspace split. Cannot allocate larger than a 1 * gigabyte chunk due to 32 bit signed long being * used in alloc, and at most 3GB can be malloced, and * 2/3 of that makes for a total of 2GB to be split * into thirds. 
*/ control->usable_ram = MAX(control->ramsize - 900000000ll, 900000000ll); control->maxram = MIN(control->maxram, control->usable_ram); control->maxram = MIN(control->maxram, one_g * 2 / 3); } else control->usable_ram = control->maxram; round_to_page(&control->maxram); } void round_to_page(i64 *size) { *size -= *size % PAGE_SIZE; if (unlikely(!*size)) *size = PAGE_SIZE; } size_t round_up_page(rzip_control *control, size_t len) { int rem = len % control->page_size; if (rem) len += control->page_size - rem; return len; } bool get_rand(rzip_control *control, uchar *buf, int len) { int fd, i; fd = open("/dev/urandom", O_RDONLY); if (fd == -1) { for (i = 0; i < len; i++) buf[i] = (uchar)random(); } else { if (unlikely(read(fd, buf, len) != len)) fatal_return(("Failed to read fd in get_rand\n"), false); if (unlikely(close(fd))) fatal_return(("Failed to close fd in get_rand\n"), false); } return true; } bool read_config(rzip_control *control) { /* check for lrzip.conf in ., $HOME/.lrzip and /etc/lrzip */ char *HOME, homeconf[255]; char *parametervalue; char *parameter; char line[255]; FILE *fp; fp = fopen("lrzip.conf", "r"); if (fp) fprintf(control->msgout, "Using configuration file ./lrzip.conf\n"); if (fp == NULL) { HOME=getenv("HOME"); if (HOME) { snprintf(homeconf, sizeof(homeconf), "%s/.lrzip/lrzip.conf", HOME); fp = fopen(homeconf, "r"); if (fp) fprintf(control->msgout, "Using configuration file %s\n", homeconf); } } if (fp == NULL) { fp = fopen("/etc/lrzip/lrzip.conf", "r"); if (fp) fprintf(control->msgout, "Using configuration file /etc/lrzip/lrzip.conf\n"); } if (fp == NULL) return false; /* if we get here, we have a file. read until no more. 
*/ while ((fgets(line, 255, fp)) != NULL) { if (strlen(line)) line[strlen(line) - 1] = '\0'; parameter = strtok(line, " ="); if (parameter == NULL) continue; /* skip if whitespace or # */ if (isspace(*parameter)) continue; if (*parameter == '#') continue; parametervalue = strtok(NULL, " ="); if (parametervalue == NULL) continue; /* have valid parameter line, now assign to control */ if (isparameter(parameter, "window")) control->window = atoi(parametervalue); else if (isparameter(parameter, "unlimited")) { if (isparameter(parametervalue, "yes")) control->flags |= FLAG_UNLIMITED; } else if (isparameter(parameter, "compressionlevel")) { control->compression_level = atoi(parametervalue); if ( control->compression_level < 1 || control->compression_level > 9 ) failure_return(("CONF.FILE error. Compression Level must between 1 and 9"), false); } else if (isparameter(parameter, "compressionmethod")) { /* valid are rzip, gzip, bzip2, lzo, lzma (default), and zpaq */ if (control->flags & FLAG_NOT_LZMA) failure_return(("CONF.FILE error. Can only specify one compression method"), false); if (isparameter(parametervalue, "bzip2")) control->flags |= FLAG_BZIP2_COMPRESS; else if (isparameter(parametervalue, "gzip")) control->flags |= FLAG_ZLIB_COMPRESS; else if (isparameter(parametervalue, "lzo")) control->flags |= FLAG_LZO_COMPRESS; else if (isparameter(parametervalue, "rzip")) control->flags |= FLAG_NO_COMPRESS; else if (isparameter(parametervalue, "zpaq")) control->flags |= FLAG_ZPAQ_COMPRESS; else if (!isparameter(parametervalue, "lzma")) /* oops, not lzma! */ failure_return(("CONF.FILE error. 
Invalid compression method %s specified\n",parametervalue), false); } else if (isparameter(parameter, "lzotest")) { /* default is yes */ if (isparameter(parametervalue, "no")) control->flags &= ~FLAG_THRESHOLD; } else if (isparameter(parameter, "hashcheck")) { if (isparameter(parametervalue, "yes")) { control->flags |= FLAG_CHECK; control->flags |= FLAG_HASH; } } else if (isparameter(parameter, "showhash")) { if (isparameter(parametervalue, "yes")) control->flags |= FLAG_HASH; } else if (isparameter(parameter, "outputdirectory")) { control->outdir = malloc(strlen(parametervalue) + 2); if (!control->outdir) fatal_return(("Fatal Memory Error in read_config"), false); strcpy(control->outdir, parametervalue); if (strcmp(parametervalue + strlen(parametervalue) - 1, "/")) strcat(control->outdir, "/"); } else if (isparameter(parameter,"verbosity")) { if (control->flags & FLAG_VERBOSE) failure_return(("CONF.FILE error. Verbosity already defined."), false); if (isparameter(parametervalue, "yes")) control->flags |= FLAG_VERBOSITY; else if (isparameter(parametervalue,"max")) control->flags |= FLAG_VERBOSITY_MAX; else /* oops, unrecognized value */ print_err("lrzip.conf: Unrecognized verbosity value %s. Ignored.\n", parametervalue); } else if (isparameter(parameter, "showprogress")) { /* Yes by default */ if (isparameter(parametervalue, "NO")) control->flags &= ~FLAG_SHOW_PROGRESS; } else if (isparameter(parameter,"nice")) { control->nice_val = atoi(parametervalue); if (control->nice_val < -20 || control->nice_val > 19) failure_return(("CONF.FILE error. 
Nice must be between -20 and 19"), false); } else if (isparameter(parameter, "keepbroken")) { if (isparameter(parametervalue, "yes" )) control->flags |= FLAG_KEEP_BROKEN; } else if (iscaseparameter(parameter, "DELETEFILES")) { /* delete files must be case sensitive */ if (iscaseparameter(parametervalue, "YES")) control->flags &= ~FLAG_KEEP_FILES; } else if (iscaseparameter(parameter, "REPLACEFILE")) { /* replace lrzip file must be case sensitive */ if (iscaseparameter(parametervalue, "YES")) control->flags |= FLAG_FORCE_REPLACE; } else if (isparameter(parameter, "tmpdir")) { control->tmpdir = realloc(NULL, strlen(parametervalue) + 2); if (!control->tmpdir) fatal_return(("Fatal Memory Error in read_config"), false); strcpy(control->tmpdir, parametervalue); if (strcmp(parametervalue + strlen(parametervalue) - 1, "/")) strcat(control->tmpdir, "/"); } else if (isparameter(parameter, "encrypt")) { if (isparameter(parameter, "YES")) control->flags |= FLAG_ENCRYPT; } else /* oops, we have an invalid parameter, display */ print_err("lrzip.conf: Unrecognized parameter value, %s = %s. 
Continuing.\n",\ parameter, parametervalue); } if (unlikely(fclose(fp))) fatal_return(("Failed to fclose fp in read_config\n"), false); /* fprintf(stderr, "\nWindow = %d \ \nCompression Level = %d \ \nThreshold = %1.2f \ \nOutput Directory = %s \ \nFlags = %d\n", control->window,control->compression_level, control->threshold, control->outdir, control->flags); */ return true; } static void xor128 (void *pa, const void *pb) { i64 *a = pa; const i64 *b = pb; a [0] ^= b [0]; a [1] ^= b [1]; } static void lrz_keygen(const rzip_control *control, const uchar *salt, uchar *key, uchar *iv) { uchar buf [HASH_LEN + SALT_LEN + PASS_LEN]; mlock(buf, HASH_LEN + SALT_LEN + PASS_LEN); memcpy(buf, control->hash, HASH_LEN); memcpy(buf + HASH_LEN, salt, SALT_LEN); memcpy(buf + HASH_LEN + SALT_LEN, control->salt_pass, control->salt_pass_len); sha4(buf, HASH_LEN + SALT_LEN + control->salt_pass_len, key, 0); memcpy(buf, key, HASH_LEN); memcpy(buf + HASH_LEN, salt, SALT_LEN); memcpy(buf + HASH_LEN + SALT_LEN, control->salt_pass, control->salt_pass_len); sha4(buf, HASH_LEN + SALT_LEN + control->salt_pass_len, iv, 0); memset(buf, 0, sizeof(buf)); munlock(buf, sizeof(buf)); } bool lrz_crypt(const rzip_control *control, uchar *buf, i64 len, const uchar *salt, int encrypt) { /* Encryption requires CBC_LEN blocks so we can use ciphertext * stealing to not have to pad the block */ uchar key[HASH_LEN], iv[HASH_LEN]; uchar tmp0[CBC_LEN], tmp1[CBC_LEN]; aes_context aes_ctx; i64 N, M; bool ret = false; /* Generate unique key and IV for each block of data based on salt */ mlock(&aes_ctx, sizeof(aes_ctx)); mlock(key, HASH_LEN); mlock(iv, HASH_LEN); lrz_keygen(control, salt, key, iv); M = len % CBC_LEN; N = len - M; if (encrypt == LRZ_ENCRYPT) { print_maxverbose("Encrypting data \n"); if (unlikely(aes_setkey_enc(&aes_ctx, key, 128))) failure_goto(("Failed to aes_setkey_enc in lrz_crypt\n"), error); aes_crypt_cbc(&aes_ctx, AES_ENCRYPT, N, iv, buf, buf); if (M) { memset(tmp0, 0, CBC_LEN); memcpy(tmp0, 
buf + N, M); aes_crypt_cbc(&aes_ctx, AES_ENCRYPT, CBC_LEN, iv, tmp0, tmp1); memcpy(buf + N, buf + N - CBC_LEN, M); memcpy(buf + N - CBC_LEN, tmp1, CBC_LEN); } } else { if (unlikely(aes_setkey_dec(&aes_ctx, key, 128))) failure_goto(("Failed to aes_setkey_dec in lrz_crypt\n"), error); print_maxverbose("Decrypting data \n"); if (M) { aes_crypt_cbc(&aes_ctx, AES_DECRYPT, N - CBC_LEN, iv, buf, buf); aes_crypt_ecb(&aes_ctx, AES_DECRYPT, buf + N - CBC_LEN, tmp0); memset(tmp1, 0, CBC_LEN); memcpy(tmp1, buf + N, M); xor128(tmp0, tmp1); memcpy(buf + N, tmp0, M); memcpy(tmp1 + M, tmp0 + M, CBC_LEN - M); aes_crypt_ecb(&aes_ctx, AES_DECRYPT, tmp1, buf + N - CBC_LEN); xor128(buf + N - CBC_LEN, iv); } else aes_crypt_cbc(&aes_ctx, AES_DECRYPT, len, iv, buf, buf); } ret = true; error: memset(&aes_ctx, 0, sizeof(aes_ctx)); memset(iv, 0, HASH_LEN); memset(key, 0, HASH_LEN); munlock(&aes_ctx, sizeof(aes_ctx)); munlock(iv, HASH_LEN); munlock(key, HASH_LEN); return ret; } void lrz_stretch(rzip_control *control) { sha4_context ctx; i64 j, n, counter; mlock(&ctx, sizeof(ctx)); sha4_starts(&ctx, 0); n = control->encloops * HASH_LEN / (control->salt_pass_len + sizeof(i64)); print_maxverbose("Hashing passphrase %lld (%lld) times \n", control->encloops, n); for (j = 0; j < n; j ++) { counter = htole64(j); sha4_update(&ctx, (uchar *)&counter, sizeof(counter)); sha4_update(&ctx, control->salt_pass, control->salt_pass_len); } sha4_finish(&ctx, control->hash); memset(&ctx, 0, sizeof(ctx)); munlock(&ctx, sizeof(ctx)); } lrzip-0.651/util.h000066400000000000000000000121161421175057200140200ustar00rootroot00000000000000/* Copyright (C) 2006-2016 Con Kolivas Copyright (C) 2011 Peter Hyman Copyright (C) 1998 Andrew Tridgell This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef LRZIP_UTIL_H #define LRZIP_UTIL_H #include "lrzip_private.h" #include #include #include #include #include void register_infile(rzip_control *control, const char *name, char delete); void register_outfile(rzip_control *control, const char *name, char delete); void unlink_files(rzip_control *control); void register_outputfile(rzip_control *control, FILE *f); void fatal_exit(rzip_control *control); /* Failure when there is likely to be a meaningful error in perror */ static inline void fatal(const rzip_control *control, unsigned int line, const char *file, const char *func, const char *format, ...) { va_list ap; va_start(ap, format); if (!control->log_cb) { vfprintf(stderr, format, ap); perror(NULL); } else control->log_cb(control->log_data, 0, line, file, func, format, ap); va_end(ap); if (!control->library_mode) fatal_exit((rzip_control*)control); } #ifdef fatal # undef fatal #endif #define fatal(...) fatal(control, __LINE__, __FILE__, __func__, __VA_ARGS__) #define fatal_return(stuff, ...) do { \ fatal stuff; \ return __VA_ARGS__; \ } while (0) #define fatal_goto(stuff, label) do { \ fatal stuff; \ goto label; \ } while (0) static inline void failure(const rzip_control *control, unsigned int line, const char *file, const char *func, const char *format, ...) { va_list ap; va_start(ap, format); if (!control->log_cb) vfprintf(stderr, format, ap); else control->log_cb(control->log_data, 0, line, file, func, format, ap); va_end(ap); if (!control->library_mode) fatal_exit((rzip_control*)control); } #ifdef failure # undef failure #endif #define failure(...) 
failure(control, __LINE__, __FILE__, __func__, __VA_ARGS__) #define failure_return(stuff, ...) do { \ failure stuff; \ return __VA_ARGS__; \ } while (0) #define failure_goto(stuff, label) do { \ failure stuff; \ goto label; \ } while (0) void setup_overhead(rzip_control *control); void setup_ram(rzip_control *control); void round_to_page(i64 *size); size_t round_up_page(rzip_control *control, size_t len); bool get_rand(rzip_control *control, uchar *buf, int len); bool read_config(rzip_control *control); void lrz_stretch(rzip_control *control); void lrz_stretch2(rzip_control *control); bool lrz_crypt(const rzip_control *control, uchar *buf, i64 len, const uchar *salt, int encrypt); #define LRZ_DECRYPT (0) #define LRZ_ENCRYPT (1) static inline bool lrz_encrypt(const rzip_control *control, uchar *buf, i64 len, const uchar *salt) { return lrz_crypt(control, buf, len, salt, LRZ_ENCRYPT); } static inline bool lrz_decrypt(const rzip_control *control, uchar *buf, i64 len, const uchar *salt) { return lrz_crypt(control, buf, len, salt, LRZ_DECRYPT); } /* ck specific wrappers for true unnamed semaphore usage on platforms * that support them and for apple which does not. We use a single byte across * a pipe to emulate semaphore behaviour there. */ #ifdef __APPLE__ static inline void cksem_init(const rzip_control *control, cksem_t *cksem) { int flags, fd, i; if (pipe(cksem->pipefd) == -1) fatal("Failed pipe errno=%d", errno); /* Make the pipes FD_CLOEXEC to allow them to close should we call * execv on restart. 
*/ for (i = 0; i < 2; i++) { fd = cksem->pipefd[i]; flags = fcntl(fd, F_GETFD, 0); flags |= FD_CLOEXEC; if (fcntl(fd, F_SETFD, flags) == -1) fatal("Failed to fcntl errno=%d", errno); } } static inline void cksem_post(const rzip_control *control, cksem_t *cksem) { const char buf = 1; int ret; ret = write(cksem->pipefd[1], &buf, 1); if (unlikely(ret == 0)) fatal("Failed to write in cksem_post errno=%d", errno); } static inline void cksem_wait(const rzip_control *control, cksem_t *cksem) { char buf; int ret; ret = read(cksem->pipefd[0], &buf, 1); if (unlikely(ret == 0)) fatal("Failed to read in cksem_post errno=%d", errno); } #else static inline void cksem_init(const rzip_control *control, cksem_t *cksem) { int ret; if ((ret = sem_init(cksem, 0, 0))) fatal("Failed to sem_init ret=%d errno=%d", ret, errno); } static inline void cksem_post(const rzip_control *control, cksem_t *cksem) { if (unlikely(sem_post(cksem))) fatal("Failed to sem_post errno=%d cksem=0x%p", errno, cksem); } static inline void cksem_wait(const rzip_control *control, cksem_t *cksem) { if (unlikely(sem_wait(cksem))) fatal("Failed to sem_wait errno=%d cksem=0x%p", errno, cksem); } #endif #endif lrzip-0.651/util/000077500000000000000000000000001421175057200136465ustar00rootroot00000000000000lrzip-0.651/util/gitdesc.sh000077500000000000000000000036161421175057200156350ustar00rootroot00000000000000#!/bin/bash # Peter Hyman, pete@peterhyman.com # December 2020 # This program will return commit references based on Tags and Annotated Tags from git describe usage() { cat >&2 < 0 then add it and commit to micro version # Expected format is: # v#.###-g####### init() { describe_tag=$(git describe $tagopt --long --abbrev=7) describe_tag=${describe_tag/v/} describe_tag=${describe_tag/g/} commit=$(echo $describe_tag | cut -d- -f3) tagrev=$(echo $describe_tag | cut -d- -f2) version=$(echo $describe_tag | cut -d- -f1) micro=${version: -2} [ $tagrev -gt 0 ] && micro=$micro-$tagrev-$commit minor=${version: -3:1} 
major=$(echo $version | cut -d. -f1) } [ ! $(which git) ] && die "Something very wrong: git not found." [ $# -eq 0 ] && die "Must provide a command and optional argument." # are we getting a release only? if [ $# -eq 2 ]; then if [ $2 = "-r" ]; then tagopt="" else die "Invalid option. Must be -r or nothing." fi fi init case "$1" in "all" ) retval=$describe_tag ;; "commit" ) retval=$commit ;; "tagrev" ) retval=$tagrev ;; "version" ) retval=$version ;; "major" ) retval=$major ;; "minor" ) retval=$minor ;; "micro" ) retval=$micro ;; * ) die "Invalid command." ;; esac echo $retval exit 0