pigz-2.3/0000755000076500000240000000000012115050165011452 5ustar madlerstaffpigz-2.3/Makefile0000644000076500000240000000510212115034753013115 0ustar madlerstaffCC=cc CFLAGS=-O3 -Wall -Wextra pigz: pigz.o yarn.o zopfli/deflate.o zopfli/blocksplitter.o zopfli/tree.o zopfli/lz77.o zopfli/cache.o zopfli/hash.o zopfli/util.o zopfli/squeeze.o zopfli/katajainen.o $(CC) -o pigz $^ -lpthread -lz ln -f pigz unpigz pigz.o: pigz.c yarn.h zopfli/deflate.h zopfli/util.h yarn.o: yarn.c yarn.h zopfli/deflate.o: zopfli/deflate.c zopfli/deflate.h zopfli/blocksplitter.h zopfli/lz77.h zopfli/squeeze.h zopfli/tree.h zopfli/util.h zopfli/blocksplitter.o: zopfli/blocksplitter.c zopfli/blocksplitter.h zopfli/deflate.h zopfli/lz77.h zopfli/squeeze.h zopfli/tree.h zopfli/util.h zopfli/tree.o: zopfli/tree.c zopfli/tree.h zopfli/katajainen.h zopfli/util.h zopfli/lz77.o: zopfli/lz77.c zopfli/lz77.h zopfli/cache.h zopfli/hash.h zopfli/util.h zopfli/cache.o: zopfli/cache.c zopfli/cache.h zopfli/util.h zopfli/hash.o: zopfli/hash.c zopfli/hash.h zopfli/util.h zopfli/util.o: zopfli/util.c zopfli/util.h zopfli/squeeze.o: zopfli/squeeze.c zopfli/squeeze.h zopfli/blocksplitter.h zopfli/deflate.h zopfli/tree.h zopfli/util.h zopfli/lz77.h zopfli/katajainen.o: zopfli/katajainen.c zopfli/katajainen.h dev: pigz pigzt pigzn pigzt: pigzt.o yarnt.o $(CC) -o pigzt pigzt.o yarnt.o -lpthread -lz pigzt.o: pigz.c yarn.h $(CC) -Wall -O3 -DDEBUG -g -c -o pigzt.o pigz.c yarnt.o: yarn.c yarn.h $(CC) -Wall -O3 -DDEBUG -g -c -o yarnt.o yarn.c pigzn: pigzn.o $(CC) -o pigzn pigzn.o -lz pigzn.o: pigz.c $(CC) -Wall -O3 -DDEBUG -DNOTHREAD -g -c -o pigzn.o pigz.c test: pigz ./pigz -kf pigz.c ; ./pigz -t pigz.c.gz ./pigz -kfb 32 pigz.c ; ./pigz -t pigz.c.gz ./pigz -kfp 1 pigz.c ; ./pigz -t pigz.c.gz ./pigz -kfz pigz.c ; ./pigz -t pigz.c.zz ./pigz -kfK pigz.c ; ./pigz -t pigz.c.zip printf "" | ./pigz -cdf | wc -c | test `cat` -eq 0 printf "x" | ./pigz -cdf | wc -c | test `cat` -eq 1 printf "xy" | ./pigz -cdf | wc -c | test 
`cat` -eq 2 printf "xyz" | ./pigz -cdf | wc -c | test `cat` -eq 3 (printf "w" | gzip ; printf "x") | ./pigz -cdf | wc -c | test `cat` -eq 2 (printf "w" | gzip ; printf "xy") | ./pigz -cdf | wc -c | test `cat` -eq 3 (printf "w" | gzip ; printf "xyz") | ./pigz -cdf | wc -c | test `cat` -eq 4 -@if test "`whereis compress | grep /`" != ""; then \ echo 'compress -f < pigz.c | ./unpigz | cmp - pigz.c' ;\ compress -f < pigz.c | ./unpigz | cmp - pigz.c ;\ fi @rm -f pigz.c.gz pigz.c.zz pigz.c.zip tests: dev test ./pigzn -kf pigz.c ; ./pigz -t pigz.c.gz @rm -f pigz.c.gz docs: pigz.pdf pigz.pdf: pigz.1 groff -mandoc -f H -T ps pigz.1 | ps2pdf - pigz.pdf clean: @rm -f *.o zopfli/*.o pigz unpigz pigzn pigzt pigz.c.gz pigz.c.zz pigz.c.zip pigz-2.3/pigz.10000644000076500000240000001134412115037061012510 0ustar madlerstaff.TH PIGZ 1 local .SH NAME pigz, unpigz \- compress or expand files .SH SYNOPSIS .ll +8 .B pigz .RB [ " \-cdfhikKlLnNqrRtTz0..9,11 " ] [ .B -b .I blocksize ] [ .B -p .I threads ] [ .B -S .I suffix ] [ .I "name \&..." ] .ll -8 .br .B unpigz .RB [ " \-cfhikKlLnNqrRtTz " ] [ .B -b .I blocksize ] [ .B -p .I threads ] [ .B -S .I suffix ] [ .I "name \&..." ] .SH DESCRIPTION .I Pigz compresses using threads to make use of multiple processors and cores. The input is broken up into 128 KB chunks with each compressed in parallel. The individual check value for each chunk is also calculated in parallel. The compressed data is written in order to the output, and a combined check value is calculated from the individual check values. .PP The compressed data format generated is in the gzip, zlib, or single-entry zip format using the deflate compression method. The compression produces partial raw deflate streams which are concatenated by a single write thread and wrapped with the appropriate header and trailer, where the trailer contains the combined check value. 
.PP Each partial raw deflate stream is terminated by an empty stored block (using the Z_SYNC_FLUSH option of zlib), in order to end that partial bit stream at a byte boundary. That allows the partial streams to be concatenated simply as sequences of bytes. This adds a very small four to five byte overhead to the output for each input chunk. .PP The default input block size is 128K, but can be changed with the .B -b option. The number of compress threads is set by default to the number of online processors, which can be changed using the .B -p option. Specifying .B -p 1 avoids the use of threads entirely. .PP The input blocks, while compressed independently, have the last 32K of the previous block loaded as a preset dictionary to preserve the compression effectiveness of deflating in a single thread. This can be turned off using the .B -i or .B --independent option, so that the blocks can be decompressed independently for partial error recovery or for random access. .PP Decompression can't be parallelized, at least not without specially prepared deflate streams for that purpose. As a result, .I pigz uses a single thread (the main thread) for decompression, but will create three other threads for reading, writing, and check calculation, which can speed up decompression under some circumstances. Parallel decompression can be turned off by specifying one process ( .B -dp 1 or .B -tp 1 ). .PP Compressed files can be restored to their original form using .I pigz -d or .I unpigz. .SH OPTIONS .TP .B -# --fast --best Regulate the speed of compression using the specified digit .IR # , where .B \-1 or .B \-\-fast indicates the fastest compression method (less compression) and .B \-9 or .B \-\-best indicates the slowest compression method (best compression). .B -0 is no compression. .B \-11 gives a few percent better compression at a severe cost in execution time. The default is .B \-6. .TP .B -b --blocksize mmm Set compression block size to mmmK (default 128KiB). 
.TP .B -c --stdout --to-stdout Write all processed output to stdout (won't delete). .TP .B -d --decompress --uncompress Decompress the compressed input. .TP .B -f --force Force overwrite, compress .gz, links, and to terminal. .TP .B -h --help Display a help screen and quit. .TP .B -i --independent Compress blocks independently for damage recovery. .TP .B -k --keep Do not delete original file after processing. .TP .B -K --zip Compress to PKWare zip (.zip) single entry format. .TP .B -l --list List the contents of the compressed input. .TP .B -L --license Display the .I pigz license and quit. .TP .B -n --no-name Do not store or restore file name in/from header. .TP .B -N --name Store/restore file name and mod time in/from header. .TP .B -p --processes n Allow up to n processes (default is the number of online processors) .TP .B -q --quiet --silent Print no messages, even on error. .TP .B -r --recursive Process the contents of all subdirectories. .TP .B -R --rsyncable Input-determined block locations for rsync. .TP .B -S --suffix .sss Use suffix .sss instead of .gz (for compression). .TP .B -t --test Test the integrity of the compressed input. .TP .B -T --no-time Do not store or restore mod time in/from header. .TP .B -v --verbose Provide more verbose output. .TP .B -V --version Show the version of pigz. .TP .B -z --zlib Compress to zlib (.zz) instead of gzip format. .TP .B -- All arguments after "--" are treated as file names (for names that start with "-") .SH "COPYRIGHT NOTICE" This software is provided 'as-is', without any express or implied warranty. In no event will the author be held liable for any damages arising from the use of this software. 
.PP Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Mark Adler pigz-2.3/pigz.c0000644000076500000240000042273212115050165012601 0ustar madlerstaff/* pigz.c -- parallel implementation of gzip * Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Mark Adler * Version 2.3 3 Mar 2013 Mark Adler */ /* This software is provided 'as-is', without any express or implied warranty. In no event will the author be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. Mark Adler madler@alumni.caltech.edu Mark accepts donations for providing this software. Donations are not required or expected. Any amount that you feel is appropriate would be appreciated. 
You can use this link: https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=536055 */ /* Version history: 1.0 17 Jan 2007 First version, pipe only 1.1 28 Jan 2007 Avoid void * arithmetic (some compilers don't get that) Add note about requiring zlib 1.2.3 Allow compression level 0 (no compression) Completely rewrite parallelism -- add a write thread Use deflateSetDictionary() to make use of history Tune argument defaults to best performance on four cores 1.2.1 1 Feb 2007 Add long command line options, add all gzip options Add debugging options 1.2.2 19 Feb 2007 Add list (--list) function Process file names on command line, write .gz output Write name and time in gzip header, set output file time Implement all command line options except --recursive Add --keep option to prevent deleting input files Add thread tracing information with -vv used Copy crc32_combine() from zlib (shared libraries issue) 1.3 25 Feb 2007 Implement --recursive Expand help to show all options Show help if no arguments or output piping are provided Process options in GZIP environment variable Add progress indicator to write thread if --verbose 1.4 4 Mar 2007 Add --independent to facilitate damaged file recovery Reallocate jobs for new --blocksize or --processes Do not delete original if writing to stdout Allow --processes 1, which does no threading Add NOTHREAD define to compile without threads Incorporate license text from zlib in source code 1.5 25 Mar 2007 Reinitialize jobs for new compression level Copy attributes and owner from input file to output file Add decompression and testing Add -lt (or -ltv) to show all entries and proper lengths Add decompression, testing, listing of LZW (.Z) files Only generate and show trace log if DEBUG defined Take "-" argument to mean read file from stdin 1.6 30 Mar 2007 Add zlib stream compression (--zlib), and decompression 1.7 29 Apr 2007 Decompress first entry of a zip file (if deflated) Avoid empty deflate blocks at end of deflate stream 
Show zlib check value (Adler-32) when listing Don't complain when decompressing empty file Warn about trailing junk for gzip and zlib streams Make listings consistent, ignore gzip extra flags Add zip stream compression (--zip) 1.8 13 May 2007 Document --zip option in help output 2.0 19 Oct 2008 Complete rewrite of thread usage and synchronization Use polling threads and a pool of memory buffers Remove direct pthread library use, hide in yarn.c 2.0.1 20 Oct 2008 Check version of zlib at compile time, need >= 1.2.3 2.1 24 Oct 2008 Decompress with read, write, inflate, and check threads Remove spurious use of ctime_r(), ctime() more portable Change application of job->calc lock to be a semaphore Detect size of off_t at run time to select %lu vs. %llu #define large file support macro even if not __linux__ Remove _LARGEFILE64_SOURCE, _FILE_OFFSET_BITS is enough Detect file-too-large error and report, blame build Replace check combination routines with those from zlib 2.1.1 28 Oct 2008 Fix a leak for files with an integer number of blocks Update for yarn 1.1 (yarn_prefix and yarn_abort) 2.1.2 30 Oct 2008 Work around use of beta zlib in production systems 2.1.3 8 Nov 2008 Don't use zlib combination routines, put back in pigz 2.1.4 9 Nov 2008 Fix bug when decompressing very short files 2.1.5 20 Jul 2009 Added 2008, 2009 to --license statement Allow numeric parameter immediately after -p or -b Enforce parameter after -p, -b, -s, before other options Enforce numeric parameters to have only numeric digits Try to determine the number of processors for -p default Fix --suffix short option to be -S to match gzip [Bloch] Decompress if executable named "unpigz" [Amundsen] Add a little bit of testing to Makefile 2.1.6 17 Jan 2010 Added pigz.spec to distribution for RPM systems [Brown] Avoid some compiler warnings Process symbolic links if piping to stdout [Hoffstätte] Decompress if executable named "gunzip" [Hoffstätte] Allow ".tgz" suffix [Chernookiy] Fix adler32 comparison on .zz 
files 2.1.7 17 Dec 2011 Avoid unused parameter warning in reenter() Don't assume 2's complement ints in compress_thread() Replicate gzip -cdf cat-like behavior Replicate gzip -- option to suppress option decoding Test output from make test instead of showing it Updated pigz.spec to install unpigz, pigz.1 [Obermaier] Add PIGZ environment variable [Mueller] Replicate gzip suffix search when decoding or listing Fix bug in load() to set in_left to zero on end of file Do not check suffix when input file won't be modified Decompress to stdout if name is "*cat" [Hayasaka] Write data descriptor signature to be like Info-ZIP Update and sort options list in help Use CC variable for compiler in Makefile Exit with code 2 if a warning has been issued Fix thread synchronization problem when tracing Change macro name MAX to MAX2 to avoid library conflicts Determine number of processors on HP-UX [Lloyd] 2.2 31 Dec 2011 Check for expansion bound busting (e.g. modified zlib) Make the "threads" list head global variable volatile Fix construction and printing of 32-bit check values Add --rsyncable functionality 2.2.1 1 Jan 2012 Fix bug in --rsyncable buffer management 2.2.2 1 Jan 2012 Fix another bug in --rsyncable buffer management 2.2.3 15 Jan 2012 Remove volatile in yarn.c Reduce the number of input buffers Change initial rsyncable hash to comparison value Improve the efficiency of arriving at a byte boundary Add thread portability #defines from yarn.c Have rsyncable compression be independent of threading Fix bug where constructed dictionaries not being used 2.2.4 11 Mar 2012 Avoid some return value warnings Improve the portability of printing the off_t type Check for existence of compress binary before using Update zlib version checking to 1.2.6 for new functions Fix bug in zip (-K) output Fix license in pigz.spec Remove thread portability #defines in pigz.c 2.2.5 28 Jul 2012 Avoid race condition in free_pool() Change suffix to .tar when decompressing or listing .tgz Print name 
of executable in error messages Show help properly when the name is unpigz or gunzip Fix permissions security problem before output is closed 2.3 3 Mar 2013 Don't complain about missing suffix when not writing output file Put all global variables in one global structure for readability Do not decompress concatenated zlib streams -- only gzip streams Add option for compression level 11 to use zopfli Fix handling of junk after compressed data */ #define VERSION "pigz 2.3\n" /* To-do: - make source portable for Windows, VMS, etc. (see gzip source code) - make build portable (currently good for Unixish) */ /* pigz compresses using threads to make use of multiple processors and cores. The input is broken up into 128 KB chunks with each compressed in parallel. The individual check value for each chunk is also calculated in parallel. The compressed data is written in order to the output, and a combined check value is calculated from the individual check values. The compressed data format generated is in the gzip, zlib, or single-entry zip format using the deflate compression method. The compression produces partial raw deflate streams which are concatenated by a single write thread and wrapped with the appropriate header and trailer, where the trailer contains the combined check value. Each partial raw deflate stream is terminated by an empty stored block (using the Z_SYNC_FLUSH option of zlib), in order to end that partial bit stream at a byte boundary, unless that partial stream happens to already end at a byte boundary (the latter requires zlib 1.2.6 or later). Ending on a byte boundary allows the partial streams to be concatenated simply as sequences of bytes. This adds a very small four to five byte overhead (average 3.75 bytes) to the output for each input chunk. The default input block size is 128K, but can be changed with the -b option. The number of compress threads is set by default to 8, which can be changed using the -p option. 
Specifying -p 1 avoids the use of threads entirely. pigz will try to determine the number of processors in the machine, in which case if that number is two or greater, pigz will use that as the default for -p instead of 8. The input blocks, while compressed independently, have the last 32K of the previous block loaded as a preset dictionary to preserve the compression effectiveness of deflating in a single thread. This can be turned off using the --independent or -i option, so that the blocks can be decompressed independently for partial error recovery or for random access. Decompression can't be parallelized, at least not without specially prepared deflate streams for that purpose. As a result, pigz uses a single thread (the main thread) for decompression, but will create three other threads for reading, writing, and check calculation, which can speed up decompression under some circumstances. Parallel decompression can be turned off by specifying one process (-dp 1 or -tp 1). pigz requires zlib 1.2.1 or later to allow setting the dictionary when doing raw deflate. Since zlib 1.2.3 corrects security vulnerabilities in zlib version 1.2.1 and 1.2.2, conditionals check for zlib 1.2.3 or later during the compilation of pigz.c. zlib 1.2.4 includes some improvements to Z_FULL_FLUSH and deflateSetDictionary() that permit identical output for pigz with and without threads, which is not possible with zlib 1.2.3. This may be important for uses of pigz -R where small changes in the contents should result in small changes in the archive for rsync. Note that due to the details of how the lower levels of compression result in greater speed, compression level 3 and below does not permit identical pigz output with and without threads. pigz uses the POSIX pthread library for thread control and communication, through the yarn.h interface to yarn.c. yarn.c can be replaced with equivalent implementations using other thread libraries. 
pigz can be compiled with NOTHREAD #defined to not use threads at all (in which case pigz will not be able to live up to the "parallel" in its name). */ /* Details of parallel compression implementation: When doing parallel compression, pigz uses the main thread to read the input in 'size' sized chunks (see -b), and puts those in a compression job list, each with a sequence number to keep track of the ordering. If it is not the first chunk, then that job also points to the previous input buffer, from which the last 32K will be used as a dictionary (unless -i is specified). This sets a lower limit of 32K on 'size'. pigz launches up to 'procs' compression threads (see -p). Each compression thread continues to look for jobs in the compression list and perform those jobs until instructed to return. When a job is pulled, the dictionary, if provided, will be loaded into the deflate engine and then that input buffer is dropped for reuse. Then the input data is compressed into an output buffer that grows in size if necessary to hold the compressed data. The job is then put into the write job list, sorted by the sequence number. The compress thread however continues to calculate the check value on the input data, either a CRC-32 or Adler-32, possibly in parallel with the write thread writing the output data. Once that's done, the compress thread drops the input buffer and also releases the lock on the check value so that the write thread can combine it with the previous check values. The compress thread has then completed that job, and goes to look for another. All of the compress threads are left running and waiting even after the last chunk is processed, so that they can support the next input to be compressed (more than one input file on the command line). Once pigz is done, it will call all the compress threads home (that'll do pig, that'll do). Before starting to read the input, the main thread launches the write thread so that it is ready to pick up jobs immediately. 
The compress thread puts the write jobs in the list in sequence sorted order, so that the first job in the list always has the lowest sequence number. The write thread waits for the next write job in sequence, and then gets that job. The job still holds its input buffer, from which the write thread gets the input buffer length for use in check value combination. Then the write thread drops that input buffer to allow its reuse. Holding on to the input buffer until the write thread starts also has the benefit that the read and compress threads can't get way ahead of the write thread and build up a large backlog of unwritten compressed data. The write thread will write the compressed data, drop the output buffer, and then wait for the check value to be unlocked by the compress thread. Then the write thread combines the check value for this chunk with the total check value for eventual use in the trailer. If this is not the last chunk, the write thread then goes back to look for the next output chunk in sequence. After the last chunk, the write thread returns and joins the main thread. Unlike the compress threads, a new write thread is launched for each input stream. The write thread writes the appropriate header and trailer around the compressed data. The input and output buffers are reused through their collection in pools. Each buffer has a use count, which when decremented to zero returns the buffer to the respective pool. Each input buffer has up to three parallel uses: as the input for compression, as the data for the check value calculation, and as a dictionary for compression. Each output buffer has only one use, which is as the output of compression followed serially as data to be written. The input pool is limited in the number of buffers, so that reading does not get way ahead of compression and eat up memory with more input than can be used. The limit is approximately two times the number of compression threads. 
In the case that reading is fast as compared to compression, that number allows a second set of buffers to be read while the first set of compressions are being performed. The number of output buffers is not directly limited, but is indirectly limited by the release of input buffers to about the same number. */ /* use large file functions if available */ #define _FILE_OFFSET_BITS 64 /* included headers and what is expected from each */ #include /* fflush(), fprintf(), fputs(), getchar(), putc(), */ /* puts(), printf(), vasprintf(), stderr, EOF, NULL, SEEK_END, size_t, off_t */ #include /* exit(), malloc(), free(), realloc(), atol(), */ /* atoi(), getenv() */ #include /* va_start(), va_end(), va_list */ #include /* memset(), memchr(), memcpy(), strcmp(), strcpy() */ /* strncpy(), strlen(), strcat(), strrchr() */ #include /* errno, EEXIST */ #include /* assert() */ #include /* ctime(), time(), time_t, mktime() */ #include /* signal(), SIGINT */ #include /* ssize_t */ #include /* chmod(), stat(), fstat(), lstat(), struct stat, */ /* S_IFDIR, S_IFLNK, S_IFMT, S_IFREG */ #include /* utimes(), gettimeofday(), struct timeval */ #include /* unlink(), _exit(), read(), write(), close(), */ /* lseek(), isatty(), chown() */ #include /* open(), O_CREAT, O_EXCL, O_RDONLY, O_TRUNC, */ /* O_WRONLY */ #include /* opendir(), readdir(), closedir(), DIR, */ /* struct dirent */ #include /* PATH_MAX, UINT_MAX */ #if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3 # include /* intmax_t */ #endif #ifdef __hpux # include # include #endif #include "zlib.h" /* deflateInit2(), deflateReset(), deflate(), */ /* deflateEnd(), deflateSetDictionary(), crc32(), inflateBackInit(), inflateBack(), inflateBackEnd(), Z_DEFAULT_COMPRESSION, Z_DEFAULT_STRATEGY, Z_DEFLATED, Z_NO_FLUSH, Z_NULL, Z_OK, Z_SYNC_FLUSH, z_stream */ #if !defined(ZLIB_VERNUM) || ZLIB_VERNUM < 0x1230 # error Need zlib version 1.2.3 or later #endif #ifndef NOTHREAD # include "yarn.h" /* thread, launch(), join(), join_all(), */ /* 
lock, new_lock(), possess(), twist(), wait_for(), release(), peek_lock(), free_lock(), yarn_name */ #endif #include "zopfli/deflate.h" /* DeflatePart(), Options */ /* for local functions and globals */ #define local static /* prevent end-of-line conversions on MSDOSish operating systems */ #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) # include /* setmode(), O_BINARY */ # define SET_BINARY_MODE(fd) setmode(fd, O_BINARY) #else # define SET_BINARY_MODE(fd) #endif /* release an allocated pointer, if allocated, and mark as unallocated */ #define RELEASE(ptr) \ do { \ if ((ptr) != NULL) { \ free(ptr); \ ptr = NULL; \ } \ } while (0) /* sliding dictionary size for deflate */ #define DICT 32768U /* largest power of 2 that fits in an unsigned int -- used to limit requests to zlib functions that use unsigned int lengths */ #define MAXP2 (UINT_MAX - (UINT_MAX >> 1)) /* rsyncable constants -- RSYNCBITS is the number of bits in the mask for comparison. For random input data, there will be a hit on average every 1<> 1) /* initial pool counts and sizes -- INBUFS is the limit on the number of input spaces as a function of the number of processors (used to throttle the creation of compression jobs), OUTPOOL is the initial size of the output data buffer, chosen to make resizing of the buffer very unlikely and to allow prepending with a dictionary for use as an input buffer for zopfli */ #define INBUFS(p) (((p)<<1)+3) #define OUTPOOL(s) ((s)+((s)>>4)+DICT) /* input buffer size */ #define BUF 32768U /* globals (modified by main thread only when it's the only thread) */ local struct { char *prog; /* name by which pigz was invoked */ int ind; /* input file descriptor */ int outd; /* output file descriptor */ char inf[PATH_MAX+1]; /* input file name (accommodate recursion) */ char *outf; /* output file name (allocated if not NULL) */ int verbosity; /* 0 = quiet, 1 = normal, 2 = verbose, 3 = trace */ int headis; /* 1 to store name, 2 to store date, 3 both */ 
int pipeout; /* write output to stdout even if file */ int keep; /* true to prevent deletion of input file */ int force; /* true to overwrite, compress links, cat */ int form; /* gzip = 0, zlib = 1, zip = 2 or 3 */ unsigned char magic1; /* first byte of possible header when decoding */ int recurse; /* true to dive down into directory structure */ char *sufx; /* suffix to use (".gz" or user supplied) */ char *name; /* name for gzip header */ time_t mtime; /* time stamp from input file for gzip header */ int list; /* true to list files instead of compress */ int first; /* true if we need to print listing header */ int decode; /* 0 to compress, 1 to decompress, 2 to test */ int level; /* compression level */ int rsync; /* true for rsync blocking */ int procs; /* maximum number of compression threads (>= 1) */ int setdict; /* true to initialize dictionary in each thread */ size_t block; /* uncompressed input size per thread (>= 32K) */ int warned; /* true if a warning has been given */ /* saved gzip/zip header data for decompression, testing, and listing */ time_t stamp; /* time stamp from gzip header */ char *hname; /* name from header (allocated) */ unsigned long zip_crc; /* local header crc */ unsigned long zip_clen; /* local header compressed length */ unsigned long zip_ulen; /* local header uncompressed length */ /* globals for decompression and listing buffered reading */ unsigned char in_buf[BUF]; /* input buffer */ unsigned char *in_next; /* next unused byte in buffer */ size_t in_left; /* number of unused bytes in buffer */ int in_eof; /* true if reached end of file on input */ int in_short; /* true if last read didn't fill buffer */ off_t in_tot; /* total bytes read from input */ off_t out_tot; /* total bytes written to output */ unsigned long out_check; /* check value of output */ #ifndef NOTHREAD /* globals for decompression parallel reading */ unsigned char in_buf2[BUF]; /* second buffer for parallel reads */ size_t in_len; /* data waiting in next buffer 
*/ int in_which; /* -1: start, 0: in_buf2, 1: in_buf */ lock *load_state; /* value = 0 to wait, 1 to read a buffer */ thread *load_thread; /* load_read() thread for joining */ #endif } g; /* display a complaint with the program name on stderr */ local int complain(char *fmt, ...) { va_list ap; if (g.verbosity > 0) { fprintf(stderr, "%s: ", g.prog); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); putc('\n', stderr); fflush(stderr); g.warned = 1; } return 0; } /* exit with error, delete output file if in the middle of writing it */ local int bail(char *why, char *what) { if (g.outd != -1 && g.outf != NULL) unlink(g.outf); complain("abort: %s%s", why, what); exit(1); return 0; } #ifdef DEBUG /* starting time of day for tracing */ local struct timeval start; /* trace log */ local struct log { struct timeval when; /* time of entry */ char *msg; /* message */ struct log *next; /* next entry */ } *log_head, **log_tail = NULL; #ifndef NOTHREAD local lock *log_lock = NULL; #endif /* maximum log entry length */ #define MAXMSG 256 /* set up log (call from main thread before other threads launched) */ local void log_init(void) { if (log_tail == NULL) { #ifndef NOTHREAD log_lock = new_lock(0); #endif log_head = NULL; log_tail = &log_head; } } /* add entry to trace log */ local void log_add(char *fmt, ...) 
{ struct timeval now; struct log *me; va_list ap; char msg[MAXMSG]; gettimeofday(&now, NULL); me = malloc(sizeof(struct log)); if (me == NULL) bail("not enough memory", ""); me->when = now; va_start(ap, fmt); vsnprintf(msg, MAXMSG, fmt, ap); va_end(ap); me->msg = malloc(strlen(msg) + 1); if (me->msg == NULL) { free(me); bail("not enough memory", ""); } strcpy(me->msg, msg); me->next = NULL; #ifndef NOTHREAD assert(log_lock != NULL); possess(log_lock); #endif *log_tail = me; log_tail = &(me->next); #ifndef NOTHREAD twist(log_lock, BY, +1); #endif } /* pull entry from trace log and print it, return false if empty */ local int log_show(void) { struct log *me; struct timeval diff; if (log_tail == NULL) return 0; #ifndef NOTHREAD possess(log_lock); #endif me = log_head; if (me == NULL) { #ifndef NOTHREAD release(log_lock); #endif return 0; } log_head = me->next; if (me->next == NULL) log_tail = &log_head; #ifndef NOTHREAD twist(log_lock, BY, -1); #endif diff.tv_usec = me->when.tv_usec - start.tv_usec; diff.tv_sec = me->when.tv_sec - start.tv_sec; if (diff.tv_usec < 0) { diff.tv_usec += 1000000L; diff.tv_sec--; } fprintf(stderr, "trace %ld.%06ld %s\n", (long)diff.tv_sec, (long)diff.tv_usec, me->msg); fflush(stderr); free(me->msg); free(me); return 1; } /* release log resources (need to do log_init() to use again) */ local void log_free(void) { struct log *me; if (log_tail != NULL) { #ifndef NOTHREAD possess(log_lock); #endif while ((me = log_head) != NULL) { log_head = me->next; free(me->msg); free(me); } #ifndef NOTHREAD twist(log_lock, TO, 0); free_lock(log_lock); log_lock = NULL; #endif log_tail = NULL; } } /* show entries until no more, free log */ local void log_dump(void) { if (log_tail == NULL) return; while (log_show()) ; log_free(); } /* debugging macro */ #define Trace(x) \ do { \ if (g.verbosity > 2) { \ log_add x; \ } \ } while (0) #else /* !DEBUG */ #define log_dump() #define Trace(x) #endif /* read up to len bytes into buf, repeating read() calls as needed 
*/ local size_t readn(int desc, unsigned char *buf, size_t len) { ssize_t ret; size_t got; got = 0; while (len) { ret = read(desc, buf, len); if (ret < 0) bail("read error on ", g.inf); if (ret == 0) break; buf += ret; len -= ret; got += ret; } return got; } /* write len bytes, repeating write() calls as needed */ local void writen(int desc, unsigned char *buf, size_t len) { ssize_t ret; while (len) { ret = write(desc, buf, len); if (ret < 1) { complain("write error code %d", errno); bail("write error on ", g.outf); } buf += ret; len -= ret; } } /* convert Unix time to MS-DOS date and time, assuming current timezone (you got a better idea?) */ local unsigned long time2dos(time_t t) { struct tm *tm; unsigned long dos; if (t == 0) t = time(NULL); tm = localtime(&t); if (tm->tm_year < 80 || tm->tm_year > 207) return 0; dos = (tm->tm_year - 80) << 25; dos += (tm->tm_mon + 1) << 21; dos += tm->tm_mday << 16; dos += tm->tm_hour << 11; dos += tm->tm_min << 5; dos += (tm->tm_sec + 1) >> 1; /* round to double-seconds */ return dos; } /* put a 4-byte integer into a byte array in LSB order or MSB order */ #define PUT2L(a,b) (*(a)=(b)&0xff,(a)[1]=(b)>>8) #define PUT4L(a,b) (PUT2L(a,(b)&0xffff),PUT2L((a)+2,(b)>>16)) #define PUT4M(a,b) (*(a)=(b)>>24,(a)[1]=(b)>>16,(a)[2]=(b)>>8,(a)[3]=(b)) /* write a gzip, zlib, or zip header using the information in the globals */ local unsigned long put_header(void) { unsigned long len; unsigned char head[30]; if (g.form > 1) { /* zip */ /* write local header */ PUT4L(head, 0x04034b50UL); /* local header signature */ PUT2L(head + 4, 20); /* version needed to extract (2.0) */ PUT2L(head + 6, 8); /* flags: data descriptor follows data */ PUT2L(head + 8, 8); /* deflate */ PUT4L(head + 10, time2dos(g.mtime)); PUT4L(head + 14, 0); /* crc (not here) */ PUT4L(head + 18, 0); /* compressed length (not here) */ PUT4L(head + 22, 0); /* uncompressed length (not here) */ PUT2L(head + 26, g.name == NULL ? 
1 : /* length of name */ strlen(g.name)); PUT2L(head + 28, 9); /* length of extra field (see below) */ writen(g.outd, head, 30); /* write local header */ len = 30; /* write file name (use "-" for stdin) */ if (g.name == NULL) writen(g.outd, (unsigned char *)"-", 1); else writen(g.outd, (unsigned char *)g.name, strlen(g.name)); len += g.name == NULL ? 1 : strlen(g.name); /* write extended timestamp extra field block (9 bytes) */ PUT2L(head, 0x5455); /* extended timestamp signature */ PUT2L(head + 2, 5); /* number of data bytes in this block */ head[4] = 1; /* flag presence of mod time */ PUT4L(head + 5, g.mtime); /* mod time */ writen(g.outd, head, 9); /* write extra field block */ len += 9; } else if (g.form) { /* zlib */ head[0] = 0x78; /* deflate, 32K window */ head[1] = (g.level >= 9 ? 3 : (g.level == 1 ? 0 : (g.level >= 6 || g.level == Z_DEFAULT_COMPRESSION ? 1 : 2))) << 6; head[1] += 31 - (((head[0] << 8) + head[1]) % 31); writen(g.outd, head, 2); len = 2; } else { /* gzip */ head[0] = 31; head[1] = 139; head[2] = 8; /* deflate */ head[3] = g.name != NULL ? 8 : 0; PUT4L(head + 4, g.mtime); head[8] = g.level >= 9 ? 2 : (g.level == 1 ? 
4 : 0); head[9] = 3; /* unix */ writen(g.outd, head, 10); len = 10; if (g.name != NULL) writen(g.outd, (unsigned char *)g.name, strlen(g.name) + 1); if (g.name != NULL) len += strlen(g.name) + 1; } return len; } /* write a gzip, zlib, or zip trailer */ local void put_trailer(unsigned long ulen, unsigned long clen, unsigned long check, unsigned long head) { unsigned char tail[46]; if (g.form > 1) { /* zip */ unsigned long cent; /* write data descriptor (as promised in local header) */ PUT4L(tail, 0x08074b50UL); PUT4L(tail + 4, check); PUT4L(tail + 8, clen); PUT4L(tail + 12, ulen); writen(g.outd, tail, 16); /* write central file header */ PUT4L(tail, 0x02014b50UL); /* central header signature */ tail[4] = 63; /* obeyed version 6.3 of the zip spec */ tail[5] = 255; /* ignore external attributes */ PUT2L(tail + 6, 20); /* version needed to extract (2.0) */ PUT2L(tail + 8, 8); /* data descriptor is present */ PUT2L(tail + 10, 8); /* deflate */ PUT4L(tail + 12, time2dos(g.mtime)); PUT4L(tail + 16, check); /* crc */ PUT4L(tail + 20, clen); /* compressed length */ PUT4L(tail + 24, ulen); /* uncompressed length */ PUT2L(tail + 28, g.name == NULL ? 1 : /* length of name */ strlen(g.name)); PUT2L(tail + 30, 9); /* length of extra field (see below) */ PUT2L(tail + 32, 0); /* no file comment */ PUT2L(tail + 34, 0); /* disk number 0 */ PUT2L(tail + 36, 0); /* internal file attributes */ PUT4L(tail + 38, 0); /* external file attributes (ignored) */ PUT4L(tail + 42, 0); /* offset of local header */ writen(g.outd, tail, 46); /* write central file header */ cent = 46; /* write file name (use "-" for stdin) */ if (g.name == NULL) writen(g.outd, (unsigned char *)"-", 1); else writen(g.outd, (unsigned char *)g.name, strlen(g.name)); cent += g.name == NULL ? 
1 : strlen(g.name); /* write extended timestamp extra field block (9 bytes) */ PUT2L(tail, 0x5455); /* extended timestamp signature */ PUT2L(tail + 2, 5); /* number of data bytes in this block */ tail[4] = 1; /* flag presence of mod time */ PUT4L(tail + 5, g.mtime); /* mod time */ writen(g.outd, tail, 9); /* write extra field block */ cent += 9; /* write end of central directory record */ PUT4L(tail, 0x06054b50UL); /* end of central directory signature */ PUT2L(tail + 4, 0); /* number of this disk */ PUT2L(tail + 6, 0); /* disk with start of central directory */ PUT2L(tail + 8, 1); /* number of entries on this disk */ PUT2L(tail + 10, 1); /* total number of entries */ PUT4L(tail + 12, cent); /* size of central directory */ PUT4L(tail + 16, head + clen + 16); /* offset of central directory */ PUT2L(tail + 20, 0); /* no zip file comment */ writen(g.outd, tail, 22); /* write end of central directory record */ } else if (g.form) { /* zlib */ PUT4M(tail, check); writen(g.outd, tail, 4); } else { /* gzip */ PUT4L(tail, check); PUT4L(tail + 4, ulen); writen(g.outd, tail, 8); } } /* compute check value depending on format */ #define CHECK(a,b,c) (g.form == 1 ? adler32(a,b,c) : crc32(a,b,c)) #ifndef NOTHREAD /* -- threaded portions of pigz -- */ /* -- check value combination routines for parallel calculation -- */ #define COMB(a,b,c) (g.form == 1 ? 
adler32_comb(a,b,c) : crc32_comb(a,b,c)) /* combine two crc-32's or two adler-32's (copied from zlib 1.2.3 so that pigz can be compatible with older versions of zlib) */ /* we copy the combination routines from zlib here, in order to avoid linkage issues with the zlib 1.2.3 builds on Sun, Ubuntu, and others */ local unsigned long gf2_matrix_times(unsigned long *mat, unsigned long vec) { unsigned long sum; sum = 0; while (vec) { if (vec & 1) sum ^= *mat; vec >>= 1; mat++; } return sum; } local void gf2_matrix_square(unsigned long *square, unsigned long *mat) { int n; for (n = 0; n < 32; n++) square[n] = gf2_matrix_times(mat, mat[n]); } local unsigned long crc32_comb(unsigned long crc1, unsigned long crc2, size_t len2) { int n; unsigned long row; unsigned long even[32]; /* even-power-of-two zeros operator */ unsigned long odd[32]; /* odd-power-of-two zeros operator */ /* degenerate case */ if (len2 == 0) return crc1; /* put operator for one zero bit in odd */ odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ row = 1; for (n = 1; n < 32; n++) { odd[n] = row; row <<= 1; } /* put operator for two zero bits in even */ gf2_matrix_square(even, odd); /* put operator for four zero bits in odd */ gf2_matrix_square(odd, even); /* apply len2 zeros to crc1 (first square will put the operator for one zero byte, eight zero bits, in even) */ do { /* apply zeros operator for this bit of len2 */ gf2_matrix_square(even, odd); if (len2 & 1) crc1 = gf2_matrix_times(even, crc1); len2 >>= 1; /* if no more bits set, then done */ if (len2 == 0) break; /* another iteration of the loop with odd and even swapped */ gf2_matrix_square(odd, even); if (len2 & 1) crc1 = gf2_matrix_times(odd, crc1); len2 >>= 1; /* if no more bits set, then done */ } while (len2 != 0); /* return combined crc */ crc1 ^= crc2; return crc1; } #define BASE 65521U /* largest prime smaller than 65536 */ #define LOW16 0xffff /* mask lower 16 bits */ local unsigned long adler32_comb(unsigned long adler1, unsigned long adler2, 
size_t len2) { unsigned long sum1; unsigned long sum2; unsigned rem; /* the derivation of this formula is left as an exercise for the reader */ rem = (unsigned)(len2 % BASE); sum1 = adler1 & LOW16; sum2 = (rem * sum1) % BASE; sum1 += (adler2 & LOW16) + BASE - 1; sum2 += ((adler1 >> 16) & LOW16) + ((adler2 >> 16) & LOW16) + BASE - rem; if (sum1 >= BASE) sum1 -= BASE; if (sum1 >= BASE) sum1 -= BASE; if (sum2 >= (BASE << 1)) sum2 -= (BASE << 1); if (sum2 >= BASE) sum2 -= BASE; return sum1 | (sum2 << 16); } /* -- pool of spaces for buffer management -- */ /* These routines manage a pool of spaces. Each pool specifies a fixed size buffer to be contained in each space. Each space has a use count, which when decremented to zero returns the space to the pool. If a space is requested from the pool and the pool is empty, a space is immediately created unless a specified limit on the number of spaces has been reached. Only if the limit is reached will it wait for a space to be returned to the pool. Each space knows what pool it belongs to, so that it can be returned. 
 */

/* a space (one buffer for each space) */
struct space {
    lock *use;              /* use count -- return to pool when zero */
    unsigned char *buf;     /* buffer of size size */
    size_t size;            /* current size of this buffer */
    size_t len;             /* for application usage (initially zero) */
    struct pool *pool;      /* pool to return to */
    struct space *next;     /* for pool linked list */
};

/* pool of spaces (one pool for each type needed) */
struct pool {
    lock *have;             /* unused spaces available, lock for list */
    struct space *head;     /* linked list of available buffers */
    size_t size;            /* size of new buffers in this pool */
    int limit;              /* number of new spaces allowed, or -1 */
    int made;               /* number of buffers made */
};

/* initialize a pool (pool structure itself provided, not allocated) -- the
   limit is the maximum number of spaces in the pool, or -1 to indicate no
   limit, i.e., to never wait for a buffer to return to the pool */
local void new_pool(struct pool *pool, size_t size, int limit)
{
    pool->have = new_lock(0);
    pool->head = NULL;
    pool->size = size;
    pool->limit = limit;
    pool->made = 0;
}

/* get a space from a pool -- the use count is initially set to one, so there
   is no need to call use_space() for the first use -- bails if a new space
   or its buffer cannot be allocated */
local struct space *get_space(struct pool *pool)
{
    struct space *space;

    /* if can't create any more, wait for a space to show up */
    possess(pool->have);
    if (pool->limit == 0)
        wait_for(pool->have, NOT_TO_BE, 0);

    /* if a space is available, pull it from the list and return it */
    if (pool->head != NULL) {
        space = pool->head;
        possess(space->use);
        pool->head = space->next;
        twist(pool->have, BY, -1);      /* one less in pool */
        twist(space->use, TO, 1);       /* initially one user */
        space->len = 0;
        return space;
    }

    /* nothing available, don't want to wait, make a new space */
    assert(pool->limit != 0);
    if (pool->limit > 0)
        pool->limit--;
    pool->made++;
    release(pool->have);
    space = malloc(sizeof(struct space));
    if (space == NULL)
        bail("not enough memory", "");
    space->use = new_lock(1);           /* initially one user */
    space->buf = malloc(pool->size);
    if (space->buf == NULL)
        bail("not enough memory", "");
    space->size = pool->size;
    space->len = 0;
    space->pool = pool;                 /* remember the pool this belongs to */
    return space;
}

/* compute next size up by multiplying by about 2**(1/3) and round to the next
   power of 2 if we're close (so three applications results in doubling) -- if
   small, go up to at least 16, if overflow, go to max size_t value */
local size_t grow(size_t size)
{
    size_t was, top;
    int shift;

    was = size;
    size += size >> 2;
    /* reduce to the top three bits to see how close to a power of 2 we are */
    top = size;
    for (shift = 0; top > 7; shift++)
        top >>= 1;
    if (top == 7)
        size = (size_t)1 << (shift + 3);
    if (size < 16)
        size = 16;
    /* no increase means the addition wrapped -- saturate at the maximum */
    if (size <= was)
        size = (size_t)0 - 1;
    return size;
}

/* increase the size of the buffer in space -- bails if the size is already at
   the maximum or if the reallocation fails */
local void grow_space(struct space *space)
{
    size_t more;

    /* compute next size up */
    more = grow(space->size);
    if (more == space->size)
        bail("not enough memory", "");

    /* reallocate the buffer */
    space->buf = realloc(space->buf, more);
    if (space->buf == NULL)
        bail("not enough memory", "");
    space->size = more;
}

/* increment the use count to require one more drop before returning this
   space to the pool */
local void use_space(struct space *space)
{
    possess(space->use);
    twist(space->use, BY, +1);
}

/* drop a space, returning it to the pool if the use count is zero */
local void drop_space(struct space *space)
{
    int use;
    struct pool *pool;

    possess(space->use);
    use = peek_lock(space->use);
    assert(use != 0);
    /* this is the last user -- put the space back on the pool's list while
       still holding the use lock */
    if (use == 1) {
        pool = space->pool;
        possess(pool->have);
        space->next = pool->head;
        pool->head = space;
        twist(pool->have, BY, +1);
    }
    twist(space->use, BY, -1);
}

/* free the memory and lock resources of a pool -- return number of spaces for
   debugging and resource usage measurement */
local int free_pool(struct pool *pool)
{
    int count;
    struct space *space;

    possess(pool->have);
    count = 0;
    while ((space = pool->head) != NULL) {
        pool->head = space->next;
        free(space->buf);
        free_lock(space->use);
        free(space);
        count++;
    }
    /* every space made must have been returned to the pool by now */
    assert(count == pool->made);
    release(pool->have);
    free_lock(pool->have);
    return count;
}

/* input and output buffer pools */
local struct pool in_pool;
local struct pool out_pool;
local struct pool dict_pool;
local struct pool lens_pool;

/* -- parallel compression -- */

/* compress or write job (passed from compress list to write list) -- if seq is
   equal to -1, compress_thread is instructed to return; if more is false then
   this is the last chunk, which after writing tells write_thread to return */
struct job {
    long seq;                   /* sequence number */
    int more;                   /* true if this is not the last chunk */
    struct space *in;           /* input data to compress */
    struct space *out;          /* dictionary or resulting compressed data */
    struct space *lens;         /* coded list of flush block lengths */
    unsigned long check;        /* check value for input data */
    lock *calc;                 /* released when check calculation complete */
    struct job *next;           /* next job in the list (either list) */
};

/* list of compress jobs (with tail for appending to list) */
local lock *compress_have = NULL;   /* number of compress jobs waiting */
local struct job *compress_head, **compress_tail;

/* list of write jobs */
local lock *write_first;            /* lowest sequence number in list */
local struct job *write_head;

/* number of compression threads running */
local int cthreads = 0;

/* write thread if running */
local thread *writeth = NULL;

/* setup job lists (call from main thread) */
local void setup_jobs(void)
{
    /* set up only if not already set up*/
    if (compress_have != NULL)
        return;

    /* allocate locks and initialize lists */
    compress_have = new_lock(0);
    compress_head = NULL;
    compress_tail = &compress_head;
    write_first = new_lock(-1);
    write_head = NULL;

    /* initialize buffer pools (initial size for out_pool not critical, since
       buffers will be grown in size if needed -- initial size chosen to make
       this unlikely -- same for lens_pool) */
    new_pool(&in_pool, g.block, INBUFS(g.procs));
    new_pool(&out_pool, OUTPOOL(g.block), -1);
    new_pool(&dict_pool, DICT, -1);
    new_pool(&lens_pool, g.block >> (RSYNCBITS - 1), -1);
}

/* command the compress threads to all return, then join them all (call from
   main thread), free all the thread-related resources */
local void finish_jobs(void)
{
    struct job job;
    int caught;

    /* only do this once */
    if (compress_have == NULL)
        return;

    /* command all of the extant compress threads to return -- the sentinel
       job lives on this function's stack, and is left at the head of the list
       by each thread that sees it */
    possess(compress_have);
    job.seq = -1;
    job.next = NULL;
    compress_head = &job;
    compress_tail = &(job.next);
    twist(compress_have, BY, +1);       /* will wake them all up */

    /* join all of the compress threads, verify they all came back */
    caught = join_all();
    Trace(("-- joined %d compress threads", caught));
    assert(caught == cthreads);
    cthreads = 0;

    /* free the resources */
    caught = free_pool(&lens_pool);
    Trace(("-- freed %d block lengths buffers", caught));
    caught = free_pool(&dict_pool);
    Trace(("-- freed %d dictionary buffers", caught));
    caught = free_pool(&out_pool);
    Trace(("-- freed %d output buffers", caught));
    caught = free_pool(&in_pool);
    Trace(("-- freed %d input buffers", caught));
    free_lock(write_first);
    free_lock(compress_have);
    compress_have = NULL;
}

/* compress all strm->avail_in bytes at strm->next_in to out->buf, updating
   out->len, grow the size of the buffer (out->size) if necessary -- respect
   the size limitations of the zlib stream data types (size_t may be larger
   than unsigned) */
local void deflate_engine(z_stream *strm, struct space *out, int flush)
{
    size_t room;

    do {
        room = out->size - out->len;
        if (room == 0) {
            grow_space(out);
            room = out->size - out->len;
        }
        strm->next_out = out->buf + out->len;
        strm->avail_out = room < UINT_MAX ? (unsigned)room : UINT_MAX;
        (void)deflate(strm, flush);
        out->len = strm->next_out - out->buf;
    /* a completely filled output means deflate may have more to deliver */
    } while (strm->avail_out == 0);
    assert(strm->avail_in == 0);
}

/* get the next compression job from the head of the list, compress and compute
   the check value on the input, and put a job in the write list with the
   results -- keep looking for more jobs, returning when a job is found with a
   sequence number of -1 (leave that job in the list for other incarnations to
   find) -- zlib's deflate is used for g.level <= 9, zopfli's DeflatePart()
   otherwise */
local void compress_thread(void *dummy)
{
    struct job *job;                /* job pulled and working on */
    struct job *here, **prior;      /* pointers for inserting in write list */
    unsigned long check;            /* check value of input */
    unsigned char *next;            /* pointer for blocks, check value data */
    size_t left;                    /* input left to process */
    size_t len;                     /* remaining bytes to compress/check */
#if ZLIB_VERNUM >= 0x1260
    int bits;                       /* deflate pending bits */
#endif
    struct space *temp;             /* temporary space for zopfli input */
    Options opts;                   /* zopfli options */
    z_stream strm;                  /* deflate stream */

    (void)dummy;

    /* initialize the deflate stream for this thread */
    strm.zfree = Z_NULL;
    strm.zalloc = Z_NULL;
    strm.opaque = Z_NULL;
    if (deflateInit2(&strm, 6, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK)
        bail("not enough memory", "");

    /* keep looking for work */
    for (;;) {
        /* get a job (like I tell my son) */
        possess(compress_have);
        wait_for(compress_have, NOT_TO_BE, 0);
        job = compress_head;
        assert(job != NULL);
        if (job->seq == -1)
            break;
        compress_head = job->next;
        if (job->next == NULL)
            compress_tail = &compress_head;
        twist(compress_have, BY, -1);

        /* got a job -- initialize and set the compression level (note that if
           deflateParams() is called immediately after deflateReset(), there is
           no need to initialize the input/output for the stream) */
        Trace(("-- compressing #%ld", job->seq));
        if (g.level <= 9) {
            (void)deflateReset(&strm);
            (void)deflateParams(&strm, g.level, Z_DEFAULT_STRATEGY);
        }
        else {
            /* default zopfli options as set by InitOptions():
                 verbose = 0
                 numiterations = 15
                 blocksplitting = 1
                 blocksplittinglast = 0
                 blocksplittingmax = 15
             */
            InitOptions(&opts);
            temp = get_space(&out_pool);
            temp->len = 0;
        }

        /* set dictionary if provided, release that input or dictionary buffer
           (not NULL if dict is true and if this is not the first work unit) --
           for zopfli the dictionary is instead copied to the start of temp */
        if (job->out != NULL) {
            len = job->out->len;
            left = len < DICT ? len : DICT;
            if (g.level <= 9)
                deflateSetDictionary(&strm, job->out->buf + (len - left),
                                     left);
            else {
                memcpy(temp->buf, job->out->buf + (len - left), left);
                temp->len = left;
            }
            drop_space(job->out);
        }

        /* set up input and output */
        job->out = get_space(&out_pool);
        if (g.level <= 9) {
            strm.next_in = job->in->buf;
            strm.next_out = job->out->buf;
        }
        else
            memcpy(temp->buf + temp->len, job->in->buf, job->in->len);

        /* compress each block, either flushing or finishing */
        next = job->lens == NULL ? NULL : job->lens->buf;
        left = job->in->len;
        job->out->len = 0;
        do {
            /* decode next block length from blocks list (this is the inverse
               of the encoding done by append_len()) */
            len = next == NULL ? 128 : *next++;
            if (len < 128)                          /* 64..32831 */
                len = (len << 8) + (*next++) + 64;
            else if (len == 128)                    /* end of list */
                len = left;
            else if (len < 192)                     /* 1..63 */
                len &= 0x3f;
            else {                                  /* 32832..4227135 */
                len = ((len & 0x3f) << 16) + (*next++ << 8) + 32832U;
                len += *next++;
            }
            left -= len;

            if (g.level <= 9) {
                /* run MAXP2-sized amounts of input through deflate -- this
                   loop is needed for those cases where the unsigned type is
                   smaller than the size_t type, or when len is close to the
                   limit of the size_t type */
                while (len > MAXP2) {
                    strm.avail_in = MAXP2;
                    deflate_engine(&strm, job->out, Z_NO_FLUSH);
                    len -= MAXP2;
                }

                /* run the last piece through deflate -- end on a byte
                   boundary, using a sync marker if necessary, or finish the
                   deflate stream if this is the last block */
                strm.avail_in = (unsigned)len;
                if (left || job->more) {
#if ZLIB_VERNUM >= 0x1260
                    deflate_engine(&strm, job->out, Z_BLOCK);

                    /* add enough empty blocks to get to a byte boundary */
                    (void)deflatePending(&strm, Z_NULL, &bits);
                    if (bits & 1)
                        deflate_engine(&strm, job->out, Z_SYNC_FLUSH);
                    else if (bits & 7) {
                        do {        /* add static empty blocks */
                            bits = deflatePrime(&strm, 10, 2);
                            assert(bits == Z_OK);
                            (void)deflatePending(&strm, Z_NULL, &bits);
                        } while (bits & 7);
                        deflate_engine(&strm, job->out, Z_BLOCK);
                    }
#else
                    deflate_engine(&strm, job->out, Z_SYNC_FLUSH);
#endif
                }
                else
                    deflate_engine(&strm, job->out, Z_FINISH);
            }
            else {
                /* compress len bytes using zopfli, bring to byte boundary */
                unsigned char bits, *out;
                size_t outsize;

                out = NULL;
                outsize = 0;
                bits = 0;
                DeflatePart(&opts, 2, !(left || job->more),
                            temp->buf, temp->len, temp->len + len,
                            &bits, &out, &outsize);
                /* NOTE(review): room for the zopfli output plus up to five
                   padding bytes is only checked by this assert, which
                   vanishes under NDEBUG -- confirm OUTPOOL() always
                   suffices */
                assert(job->out->len + outsize + 5 <= job->out->size);
                memcpy(job->out->buf + job->out->len, out, outsize);
                free(out);
                job->out->len += outsize;
                if (left || job->more) {
                    bits &= 7;
                    if (bits & 1) {
                        /* odd number of bits used in the last byte -- append
                           an empty stored block (zeros, then 0x0000 0xffff
                           length and complement) */
                        if (bits == 7)
                            job->out->buf[job->out->len++] = 0;
                        job->out->buf[job->out->len++] = 0;
                        job->out->buf[job->out->len++] = 0;
                        job->out->buf[job->out->len++] = 0xff;
                        job->out->buf[job->out->len++] = 0xff;
                    }
                    else if (bits) {
                        /* even number of bits used -- append ten-bit empty
                           blocks, each advancing the bit position by two,
                           until a byte boundary is reached */
                        do {
                            job->out->buf[job->out->len - 1] += 2 << bits;
                            job->out->buf[job->out->len++] = 0;
                            bits += 2;
                        } while (bits < 8);
                    }
                }
                temp->len += len;
            }
        } while (left);
        if (g.level > 9)
            drop_space(temp);
        if (job->lens != NULL) {
            drop_space(job->lens);
            job->lens = NULL;
        }
        Trace(("-- compressed #%ld%s", job->seq, job->more ? "" : " (last)"));

        /* reserve input buffer until check value has been calculated */
        use_space(job->in);

        /* insert write job in list in sorted order, alert write thread */
        possess(write_first);
        prior = &write_head;
        while ((here = *prior) != NULL) {
            if (here->seq > job->seq)
                break;
            prior = &(here->next);
        }
        job->next = here;
        *prior = job;
        twist(write_first, TO, write_head->seq);

        /* calculate the check value in parallel with writing, alert the write
           thread that the calculation is complete, and drop this usage of the
           input buffer */
        len = job->in->len;
        next = job->in->buf;
        check = CHECK(0L, Z_NULL, 0);
        while (len > MAXP2) {
            check = CHECK(check, next, MAXP2);
            len -= MAXP2;
            next += MAXP2;
        }
        check = CHECK(check, next, (unsigned)len);
        drop_space(job->in);
        job->check = check;
        Trace(("-- checked #%ld%s", job->seq, job->more ? "" : " (last)"));
        possess(job->calc);
        twist(job->calc, TO, 1);

        /* done with that one -- go find another job */
    }

    /* found job with seq == -1 -- free deflate memory and return to join */
    release(compress_have);
    (void)deflateEnd(&strm);
}

/* collect the write jobs off of the list in sequence order and write out the
   compressed data until the last chunk is written -- also write the header and
   trailer and combine the individual check values of the input buffers */
local void write_thread(void *dummy)
{
    long seq;                       /* next sequence number looking for */
    struct job *job;                /* job pulled and working on */
    size_t len;                     /* input length */
    int more;                       /* true if more chunks to write */
    unsigned long head;             /* header length */
    unsigned long ulen;             /* total uncompressed size (overflow ok) */
    unsigned long clen;             /* total compressed size (overflow ok) */
    unsigned long check;            /* check value of uncompressed data */

    (void)dummy;

    /* build and write header */
    Trace(("-- write thread running"));
    head = put_header();

    /* process output of compress threads until end of input */
    ulen = clen = 0;
    check = CHECK(0L, Z_NULL, 0);
    seq = 0;
    do {
        /* get next write job in order */
        possess(write_first);
        wait_for(write_first, TO_BE, seq);
        job = write_head;
        write_head = job->next;
        twist(write_first, TO, write_head == NULL ? -1 : write_head->seq);

        /* update lengths, save uncompressed length for COMB */
        more = job->more;
        len = job->in->len;
        drop_space(job->in);
        ulen += (unsigned long)len;
        clen += (unsigned long)(job->out->len);

        /* write the compressed data and drop the output buffer */
        Trace(("-- writing #%ld", seq));
        writen(g.outd, job->out->buf, job->out->len);
        drop_space(job->out);
        Trace(("-- wrote #%ld%s", seq, more ? "" : " (last)"));

        /* wait for check calculation to complete, then combine, once
           the compress thread is done with the input, release it */
        possess(job->calc);
        wait_for(job->calc, TO_BE, 1);
        release(job->calc);
        check = COMB(check, job->check, len);

        /* free the job */
        free_lock(job->calc);
        free(job);

        /* get the next buffer in sequence */
        seq++;
    } while (more);

    /* write trailer */
    put_trailer(ulen, clen, check, head);

    /* verify no more jobs, prepare for next use */
    possess(compress_have);
    assert(compress_head == NULL && peek_lock(compress_have) == 0);
    release(compress_have);
    possess(write_first);
    assert(write_head == NULL);
    twist(write_first, TO, -1);
}

/* encode a hash hit to the block lengths list -- hit == 0 ends the list --
   lengths of 1..63 take one byte (128 + len; 128 itself marks the end of the
   list), 64..32831 take two bytes, and 32832..4227135 take three bytes; the
   list is allocated from lens_pool on first use and grown as needed */
local void append_len(struct job *job, size_t len)
{
    struct space *lens;

    assert(len < 4227136UL);
    if (job->lens == NULL)
        job->lens = get_space(&lens_pool);
    lens = job->lens;
    /* make sure there is room for the longest (three-byte) encoding */
    if (lens->size < lens->len + 3)
        grow_space(lens);
    if (len < 64)
        lens->buf[lens->len++] = len + 128;
    else if (len < 32832U) {
        len -= 64;
        lens->buf[lens->len++] = len >> 8;
        lens->buf[lens->len++] = len;
    }
    else {
        len -= 32832U;
        lens->buf[lens->len++] = (len >> 16) + 192;
        lens->buf[lens->len++] = len >> 8;
        lens->buf[lens->len++] = len;
    }
}

/* compress ind to outd, using multiple threads for the compression and check
   value calculations and one other thread for writing the output -- compress
   threads will be launched and left running (waiting
actually) to support subsequent calls of parallel_compress() */ local void parallel_compress(void) { long seq; /* sequence number */ struct space *curr; /* input data to compress */ struct space *next; /* input data that follows curr */ struct space *hold; /* input data that follows next */ struct space *dict; /* dictionary for next compression */ struct job *job; /* job for compress, then write */ int more; /* true if more input to read */ unsigned hash; /* hash for rsyncable */ unsigned char *scan; /* next byte to compute hash on */ unsigned char *end; /* after end of data to compute hash on */ unsigned char *last; /* position after last hit */ size_t left; /* last hit in curr to end of curr */ size_t len; /* for various length computations */ /* if first time or after an option change, setup the job lists */ setup_jobs(); /* start write thread */ writeth = launch(write_thread, NULL); /* read from input and start compress threads (write thread will pick up the output of the compress threads) */ seq = 0; next = get_space(&in_pool); next->len = readn(g.ind, next->buf, next->size); hold = NULL; dict = NULL; scan = next->buf; hash = RSYNCHIT; left = 0; do { /* create a new job */ job = malloc(sizeof(struct job)); if (job == NULL) bail("not enough memory", ""); job->calc = new_lock(0); /* update input spaces */ curr = next; next = hold; hold = NULL; /* get more input if we don't already have some */ if (next == NULL) { next = get_space(&in_pool); next->len = readn(g.ind, next->buf, next->size); } /* if rsyncable, generate block lengths and prepare curr for job to likely have less than size bytes (up to the last hash hit) */ job->lens = NULL; if (g.rsync && curr->len) { /* compute the hash function starting where we last left off to cover either size bytes or to EOF, whichever is less, through the data in curr (and in the next loop, through next) -- save the block lengths resulting from the hash hits in the job->lens list */ if (left == 0) { /* scan is in curr */ last 
= curr->buf; end = curr->buf + curr->len; while (scan < end) { hash = ((hash << 1) ^ *scan++) & RSYNCMASK; if (hash == RSYNCHIT) { len = scan - last; append_len(job, len); last = scan; } } /* continue scan in next */ left = scan - last; scan = next->buf; } /* scan in next for enough bytes to fill curr, or what is available in next, whichever is less (if next isn't full, then we're at the end of the file) -- the bytes in curr since the last hit, stored in left, counts towards the size of the first block */ last = next->buf; len = curr->size - curr->len; if (len > next->len) len = next->len; end = next->buf + len; while (scan < end) { hash = ((hash << 1) ^ *scan++) & RSYNCMASK; if (hash == RSYNCHIT) { len = (scan - last) + left; left = 0; append_len(job, len); last = scan; } } append_len(job, 0); /* create input in curr for job up to last hit or entire buffer if no hits at all -- save remainder in next and possibly hold */ len = (job->lens->len == 1 ? scan : last) - next->buf; if (len) { /* got hits in next, or no hits in either -- copy to curr */ memcpy(curr->buf + curr->len, next->buf, len); curr->len += len; memmove(next->buf, next->buf + len, next->len - len); next->len -= len; scan -= len; left = 0; } else if (job->lens->len != 1 && left && next->len) { /* had hits in curr, but none in next, and last hit in curr wasn't right at the end, so we have input there to save -- use curr up to the last hit, save the rest, moving next to hold */ hold = next; next = get_space(&in_pool); memcpy(next->buf, curr->buf + (curr->len - left), left); next->len = left; curr->len -= left; } else { /* else, last match happened to be right at the end of curr, or we're at the end of the input compressing the rest */ left = 0; } } /* compress curr->buf to curr->len -- compress thread will drop curr */ job->in = curr; /* set job->more if there is more to compress after curr */ more = next->len != 0; job->more = more; /* provide dictionary for this job, prepare dictionary for next job */ 
job->out = dict; if (more && g.setdict) { if (curr->len >= DICT || job->out == NULL) { dict = curr; use_space(dict); } else { dict = get_space(&dict_pool); len = DICT - curr->len; memcpy(dict->buf, job->out->buf + (job->out->len - len), len); memcpy(dict->buf + len, curr->buf, curr->len); dict->len = DICT; } } /* preparation of job is complete */ job->seq = seq; Trace(("-- read #%ld%s", seq, more ? "" : " (last)")); if (++seq < 1) bail("input too long: ", g.inf); /* start another compress thread if needed */ if (cthreads < seq && cthreads < g.procs) { (void)launch(compress_thread, NULL); cthreads++; } /* put job at end of compress list, let all the compressors know */ possess(compress_have); job->next = NULL; *compress_tail = job; compress_tail = &(job->next); twist(compress_have, BY, +1); } while (more); drop_space(next); /* wait for the write thread to complete (we leave the compress threads out there and waiting in case there is another stream to compress) */ join(writeth); writeth = NULL; Trace(("-- write thread joined")); } #endif /* repeated code in single_compress to compress available input and write it */ #define DEFLATE_WRITE(flush) \ do { \ do { \ strm->avail_out = out_size; \ strm->next_out = out; \ (void)deflate(strm, flush); \ writen(g.outd, out, out_size - strm->avail_out); \ clen += out_size - strm->avail_out; \ } while (strm->avail_out == 0); \ assert(strm->avail_in == 0); \ } while (0) /* do a simple compression in a single thread from ind to outd -- if reset is true, instead free the memory that was allocated and retained for input, output, and deflate */ local void single_compress(int reset) { size_t got; /* amount read */ size_t more; /* amount of next read (0 if eof) */ size_t start; /* start of next read */ size_t have; /* bytes in current block for -i */ unsigned hash; /* hash for rsyncable */ #if ZLIB_VERNUM >= 0x1260 int bits; /* deflate pending bits */ #endif unsigned char *scan; /* pointer for hash computation */ size_t left; /* bytes 
left to compress after hash hit */ unsigned long head; /* header length */ unsigned long ulen; /* total uncompressed size (overflow ok) */ unsigned long clen; /* total compressed size (overflow ok) */ unsigned long check; /* check value of uncompressed data */ static unsigned out_size; /* size of output buffer */ static unsigned char *in, *next, *out; /* reused i/o buffers */ static z_stream *strm = NULL; /* reused deflate structure */ /* if requested, just release the allocations and return */ if (reset) { if (strm != NULL) { (void)deflateEnd(strm); free(strm); free(out); free(next); free(in); strm = NULL; } return; } /* initialize the deflate structure if this is the first time */ if (strm == NULL) { out_size = g.block > MAXP2 ? MAXP2 : (unsigned)g.block; if ((in = malloc(g.block)) == NULL || (next = malloc(g.block)) == NULL || (out = malloc(out_size)) == NULL || (strm = malloc(sizeof(z_stream))) == NULL) bail("not enough memory", ""); strm->zfree = Z_NULL; strm->zalloc = Z_NULL; strm->opaque = Z_NULL; if (deflateInit2(strm, 6, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK) bail("not enough memory", ""); } /* write header */ head = put_header(); /* set compression level in case it changed */ if (g.level > 9) bail("compression level 11 not yet implemented for one thread", ""); (void)deflateReset(strm); (void)deflateParams(strm, g.level, Z_DEFAULT_STRATEGY); /* do raw deflate and calculate check value */ got = 0; more = readn(g.ind, next, g.block); ulen = (unsigned)more; start = 0; clen = 0; have = 0; check = CHECK(0L, Z_NULL, 0); hash = RSYNCHIT; do { /* get data to compress, see if there is any more input */ if (got == 0) { scan = in; in = next; next = scan; strm->next_in = in + start; got = more; more = readn(g.ind, next, g.block); ulen += (unsigned long)more; start = 0; } /* if rsyncable, compute hash until a hit or the end of the block */ left = 0; if (g.rsync && got) { scan = strm->next_in; left = got; do { if (left == 0) { /* went to the end -- if no more 
or no hit in size bytes, then proceed to do a flush or finish with got bytes */ if (more == 0 || got == g.block) break; /* fill in[] with what's left there and as much as possible from next[] -- set up to continue hash hit search */ memmove(in, strm->next_in, got); strm->next_in = in; scan = in + got; left = more > g.block - got ? g.block - got : more; memcpy(scan, next + start, left); got += left; more -= left; start += left; /* if that emptied the next buffer, try to refill it */ if (more == 0) { more = readn(g.ind, next, g.block); ulen += (unsigned long)more; start = 0; } } left--; hash = ((hash << 1) ^ *scan++) & RSYNCMASK; } while (hash != RSYNCHIT); got -= left; } /* clear history for --independent option */ if (!g.setdict) { have += got; if (have > g.block) { (void)deflateReset(strm); have = got; } } /* compress MAXP2-size chunks in case unsigned type is small */ while (got > MAXP2) { strm->avail_in = MAXP2; check = CHECK(check, strm->next_in, strm->avail_in); DEFLATE_WRITE(Z_NO_FLUSH); got -= MAXP2; } /* compress the remainder, emit a block -- finish if end of input */ strm->avail_in = (unsigned)got; got = left; check = CHECK(check, strm->next_in, strm->avail_in); if (more || got) { #if ZLIB_VERNUM >= 0x1260 DEFLATE_WRITE(Z_BLOCK); (void)deflatePending(strm, Z_NULL, &bits); if (bits & 1) DEFLATE_WRITE(Z_SYNC_FLUSH); else if (bits & 7) { do { bits = deflatePrime(strm, 10, 2); assert(bits == Z_OK); (void)deflatePending(strm, Z_NULL, &bits); } while (bits & 7); DEFLATE_WRITE(Z_NO_FLUSH); } #else DEFLATE_WRITE(Z_SYNC_FLUSH); #endif } else DEFLATE_WRITE(Z_FINISH); /* do until no more input */ } while (more || got); /* write trailer */ put_trailer(ulen, clen, check, head); } /* --- decompression --- */ #ifndef NOTHREAD /* parallel read thread */ local void load_read(void *dummy) { size_t len; (void)dummy; Trace(("-- launched decompress read thread")); do { possess(g.load_state); wait_for(g.load_state, TO_BE, 1); g.in_len = len = readn(g.ind, g.in_which ? 
g.in_buf : g.in_buf2, BUF); Trace(("-- decompress read thread read %lu bytes", len)); twist(g.load_state, TO, 0); } while (len == BUF); Trace(("-- exited decompress read thread")); } #endif /* load() is called when the input has been consumed in order to provide more input data: load the input buffer with BUF or fewer bytes (fewer if at end of file) from the file g.ind, set g.in_next to point to the g.in_left bytes read, update g.in_tot, and return g.in_left -- g.in_eof is set to true when g.in_left has gone to zero and there is no more data left to read */ local size_t load(void) { /* if already detected end of file, do nothing */ if (g.in_short) { g.in_eof = 1; g.in_left = 0; return 0; } #ifndef NOTHREAD /* if first time in or procs == 1, read a buffer to have something to return, otherwise wait for the previous read job to complete */ if (g.procs > 1) { /* if first time, fire up the read thread, ask for a read */ if (g.in_which == -1) { g.in_which = 1; g.load_state = new_lock(1); g.load_thread = launch(load_read, NULL); } /* wait for the previously requested read to complete */ possess(g.load_state); wait_for(g.load_state, TO_BE, 0); release(g.load_state); /* set up input buffer with the data just read */ g.in_next = g.in_which ? 
g.in_buf : g.in_buf2; g.in_left = g.in_len; /* if not at end of file, alert read thread to load next buffer, alternate between g.in_buf and g.in_buf2 */ if (g.in_len == BUF) { g.in_which = 1 - g.in_which; possess(g.load_state); twist(g.load_state, TO, 1); } /* at end of file -- join read thread (already exited), clean up */ else { join(g.load_thread); free_lock(g.load_state); g.in_which = -1; } } else #endif { /* don't use threads -- simply read a buffer into g.in_buf */ g.in_left = readn(g.ind, g.in_next = g.in_buf, BUF); } /* note end of file */ if (g.in_left < BUF) { g.in_short = 1; /* if we got bupkis, now is the time to mark eof */ if (g.in_left == 0) g.in_eof = 1; } /* update the total and return the available bytes */ g.in_tot += g.in_left; return g.in_left; } /* initialize for reading new input */ local void in_init(void) { g.in_left = 0; g.in_eof = 0; g.in_short = 0; g.in_tot = 0; #ifndef NOTHREAD g.in_which = -1; #endif } /* buffered reading macros for decompression and listing */ #define GET() (g.in_eof || (g.in_left == 0 && load() == 0) ? EOF : \ (g.in_left--, *g.in_next++)) #define GET2() (tmp2 = GET(), tmp2 + ((unsigned)(GET()) << 8)) #define GET4() (tmp4 = GET2(), tmp4 + ((unsigned long)(GET2()) << 16)) #define SKIP(dist) \ do { \ size_t togo = (dist); \ while (togo > g.in_left) { \ togo -= g.in_left; \ if (load() == 0) \ return -1; \ } \ g.in_left -= togo; \ g.in_next += togo; \ } while (0) /* pull LSB order or MSB order integers from an unsigned char buffer */ #define PULL2L(p) ((p)[0] + ((unsigned)((p)[1]) << 8)) #define PULL4L(p) (PULL2L(p) + ((unsigned long)(PULL2L((p) + 2)) << 16)) #define PULL2M(p) (((unsigned)((p)[0]) << 8) + (p)[1]) #define PULL4M(p) (((unsigned long)(PULL2M(p)) << 16) + PULL2M((p) + 2)) /* convert MS-DOS date and time to a Unix time, assuming current timezone (you got a better idea?) 
*/ local time_t dos2time(unsigned long dos) { struct tm tm; if (dos == 0) return time(NULL); tm.tm_year = ((int)(dos >> 25) & 0x7f) + 80; tm.tm_mon = ((int)(dos >> 21) & 0xf) - 1; tm.tm_mday = (int)(dos >> 16) & 0x1f; tm.tm_hour = (int)(dos >> 11) & 0x1f; tm.tm_min = (int)(dos >> 5) & 0x3f; tm.tm_sec = (int)(dos << 1) & 0x3e; tm.tm_isdst = -1; /* figure out if DST or not */ return mktime(&tm); } /* convert an unsigned 32-bit integer to signed, even if long > 32 bits */ local long tolong(unsigned long val) { return (long)(val & 0x7fffffffUL) - (long)(val & 0x80000000UL); } #define LOW32 0xffffffffUL /* process zip extra field to extract zip64 lengths and Unix mod time */ local int read_extra(unsigned len, int save) { unsigned id, size, tmp2; unsigned long tmp4; /* process extra blocks */ while (len >= 4) { id = GET2(); size = GET2(); if (g.in_eof) return -1; len -= 4; if (size > len) break; len -= size; if (id == 0x0001) { /* Zip64 Extended Information Extra Field */ if (g.zip_ulen == LOW32 && size >= 8) { g.zip_ulen = GET4(); SKIP(4); size -= 8; } if (g.zip_clen == LOW32 && size >= 8) { g.zip_clen = GET4(); SKIP(4); size -= 8; } } if (save) { if ((id == 0x000d || id == 0x5855) && size >= 8) { /* PKWare Unix or Info-ZIP Type 1 Unix block */ SKIP(4); g.stamp = tolong(GET4()); size -= 8; } if (id == 0x5455 && size >= 5) { /* Extended Timestamp block */ size--; if (GET() & 1) { g.stamp = tolong(GET4()); size -= 4; } } } SKIP(size); } SKIP(len); return 0; } /* read a gzip, zip, zlib, or lzw header from ind and return the method in the range 0..256 (256 implies a zip method greater than 255), or on error return negative: -1 is immediate EOF, -2 is not a recognized compressed format, -3 is premature EOF within the header, -4 is unexpected header flag values, -5 is the zip central directory; a method of 257 is lzw -- if the return value is not negative, then get_header() sets g.form to indicate gzip (0), zlib (1), or zip (2, or 3 if the entry is followed by a data 
descriptor) */ local int get_header(int save) { unsigned magic; /* magic header */ int method; /* compression method */ int flags; /* header flags */ unsigned fname, extra; /* name and extra field lengths */ unsigned tmp2; /* for macro */ unsigned long tmp4; /* for macro */ /* clear return information */ if (save) { g.stamp = 0; RELEASE(g.hname); } /* see if it's a gzip, zlib, or lzw file */ g.form = -1; g.magic1 = GET(); if (g.in_eof) return -1; magic = g.magic1 << 8; magic += GET(); if (g.in_eof) return -2; if (magic % 31 == 0) { /* it's zlib */ g.form = 1; return (int)((magic >> 8) & 0xf); } if (magic == 0x1f9d) /* it's lzw */ return 257; if (magic == 0x504b) { /* it's zip */ magic = GET2(); /* the rest of the signature */ if (g.in_eof) return -3; if (magic == 0x0201 || magic == 0x0806) return -5; /* central header or archive extra */ if (magic != 0x0403) return -4; /* not a local header */ SKIP(2); flags = GET2(); if (g.in_eof) return -3; if (flags & 0xfff0) return -4; method = GET(); /* return low byte of method or 256 */ if (GET() != 0 || flags & 1) method = 256; /* unknown or encrypted */ if (g.in_eof) return -3; if (save) g.stamp = dos2time(GET4()); else SKIP(4); g.zip_crc = GET4(); g.zip_clen = GET4(); g.zip_ulen = GET4(); fname = GET2(); extra = GET2(); if (save) { char *next = g.hname = malloc(fname + 1); if (g.hname == NULL) bail("not enough memory", ""); while (fname > g.in_left) { memcpy(next, g.in_next, g.in_left); fname -= g.in_left; next += g.in_left; if (load() == 0) return -3; } memcpy(next, g.in_next, fname); g.in_left -= fname; g.in_next += fname; next += fname; *next = 0; } else SKIP(fname); read_extra(extra, save); g.form = 2 + ((flags & 8) >> 3); return g.in_eof ? 
-3 : method; } if (magic != 0x1f8b) { /* not gzip */ g.in_left++; /* unget second magic byte */ g.in_next--; return -2; } /* it's gzip -- get method and flags */ method = GET(); flags = GET(); if (g.in_eof) return -1; if (flags & 0xe0) return -4; /* get time stamp */ if (save) g.stamp = tolong(GET4()); else SKIP(4); /* skip extra field and OS */ SKIP(2); /* skip extra field, if present */ if (flags & 4) { extra = GET2(); if (g.in_eof) return -3; SKIP(extra); } /* read file name, if present, into allocated memory */ if ((flags & 8) && save) { unsigned char *end; size_t copy, have, size = 128; g.hname = malloc(size); if (g.hname == NULL) bail("not enough memory", ""); have = 0; do { if (g.in_left == 0 && load() == 0) return -3; end = memchr(g.in_next, 0, g.in_left); copy = end == NULL ? g.in_left : (size_t)(end - g.in_next) + 1; if (have + copy > size) { while (have + copy > (size <<= 1)) ; g.hname = realloc(g.hname, size); if (g.hname == NULL) bail("not enough memory", ""); } memcpy(g.hname + have, g.in_next, copy); have += copy; g.in_left -= copy; g.in_next += copy; } while (end == NULL); } else if (flags & 8) while (GET() != 0) if (g.in_eof) return -3; /* skip comment */ if (flags & 16) while (GET() != 0) if (g.in_eof) return -3; /* skip header crc */ if (flags & 2) SKIP(2); /* return gzip compression method */ g.form = 0; return method; } /* --- list contents of compressed input (gzip, zlib, or lzw) */ /* find standard compressed file suffix, return length of suffix */ local size_t compressed_suffix(char *nm) { size_t len; len = strlen(nm); if (len > 4) { nm += len - 4; len = 4; if (strcmp(nm, ".zip") == 0 || strcmp(nm, ".ZIP") == 0 || strcmp(nm, ".tgz") == 0) return 4; } if (len > 3) { nm += len - 3; len = 3; if (strcmp(nm, ".gz") == 0 || strcmp(nm, "-gz") == 0 || strcmp(nm, ".zz") == 0 || strcmp(nm, "-zz") == 0) return 3; } if (len > 2) { nm += len - 2; if (strcmp(nm, ".z") == 0 || strcmp(nm, "-z") == 0 || strcmp(nm, "_z") == 0 || strcmp(nm, ".Z") == 0) return 
2; } return 0; } /* listing file name lengths for -l and -lv */ #define NAMEMAX1 48 /* name display limit at verbosity 1 */ #define NAMEMAX2 16 /* name display limit at verbosity 2 */ /* print gzip or lzw file information */ local void show_info(int method, unsigned long check, off_t len, int cont) { size_t max; /* maximum name length for current verbosity */ size_t n; /* name length without suffix */ time_t now; /* for getting current year */ char mod[26]; /* modification time in text */ char tag[NAMEMAX1+1]; /* header or file name, possibly truncated */ /* create abbreviated name from header file name or actual file name */ max = g.verbosity > 1 ? NAMEMAX2 : NAMEMAX1; memset(tag, 0, max + 1); if (cont) strncpy(tag, "<...>", max + 1); else if (g.hname == NULL) { n = strlen(g.inf) - compressed_suffix(g.inf); strncpy(tag, g.inf, n > max + 1 ? max + 1 : n); if (strcmp(g.inf + n, ".tgz") == 0 && n < max + 1) strncpy(tag + n, ".tar", max + 1 - n); } else strncpy(tag, g.hname, max + 1); if (tag[max]) strcpy(tag + max - 3, "..."); /* convert time stamp to text */ if (g.stamp) { strcpy(mod, ctime(&g.stamp)); now = time(NULL); if (strcmp(mod + 20, ctime(&now) + 20) != 0) strcpy(mod + 11, mod + 19); } else strcpy(mod + 4, "------ -----"); mod[16] = 0; /* if first time, print header */ if (g.first) { if (g.verbosity > 1) fputs("method check timestamp ", stdout); if (g.verbosity > 0) puts("compressed original reduced name"); g.first = 0; } /* print information */ if (g.verbosity > 1) { if (g.form == 3 && !g.decode) printf("zip%3d -------- %s ", method, mod + 4); else if (g.form > 1) printf("zip%3d %08lx %s ", method, check, mod + 4); else if (g.form == 1) printf("zlib%2d %08lx %s ", method, check, mod + 4); else if (method == 257) printf("lzw -------- %s ", mod + 4); else printf("gzip%2d %08lx %s ", method, check, mod + 4); } if (g.verbosity > 0) { if ((g.form == 3 && !g.decode) || (method == 8 && g.in_tot > (len + (len >> 10) + 12)) || (method == 257 && g.in_tot > len + (len 
>> 1) + 3)) #if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3 printf("%10jd %10jd? unk %s\n", (intmax_t)g.in_tot, (intmax_t)len, tag); else printf("%10jd %10jd %6.1f%% %s\n", (intmax_t)g.in_tot, (intmax_t)len, len == 0 ? 0 : 100 * (len - g.in_tot)/(double)len, tag); #else printf(sizeof(off_t) == sizeof(long) ? "%10ld %10ld? unk %s\n" : "%10lld %10lld? unk %s\n", g.in_tot, len, tag); else printf(sizeof(off_t) == sizeof(long) ? "%10ld %10ld %6.1f%% %s\n" : "%10lld %10lld %6.1f%% %s\n", g.in_tot, len, len == 0 ? 0 : 100 * (len - g.in_tot)/(double)len, tag); #endif } } /* list content information about the gzip file at ind (only works if the gzip file contains a single gzip stream with no junk at the end, and only works well if the uncompressed length is less than 4 GB) */ local void list_info(void) { int method; /* get_header() return value */ size_t n; /* available trailer bytes */ off_t at; /* used to calculate compressed length */ unsigned char tail[8]; /* trailer containing check and length */ unsigned long check, len; /* check value and length from trailer */ /* initialize input buffer */ in_init(); /* read header information and position input after header */ method = get_header(1); if (method < 0) { RELEASE(g.hname); if (method != -1 && g.verbosity > 1) complain("%s not a compressed file -- skipping", g.inf); return; } /* list zip file */ if (g.form > 1) { g.in_tot = g.zip_clen; show_info(method, g.zip_crc, g.zip_ulen, 0); return; } /* list zlib file */ if (g.form == 1) { at = lseek(g.ind, 0, SEEK_END); if (at == -1) { check = 0; do { len = g.in_left < 4 ? 
g.in_left : 4; g.in_next += g.in_left - len; while (len--) check = (check << 8) + *g.in_next++; } while (load() != 0); check &= LOW32; } else { g.in_tot = at; lseek(g.ind, -4, SEEK_END); readn(g.ind, tail, 4); check = PULL4M(tail); } g.in_tot -= 6; show_info(method, check, 0, 0); return; } /* list lzw file */ if (method == 257) { at = lseek(g.ind, 0, SEEK_END); if (at == -1) while (load() != 0) ; else g.in_tot = at; g.in_tot -= 3; show_info(method, 0, 0, 0); return; } /* skip to end to get trailer (8 bytes), compute compressed length */ if (g.in_short) { /* whole thing already read */ if (g.in_left < 8) { complain("%s not a valid gzip file -- skipping", g.inf); return; } g.in_tot = g.in_left - 8; /* compressed size */ memcpy(tail, g.in_next + (g.in_left - 8), 8); } else if ((at = lseek(g.ind, -8, SEEK_END)) != -1) { g.in_tot = at - g.in_tot + g.in_left; /* compressed size */ readn(g.ind, tail, 8); /* get trailer */ } else { /* can't seek */ at = g.in_tot - g.in_left; /* save header size */ do { n = g.in_left < 8 ? 
g.in_left : 8; memcpy(tail, g.in_next + (g.in_left - n), n); load(); } while (g.in_left == BUF); /* read until end */ if (g.in_left < 8) { if (n + g.in_left < 8) { complain("%s not a valid gzip file -- skipping", g.inf); return; } if (g.in_left) { if (n + g.in_left > 8) memcpy(tail, tail + n - (8 - g.in_left), 8 - g.in_left); memcpy(tail + 8 - g.in_left, g.in_next, g.in_left); } } else memcpy(tail, g.in_next + (g.in_left - 8), 8); g.in_tot -= at + 8; } if (g.in_tot < 2) { complain("%s not a valid gzip file -- skipping", g.inf); return; } /* convert trailer to check and uncompressed length (modulo 2^32) */ check = PULL4L(tail); len = PULL4L(tail + 4); /* list information about contents */ show_info(method, check, len, 0); RELEASE(g.hname); } /* --- copy input to output (when acting like cat) --- */ local void cat(void) { /* write first magic byte (if we're here, there's at least one byte) */ writen(g.outd, &g.magic1, 1); g.out_tot = 1; /* copy the remainder of the input to the output (if there were any more bytes of input, then g.in_left is non-zero and g.in_next is pointing to the second magic byte) */ while (g.in_left) { writen(g.outd, g.in_next, g.in_left); g.out_tot += g.in_left; g.in_left = 0; load(); } } /* --- decompress deflate input --- */ /* call-back input function for inflateBack() */ local unsigned inb(void *desc, unsigned char **buf) { (void)desc; load(); *buf = g.in_next; return g.in_left; } /* output buffers and window for infchk() and unlzw() */ #define OUTSIZE 32768U /* must be at least 32K for inflateBack() window */ local unsigned char out_buf[OUTSIZE]; #ifndef NOTHREAD /* output data for parallel write and check */ local unsigned char out_copy[OUTSIZE]; local size_t out_len; /* outb threads states */ local lock *outb_write_more = NULL; local lock *outb_check_more; /* output write thread */ local void outb_write(void *dummy) { size_t len; (void)dummy; Trace(("-- launched decompress write thread")); do { possess(outb_write_more); 
wait_for(outb_write_more, TO_BE, 1); len = out_len; if (len && g.decode == 1) writen(g.outd, out_copy, len); Trace(("-- decompress wrote %lu bytes", len)); twist(outb_write_more, TO, 0); } while (len); Trace(("-- exited decompress write thread")); } /* output check thread */ local void outb_check(void *dummy) { size_t len; (void)dummy; Trace(("-- launched decompress check thread")); do { possess(outb_check_more); wait_for(outb_check_more, TO_BE, 1); len = out_len; g.out_check = CHECK(g.out_check, out_copy, len); Trace(("-- decompress checked %lu bytes", len)); twist(outb_check_more, TO, 0); } while (len); Trace(("-- exited decompress check thread")); } #endif /* call-back output function for inflateBack() -- wait for the last write and check calculation to complete, copy the write buffer, and then alert the write and check threads and return for more decompression while that's going on (or just write and check if no threads or if proc == 1) */ local int outb(void *desc, unsigned char *buf, unsigned len) { #ifndef NOTHREAD static thread *wr, *ch; (void)desc; if (g.procs > 1) { /* if first time, initialize state and launch threads */ if (outb_write_more == NULL) { outb_write_more = new_lock(0); outb_check_more = new_lock(0); wr = launch(outb_write, NULL); ch = launch(outb_check, NULL); } /* wait for previous write and check threads to complete */ possess(outb_check_more); wait_for(outb_check_more, TO_BE, 0); possess(outb_write_more); wait_for(outb_write_more, TO_BE, 0); /* copy the output and alert the worker bees */ out_len = len; g.out_tot += len; memcpy(out_copy, buf, len); twist(outb_write_more, TO, 1); twist(outb_check_more, TO, 1); /* if requested with len == 0, clean up -- terminate and join write and check threads, free lock */ if (len == 0) { join(ch); join(wr); free_lock(outb_check_more); free_lock(outb_write_more); outb_write_more = NULL; } /* return for more decompression while last buffer is being written and having its check value calculated -- we wait 
for those to finish the next time this function is called */ return 0; } #endif /* if just one process or no threads, then do it without threads */ if (len) { if (g.decode == 1) writen(g.outd, buf, len); g.out_check = CHECK(g.out_check, buf, len); g.out_tot += len; } return 0; } /* inflate for decompression or testing -- decompress from ind to outd unless decode != 1, in which case just test ind, and then also list if list != 0; look for and decode multiple, concatenated gzip and/or zlib streams; read and check the gzip, zlib, or zip trailer */ local void infchk(void) { int ret, cont, was; unsigned long check, len; z_stream strm; unsigned tmp2; unsigned long tmp4; off_t clen; cont = 0; do { /* header already read -- set up for decompression */ g.in_tot = g.in_left; /* track compressed data length */ g.out_tot = 0; g.out_check = CHECK(0L, Z_NULL, 0); strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL; ret = inflateBackInit(&strm, 15, out_buf); if (ret != Z_OK) bail("not enough memory", ""); /* decompress, compute lengths and check value */ strm.avail_in = g.in_left; strm.next_in = g.in_next; ret = inflateBack(&strm, inb, NULL, outb, NULL); if (ret != Z_STREAM_END) bail("corrupted input -- invalid deflate data: ", g.inf); g.in_left = strm.avail_in; g.in_next = strm.next_in; inflateBackEnd(&strm); outb(NULL, NULL, 0); /* finish off final write and check */ /* compute compressed data length */ clen = g.in_tot - g.in_left; /* read and check trailer */ if (g.form > 1) { /* zip local trailer (if any) */ if (g.form == 3) { /* data descriptor follows */ /* read original version of data descriptor */ g.zip_crc = GET4(); g.zip_clen = GET4(); g.zip_ulen = GET4(); if (g.in_eof) bail("corrupted zip entry -- missing trailer: ", g.inf); /* if crc doesn't match, try info-zip variant with sig */ if (g.zip_crc != g.out_check) { if (g.zip_crc != 0x08074b50UL || g.zip_clen != g.out_check) bail("corrupted zip entry -- crc32 mismatch: ", g.inf); g.zip_crc = g.zip_clen; 
g.zip_clen = g.zip_ulen; g.zip_ulen = GET4(); } /* handle incredibly rare cases where crc equals signature */ else if (g.zip_crc == 0x08074b50UL && g.zip_clen == g.zip_crc && ((clen & LOW32) != g.zip_crc || g.zip_ulen == g.zip_crc)) { g.zip_crc = g.zip_clen; g.zip_clen = g.zip_ulen; g.zip_ulen = GET4(); } /* if second length doesn't match, try 64-bit lengths */ if (g.zip_ulen != (g.out_tot & LOW32)) { g.zip_ulen = GET4(); (void)GET4(); } if (g.in_eof) bail("corrupted zip entry -- missing trailer: ", g.inf); } if (g.zip_clen != (clen & LOW32) || g.zip_ulen != (g.out_tot & LOW32)) bail("corrupted zip entry -- length mismatch: ", g.inf); check = g.zip_crc; } else if (g.form == 1) { /* zlib (big-endian) trailer */ check = (unsigned long)(GET()) << 24; check += (unsigned long)(GET()) << 16; check += (unsigned)(GET()) << 8; check += GET(); if (g.in_eof) bail("corrupted zlib stream -- missing trailer: ", g.inf); if (check != g.out_check) bail("corrupted zlib stream -- adler32 mismatch: ", g.inf); } else { /* gzip trailer */ check = GET4(); len = GET4(); if (g.in_eof) bail("corrupted gzip stream -- missing trailer: ", g.inf); if (check != g.out_check) bail("corrupted gzip stream -- crc32 mismatch: ", g.inf); if (len != (g.out_tot & LOW32)) bail("corrupted gzip stream -- length mismatch: ", g.inf); } /* show file information if requested */ if (g.list) { g.in_tot = clen; show_info(8, check, g.out_tot, cont); cont = 1; } /* if a gzip entry follows a gzip entry, decompress it (don't replace saved header information from first entry) */ was = g.form; } while (was == 0 && (ret = get_header(0)) == 8 && g.form == 0); /* gzip -cdf copies junk after gzip stream directly to output */ if (was == 0 && ret == -2 && g.force && g.pipeout && g.decode != 2 && !g.list) cat(); else if (was > 1 && get_header(0) != -5) complain("entries after the first in %s were ignored", g.inf); else if ((was == 0 && ret != -1) || (was == 1 && GET() != EOF)) complain("%s OK, has trailing junk which was 
ignored", g.inf); } /* --- decompress Unix compress (LZW) input --- */ /* memory for unlzw() -- the first 256 entries of prefix[] and suffix[] are never used, could have offset the index, but it's faster to waste the memory */ unsigned short prefix[65536]; /* index to LZW prefix string */ unsigned char suffix[65536]; /* one-character LZW suffix */ unsigned char match[65280 + 2]; /* buffer for reversed match */ /* throw out what's left in the current bits byte buffer (this is a vestigial aspect of the compressed data format derived from an implementation that made use of a special VAX machine instruction!) */ #define FLUSHCODE() \ do { \ left = 0; \ rem = 0; \ if (chunk > g.in_left) { \ chunk -= g.in_left; \ if (load() == 0) \ break; \ if (chunk > g.in_left) { \ chunk = g.in_left = 0; \ break; \ } \ } \ g.in_left -= chunk; \ g.in_next += chunk; \ chunk = 0; \ } while (0) /* Decompress a compress (LZW) file from ind to outd. The compress magic header (two bytes) has already been read and verified. 
*/
local void unlzw(void)
{
    int got;                    /* byte just read by GET() */
    unsigned chunk;             /* bytes left in current chunk */
    int left;                   /* bits left in rem */
    unsigned rem;               /* unused bits from input */
    int bits;                   /* current bits per code */
    unsigned code;              /* code, table traversal index */
    unsigned mask;              /* mask for current bits codes */
    int max;                    /* maximum bits per code for this stream */
    int flags;                  /* compress flags, then block compress flag */
    unsigned end;               /* last valid entry in prefix/suffix tables */
    unsigned temp;              /* current code */
    unsigned prev;              /* previous code */
    unsigned final;             /* last character written for previous code */
    unsigned stack;             /* next position for reversed string */
    unsigned outcnt;            /* bytes in output buffer */
    unsigned char *p;

    /* process remainder of compress header -- a flags byte */
    g.out_tot = 0;
    flags = GET();
    if (g.in_eof)
        bail("missing lzw data: ", g.inf);
    if (flags & 0x60)
        bail("unknown lzw flags set: ", g.inf);
    max = flags & 0x1f;
    if (max < 9 || max > 16)
        bail("lzw bits out of range: ", g.inf);
    if (max == 9)                           /* 9 doesn't really mean 9 */
        max = 10;
    flags &= 0x80;                          /* true if block compress */

    /* clear table */
    bits = 9;
    mask = 0x1ff;
    end = flags ? 256 : 255;

    /* set up: get first 9-bit code, which is the first decompressed byte, but
       don't create a table entry until the next code */
    got = GET();
    if (g.in_eof)                           /* no compressed data is ok */
        return;
    final = prev = (unsigned)got;           /* low 8 bits of code */
    got = GET();
    if (g.in_eof || (got & 1) != 0)         /* missing a bit or code >= 256 */
        bail("invalid lzw code: ", g.inf);
    rem = (unsigned)got >> 1;               /* remaining 7 bits */
    left = 7;
    chunk = bits - 2;                       /* 7 bytes left in this chunk */
    out_buf[0] = (unsigned char)final;      /* write first decompressed byte */
    outcnt = 1;

    /* decode codes */
    stack = 0;
    for (;;) {
        /* if the table will be full after this, increment the code size */
        if (end >= mask && bits < max) {
            FLUSHCODE();
            bits++;
            mask <<= 1;
            mask++;
        }

        /* get a code of length bits */
        if (chunk == 0)                     /* decrement chunk modulo bits */
            chunk = bits;
        code = rem;                         /* low bits of code */
        got = GET();
        if (g.in_eof) {                     /* EOF is end of compressed data */
            /* write remaining buffered output */
            g.out_tot += outcnt;
            if (outcnt && g.decode == 1)
                writen(g.outd, out_buf, outcnt);
            return;
        }
        code += (unsigned)got << left;      /* middle (or high) bits of code */
        left += 8;
        chunk--;
        if (bits > left) {                  /* need more bits */
            got = GET();
            if (g.in_eof)                   /* can't end in middle of code */
                bail("invalid lzw code: ", g.inf);
            code += (unsigned)got << left;  /* high bits of code */
            left += 8;
            chunk--;
        }
        code &= mask;                       /* mask to current code length */
        left -= bits;                       /* number of unused bits */
        rem = (unsigned)got >> (8 - left);  /* unused bits from last byte */

        /* process clear code (256) */
        if (code == 256 && flags) {
            FLUSHCODE();
            bits = 9;                       /* initialize bits and mask */
            mask = 0x1ff;
            end = 255;                      /* empty table */
            continue;                       /* get next code */
        }

        /* special code to reuse last match */
        temp = code;                        /* save the current code */
        if (code > end) {
            /* Be picky on the allowed code here, and make sure that the code
               we drop through (prev) will be a valid index so that random
               input does not cause an exception.  The code != end + 1 check is
               empirically derived, and not checked in the original uncompress
               code.  If this ever causes a problem, that check could be safely
               removed.  Leaving this check in greatly improves pigz's ability
               to detect random or corrupted input after a compress header.
               In any case, the prev > end check must be retained. */
            if (code != end + 1 || prev > end)
                bail("invalid lzw code: ", g.inf);
            match[stack++] = (unsigned char)final;
            code = prev;
        }

        /* walk through linked list to generate output in reverse order */
        p = match + stack;
        while (code >= 256) {
            *p++ = suffix[code];
            code = prefix[code];
        }
        stack = p - match;
        match[stack++] = (unsigned char)code;
        final = code;

        /* link new table entry */
        if (end < mask) {
            end++;
            prefix[end] = (unsigned short)prev;
            suffix[end] = (unsigned char)final;
        }

        /* set previous code for next iteration */
        prev = temp;

        /* write output in forward order */
        while (stack > OUTSIZE - outcnt) {
            while (outcnt < OUTSIZE)
                out_buf[outcnt++] = match[--stack];
            g.out_tot += outcnt;
            if (g.decode == 1)
                writen(g.outd, out_buf, outcnt);
            outcnt = 0;
        }
        p = match + stack;
        do {
            out_buf[outcnt++] = *--p;
        } while (p > match);
        stack = 0;

        /* loop for next code with final and prev as the last match, rem and
           left provide the first 0..7 bits of the next code, end is the last
           valid table entry */
    }
}

/* --- file processing --- */

/* extract file name from path -- return a pointer into path just past the
   last '/', or path itself if it contains no '/' */
local char *justname(char *path)
{
    char *p;

    /* strrchr() finds the last separator without ever forming a pointer to
       before the start of path, which a backwards scan would do for a path
       with no '/' in it */
    p = strrchr(path, '/');
    return p == NULL ? path : p + 1;
}

/* Copy file attributes, from -> to, as best we can.  This is best effort, so
   no errors are reported.  The mode bits, including suid, sgid, and the sticky
   bit are copied (if allowed), the owner's user id and group id are copied
   (again if allowed), and the access and modify times are copied.
*/ local void copymeta(char *from, char *to) { struct stat st; struct timeval times[2]; /* get all of from's Unix meta data, return if not a regular file */ if (stat(from, &st) != 0 || (st.st_mode & S_IFMT) != S_IFREG) return; /* set to's mode bits, ignore errors */ (void)chmod(to, st.st_mode & 07777); /* copy owner's user and group, ignore errors */ (void)chown(to, st.st_uid, st.st_gid); /* copy access and modify times, ignore errors */ times[0].tv_sec = st.st_atime; times[0].tv_usec = 0; times[1].tv_sec = st.st_mtime; times[1].tv_usec = 0; (void)utimes(to, times); } /* set the access and modify times of fd to t */ local void touch(char *path, time_t t) { struct timeval times[2]; times[0].tv_sec = t; times[0].tv_usec = 0; times[1].tv_sec = t; times[1].tv_usec = 0; (void)utimes(path, times); } /* process provided input file, or stdin if path is NULL -- process() can call itself for recursive directory processing */ local void process(char *path) { int method = -1; /* get_header() return value */ size_t len; /* length of base name (minus suffix) */ struct stat st; /* to get file type and mod time */ /* all compressed suffixes for decoding search, in length order */ static char *sufs[] = {".z", "-z", "_z", ".Z", ".gz", "-gz", ".zz", "-zz", ".zip", ".ZIP", ".tgz", NULL}; /* open input file with name in, descriptor ind -- set name and mtime */ if (path == NULL) { strcpy(g.inf, ""); g.ind = 0; g.name = NULL; g.mtime = g.headis & 2 ? (fstat(g.ind, &st) ? 
time(NULL) : st.st_mtime) : 0; len = 0; } else { /* set input file name (already set if recursed here) */ if (path != g.inf) { strncpy(g.inf, path, sizeof(g.inf)); if (g.inf[sizeof(g.inf) - 1]) bail("name too long: ", path); } len = strlen(g.inf); /* try to stat input file -- if not there and decoding, look for that name with compressed suffixes */ if (lstat(g.inf, &st)) { if (errno == ENOENT && (g.list || g.decode)) { char **try = sufs; do { if (*try == NULL || len + strlen(*try) >= sizeof(g.inf)) break; strcpy(g.inf + len, *try++); errno = 0; } while (lstat(g.inf, &st) && errno == ENOENT); } #ifdef EOVERFLOW if (errno == EOVERFLOW || errno == EFBIG) bail(g.inf, " too large -- not compiled with large file support"); #endif if (errno) { g.inf[len] = 0; complain("%s does not exist -- skipping", g.inf); return; } len = strlen(g.inf); } /* only process regular files, but allow symbolic links if -f, recurse into directory if -r */ if ((st.st_mode & S_IFMT) != S_IFREG && (st.st_mode & S_IFMT) != S_IFLNK && (st.st_mode & S_IFMT) != S_IFDIR) { complain("%s is a special file or device -- skipping", g.inf); return; } if ((st.st_mode & S_IFMT) == S_IFLNK && !g.force && !g.pipeout) { complain("%s is a symbolic link -- skipping", g.inf); return; } if ((st.st_mode & S_IFMT) == S_IFDIR && !g.recurse) { complain("%s is a directory -- skipping", g.inf); return; } /* recurse into directory (assumes Unix) */ if ((st.st_mode & S_IFMT) == S_IFDIR) { char *roll, *item, *cut, *base, *bigger; size_t len, hold; DIR *here; struct dirent *next; /* accumulate list of entries (need to do this, since readdir() behavior not defined if directory modified between calls) */ here = opendir(g.inf); if (here == NULL) return; hold = 512; roll = malloc(hold); if (roll == NULL) bail("not enough memory", ""); *roll = 0; item = roll; while ((next = readdir(here)) != NULL) { if (next->d_name[0] == 0 || (next->d_name[0] == '.' && (next->d_name[1] == 0 || (next->d_name[1] == '.' 
&& next->d_name[2] == 0)))) continue; len = strlen(next->d_name) + 1; if (item + len + 1 > roll + hold) { do { /* make roll bigger */ hold <<= 1; } while (item + len + 1 > roll + hold); bigger = realloc(roll, hold); if (bigger == NULL) { free(roll); bail("not enough memory", ""); } item = bigger + (item - roll); roll = bigger; } strcpy(item, next->d_name); item += len; *item = 0; } closedir(here); /* run process() for each entry in the directory */ cut = base = g.inf + strlen(g.inf); if (base > g.inf && base[-1] != (unsigned char)'/') { if ((size_t)(base - g.inf) >= sizeof(g.inf)) bail("path too long", g.inf); *base++ = '/'; } item = roll; while (*item) { strncpy(base, item, sizeof(g.inf) - (base - g.inf)); if (g.inf[sizeof(g.inf) - 1]) { strcpy(g.inf + (sizeof(g.inf) - 4), "..."); bail("path too long: ", g.inf); } process(g.inf); item += strlen(item) + 1; } *cut = 0; /* release list of entries */ free(roll); return; } /* don't compress .gz (or provided suffix) files, unless -f */ if (!(g.force || g.list || g.decode) && len >= strlen(g.sufx) && strcmp(g.inf + len - strlen(g.sufx), g.sufx) == 0) { complain("%s ends with %s -- skipping", g.inf, g.sufx); return; } /* create output file only if input file has compressed suffix */ if (g.decode == 1 && !g.pipeout && !g.list) { int suf = compressed_suffix(g.inf); if (suf == 0) { complain("%s does not have compressed suffix -- skipping", g.inf); return; } len -= suf; } /* open input file */ g.ind = open(g.inf, O_RDONLY, 0); if (g.ind < 0) bail("read error on ", g.inf); /* prepare gzip header information for compression */ g.name = g.headis & 1 ? justname(g.inf) : NULL; g.mtime = g.headis & 2 ? 
st.st_mtime : 0; } SET_BINARY_MODE(g.ind); /* if decoding or testing, try to read gzip header */ g.hname = NULL; if (g.decode) { in_init(); method = get_header(1); if (method != 8 && method != 257 && /* gzip -cdf acts like cat on uncompressed input */ !(method == -2 && g.force && g.pipeout && g.decode != 2 && !g.list)) { RELEASE(g.hname); if (g.ind != 0) close(g.ind); if (method != -1) complain(method < 0 ? "%s is not compressed -- skipping" : "%s has unknown compression method -- skipping", g.inf); return; } /* if requested, test input file (possibly a special list) */ if (g.decode == 2) { if (method == 8) infchk(); else { unlzw(); if (g.list) { g.in_tot -= 3; show_info(method, 0, g.out_tot, 0); } } RELEASE(g.hname); if (g.ind != 0) close(g.ind); return; } } /* if requested, just list information about input file */ if (g.list) { list_info(); RELEASE(g.hname); if (g.ind != 0) close(g.ind); return; } /* create output file out, descriptor outd */ if (path == NULL || g.pipeout) { /* write to stdout */ g.outf = malloc(strlen("") + 1); if (g.outf == NULL) bail("not enough memory", ""); strcpy(g.outf, ""); g.outd = 1; if (!g.decode && !g.force && isatty(g.outd)) bail("trying to write compressed data to a terminal", " (use -f to force)"); } else { char *to, *repl; /* use header name for output when decompressing with -N */ to = g.inf; if (g.decode && (g.headis & 1) != 0 && g.hname != NULL) { to = g.hname; len = strlen(g.hname); } /* replace .tgx with .tar when decoding */ repl = g.decode && strcmp(to + len, ".tgz") ? "" : ".tar"; /* create output file and open to write */ g.outf = malloc(len + (g.decode ? strlen(repl) : strlen(g.sufx)) + 1); if (g.outf == NULL) bail("not enough memory", ""); memcpy(g.outf, to, len); strcpy(g.outf + len, g.decode ? repl : g.sufx); g.outd = open(g.outf, O_CREAT | O_TRUNC | O_WRONLY | (g.force ? 
0 : O_EXCL), 0600); /* if exists and not -f, give user a chance to overwrite */ if (g.outd < 0 && errno == EEXIST && isatty(0) && g.verbosity) { int ch, reply; fprintf(stderr, "%s exists -- overwrite (y/n)? ", g.outf); fflush(stderr); reply = -1; do { ch = getchar(); if (reply < 0 && ch != ' ' && ch != '\t') reply = ch == 'y' || ch == 'Y' ? 1 : 0; } while (ch != EOF && ch != '\n' && ch != '\r'); if (reply == 1) g.outd = open(g.outf, O_CREAT | O_TRUNC | O_WRONLY, 0600); } /* if exists and no overwrite, report and go on to next */ if (g.outd < 0 && errno == EEXIST) { complain("%s exists -- skipping", g.outf); RELEASE(g.outf); RELEASE(g.hname); if (g.ind != 0) close(g.ind); return; } /* if some other error, give up */ if (g.outd < 0) bail("write error on ", g.outf); } SET_BINARY_MODE(g.outd); RELEASE(g.hname); /* process ind to outd */ if (g.verbosity > 1) fprintf(stderr, "%s to %s ", g.inf, g.outf); if (g.decode) { if (method == 8) infchk(); else if (method == 257) unlzw(); else cat(); } #ifndef NOTHREAD else if (g.procs > 1) parallel_compress(); #endif else single_compress(0); if (g.verbosity > 1) { putc('\n', stderr); fflush(stderr); } /* finish up, copy attributes, set times, delete original */ if (g.ind != 0) close(g.ind); if (g.outd != 1) { if (close(g.outd)) bail("write error on ", g.outf); g.outd = -1; /* now prevent deletion on interrupt */ if (g.ind != 0) { copymeta(g.inf, g.outf); if (!g.keep) unlink(g.inf); } if (g.decode && (g.headis & 2) != 0 && g.stamp) touch(g.outf, g.stamp); } RELEASE(g.outf); } local char *helptext[] = { "Usage: pigz [options] [files ...]", " will compress files in place, adding the suffix '.gz'. If no files are", #ifdef NOTHREAD " specified, stdin will be compressed to stdout. pigz does what gzip does.", #else " specified, stdin will be compressed to stdout. 
pigz does what gzip does,", " but spreads the work over multiple processors and cores when compressing.", #endif "", "Options:", " -0 to -9, -11 Compression level (11 is much slower, a few % better)", " --fast, --best Compression levels 1 and 9 respectively", " -b, --blocksize mmm Set compression block size to mmmK (default 128K)", " -c, --stdout Write all processed output to stdout (won't delete)", " -d, --decompress Decompress the compressed input", " -f, --force Force overwrite, compress .gz, links, and to terminal", " -h, --help Display a help screen and quit", " -i, --independent Compress blocks independently for damage recovery", " -k, --keep Do not delete original file after processing", " -K, --zip Compress to PKWare zip (.zip) single entry format", " -l, --list List the contents of the compressed input", " -L, --license Display the pigz license and quit", " -n, --no-name Do not store or restore file name in/from header", " -N, --name Store/restore file name and mod time in/from header", #ifndef NOTHREAD " -p, --processes n Allow up to n compression threads (default is the", " number of online processors, or 8 if unknown)", #endif " -q, --quiet Print no messages, even on error", " -r, --recursive Process the contents of all subdirectories", " -R, --rsyncable Input-determined block locations for rsync", " -S, --suffix .sss Use suffix .sss instead of .gz (for compression)", " -t, --test Test the integrity of the compressed input", " -T, --no-time Do not store or restore mod time in/from header", #ifdef DEBUG " -v, --verbose Provide more verbose output (-vv to debug)", #else " -v, --verbose Provide more verbose output", #endif " -V --version Show the version of pigz", " -z, --zlib Compress to zlib (.zz) instead of gzip format", " -- All arguments after \"--\" are treated as files" }; /* display the help text above */ local void help(void) { int n; if (g.verbosity == 0) return; for (n = 0; n < (int)(sizeof(helptext) / sizeof(char *)); n++) fprintf(stderr, 
"%s\n", helptext[n]); fflush(stderr); exit(0); } #ifndef NOTHREAD /* try to determine the number of processors */ local int nprocs(int n) { # ifdef _SC_NPROCESSORS_ONLN n = (int)sysconf(_SC_NPROCESSORS_ONLN); # else # ifdef _SC_NPROC_ONLN n = (int)sysconf(_SC_NPROC_ONLN); # else # ifdef __hpux struct pst_dynamic psd; if (pstat_getdynamic(&psd, sizeof(psd), (size_t)1, 0) != -1) n = psd.psd_proc_cnt; # endif # endif # endif return n; } #endif /* set option defaults */ local void defaults(void) { g.level = Z_DEFAULT_COMPRESSION; #ifdef NOTHREAD g.procs = 1; #else g.procs = nprocs(8); #endif g.block = 131072UL; /* 128K */ g.rsync = 0; /* don't do rsync blocking */ g.setdict = 1; /* initialize dictionary each thread */ g.verbosity = 1; /* normal message level */ g.headis = 3; /* store/restore name and timestamp */ g.pipeout = 0; /* don't force output to stdout */ g.sufx = ".gz"; /* compressed file suffix */ g.decode = 0; /* compress */ g.list = 0; /* compress */ g.keep = 0; /* delete input file once compressed */ g.force = 0; /* don't overwrite, don't compress links */ g.recurse = 0; /* don't go into directories */ g.form = 0; /* use gzip format */ } /* long options conversion to short options */ local char *longopts[][2] = { {"LZW", "Z"}, {"ascii", "a"}, {"best", "9"}, {"bits", "Z"}, {"blocksize", "b"}, {"decompress", "d"}, {"fast", "1"}, {"force", "f"}, {"help", "h"}, {"independent", "i"}, {"keep", "k"}, {"license", "L"}, {"list", "l"}, {"name", "N"}, {"no-name", "n"}, {"no-time", "T"}, {"processes", "p"}, {"quiet", "q"}, {"recursive", "r"}, {"rsyncable", "R"}, {"silent", "q"}, {"stdout", "c"}, {"suffix", "S"}, {"test", "t"}, {"to-stdout", "c"}, {"uncompress", "d"}, {"verbose", "v"}, {"version", "V"}, {"zip", "K"}, {"zlib", "z"}}; #define NLOPTS (sizeof(longopts) / (sizeof(char *) << 1)) /* either new buffer size, new compression level, or new number of processes -- get rid of old buffers and threads to force the creation of new ones with the new settings */ local 
void new_opts(void) { single_compress(1); #ifndef NOTHREAD finish_jobs(); #endif } /* verify that arg is only digits, and if so, return the decimal value */ local size_t num(char *arg) { char *str = arg; size_t val = 0; if (*str == 0) bail("internal error: empty parameter", ""); do { if (*str < '0' || *str > '9') bail("invalid numeric parameter: ", arg); val = val * 10 + (*str - '0'); /* %% need to detect overflow here */ } while (*++str); return val; } /* process an option, return true if a file name and not an option */ local int option(char *arg) { static int get = 0; /* if not zero, look for option parameter */ char bad[3] = "-X"; /* for error messages (X is replaced) */ /* if no argument or dash option, check status of get */ if (get && (arg == NULL || *arg == '-')) { bad[1] = "bpS"[get - 1]; bail("missing parameter after ", bad); } if (arg == NULL) return 0; /* process long option or short options */ if (*arg == '-') { /* a single dash will be interpreted as stdin */ if (*++arg == 0) return 1; /* process long option (fall through with equivalent short option) */ if (*arg == '-') { int j; arg++; for (j = NLOPTS - 1; j >= 0; j--) if (strcmp(arg, longopts[j][0]) == 0) { arg = longopts[j][1]; break; } if (j < 0) bail("invalid option: ", arg - 2); } /* process short options (more than one allowed after dash) */ do { /* if looking for a parameter, don't process more single character options until we have the parameter */ if (get) { if (get == 3) bail("invalid usage: -s must be followed by space", ""); break; /* allow -pnnn and -bnnn, fall to parameter code */ } /* process next single character option */ bad[1] = *arg; switch (*arg) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': g.level = *arg - '0'; while (arg[1] >= '0' && arg[1] <= '9') g.level = g.level * 10 + *++arg - '0'; if (g.level == 10 || g.level > 11) bail("only levels 0..9 and 11 are allowed", ""); new_opts(); break; case 'K': g.form = 2; g.sufx = 
".zip"; break; case 'L': fputs(VERSION, stderr); fputs("Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013" " Mark Adler\n", stderr); fputs("Subject to the terms of the zlib license.\n", stderr); fputs("No warranty is provided or implied.\n", stderr); exit(0); case 'N': g.headis = 3; break; case 'T': g.headis &= ~2; break; case 'R': g.rsync = 1; break; case 'S': get = 3; break; case 'V': fputs(VERSION, stderr); exit(0); case 'Z': bail("invalid option: LZW output not supported: ", bad); case 'a': bail("invalid option: ascii conversion not supported: ", bad); case 'b': get = 1; break; case 'c': g.pipeout = 1; break; case 'd': g.decode = 1; g.headis = 0; break; case 'f': g.force = 1; break; case 'h': help(); break; case 'i': g.setdict = 0; break; case 'k': g.keep = 1; break; case 'l': g.list = 1; break; case 'n': g.headis &= ~1; break; case 'p': get = 2; break; case 'q': g.verbosity = 0; break; case 'r': g.recurse = 1; break; case 't': g.decode = 2; break; case 'v': g.verbosity++; break; case 'z': g.form = 1; g.sufx = ".zz"; break; default: bail("invalid option: ", bad); } } while (*++arg); if (*arg == 0) return 0; } /* process option parameter for -b, -p, or -S */ if (get) { size_t n; if (get == 1) { n = num(arg); g.block = n << 10; /* chunk size */ if (g.block < DICT) bail("block size too small (must be >= 32K)", ""); if (n != g.block >> 10 || OUTPOOL(g.block) < g.block || (ssize_t)OUTPOOL(g.block) < 0 || g.block > (1UL << 22)) bail("block size too large: ", arg); new_opts(); } else if (get == 2) { n = num(arg); g.procs = (int)n; /* # processes */ if (g.procs < 1) bail("invalid number of processes: ", arg); if ((size_t)g.procs != n || INBUFS(g.procs) < 1) bail("too many processes: ", arg); #ifdef NOTHREAD if (g.procs > 1) bail("compiled without threads", ""); #endif new_opts(); } else if (get == 3) g.sufx = arg; /* gz suffix */ get = 0; return 0; } /* neither an option nor parameter */ return 1; } /* catch termination signal */ local void cut_short(int sig) { 
(void)sig; Trace(("termination by user")); if (g.outd != -1 && g.outf != NULL) unlink(g.outf); log_dump(); _exit(1); } /* Process arguments, compress in the gzip format. Note that procs must be at least two in order to provide a dictionary in one work unit for the other work unit, and that size must be at least 32K to store a full dictionary. */ int main(int argc, char **argv) { int n; /* general index */ int noop; /* true to suppress option decoding */ unsigned long done; /* number of named files processed */ char *opts, *p; /* environment default options, marker */ /* initialize globals */ g.outf = NULL; g.first = 1; g.warned = 0; g.hname = NULL; /* save pointer to program name for error messages */ p = strrchr(argv[0], '/'); p = p == NULL ? argv[0] : p + 1; g.prog = *p ? p : "pigz"; /* prepare for interrupts and logging */ signal(SIGINT, cut_short); #ifndef NOTHREAD yarn_prefix = g.prog; /* prefix for yarn error messages */ yarn_abort = cut_short; /* call on thread error */ #endif #ifdef DEBUG gettimeofday(&start, NULL); /* starting time for log entries */ log_init(); /* initialize logging */ #endif /* set all options to defaults */ defaults(); /* process user environment variable defaults in GZIP */ opts = getenv("GZIP"); if (opts != NULL) { while (*opts) { while (*opts == ' ' || *opts == '\t') opts++; p = opts; while (*p && *p != ' ' && *p != '\t') p++; n = *p; *p = 0; if (option(opts)) bail("cannot provide files in GZIP environment variable", ""); opts = p + (n ? 1 : 0); } option(NULL); } /* process user environment variable defaults in PIGZ as well */ opts = getenv("PIGZ"); if (opts != NULL) { while (*opts) { while (*opts == ' ' || *opts == '\t') opts++; p = opts; while (*p && *p != ' ' && *p != '\t') p++; n = *p; *p = 0; if (option(opts)) bail("cannot provide files in PIGZ environment variable", ""); opts = p + (n ? 
1 : 0); } option(NULL); } /* decompress if named "unpigz" or "gunzip", to stdout if "*cat" */ if (strcmp(g.prog, "unpigz") == 0 || strcmp(g.prog, "gunzip") == 0) g.decode = 1, g.headis = 0; if ((n = strlen(g.prog)) > 2 && strcmp(g.prog + n - 3, "cat") == 0) g.decode = 1, g.headis = 0, g.pipeout = 1; /* if no arguments and compressed data to or from a terminal, show help */ if (argc < 2 && isatty(g.decode ? 0 : 1)) help(); /* process command-line arguments, no options after "--" */ done = noop = 0; for (n = 1; n < argc; n++) if (noop == 0 && strcmp(argv[n], "--") == 0) { noop = 1; option(NULL); } else if (noop || option(argv[n])) { /* true if file name, process it */ if (done == 1 && g.pipeout && !g.decode && !g.list && g.form > 1) complain("warning: output will be concatenated zip files -- " "will not be able to extract"); process(strcmp(argv[n], "-") ? argv[n] : NULL); done++; } option(NULL); /* list stdin or compress stdin to stdout if no file names provided */ if (done == 0) process(NULL); /* done -- release resources, show log */ new_opts(); log_dump(); return g.warned ? 
2 : 0; } pigz-2.3/pigz.pdf0000644000076500000240000002106612115037061013123 0ustar madlerstaff%PDF-1.4 %쏢 5 0 obj <> stream xZr}W fpe87+Rr$(".ͧszsȍTA+ӧ|{!~nWxyU=\Co9x_⁜pRg /cp1$>]oO>_ypìH|z?\o˫՗\=»B ȺkOtc_ ^+$=T?o Ȕ$ԿǛ|c*Sj`Yd’4f׏7wǐs:DbۍD%ٟP=s8/`M!)AL֍$\}pw\8Wz"Q.dE!wRsS[*16ϊ4/r-gALD&TD˨EB]XJ,C(o},TF39E%l}&{cGH& O8u~V'c5Y&Ava3db>6w .#t)P&_X/+nѫow@Rs@(i%<KEu"fiM<,Z{ٟ{ǽWMyDzB,n#iy{[(>nz{*Um&, ݔ?X7 J+SL UAMSF*2l )ͩ)ǙY-$Kfe,Lj1 {m9tS $دDZj鮮V=epwIS*xc焁dTݦE%]PDZywp(<8pH3kp 8[zi'&,UxhSËIQ7{ F4urM9D᜙\x,c~[Y GnE:7L9\Ko\ j{[)?㲾sfl?Y_o7H `vp_'¤{6վ ̺3q]&?n)x*ccԎJ:a{m\$I:o' Ho+4{R;f0W@" 8הN <{ Se >yl"0-LHc'ұ,=Y;(w|ȗ<< h$ìTԥڪAbinL Nn=לJ0н-ʚURO"hJ\OD=B xD#~qv Uո?Y?CK`Zf+沕/gaa%4Q3>a֤jِ TsRɶ[5 WENqUj祚cTGyfEd;دv BPXOԪ ]șdh~ ym[}4#T9.5ΣEeLłA'X7jSLbmCw /?ɝꭂÓRujK&L ܄J)?fTH(sdcx|nHp(xl ] pj|RLSb.vJfpJ|pr"!nnRz"*wL0qf^ĺuȡVVx"G۔,vgI]˪^4I]/a6NΥd3)SMfRpH!s"?[oh\^T[u9!۰pa W9?+rawYLNyLWP*Q!W["7 dfrѷ֗#Kowp.3+PQi'Bj{M\$nqji1V @q(9օzxIen.O$!_254(t8l;1Ŏ7_%f JjOvy(rҼ5N)[7N4Q(S4Iv Fp4:\՛^(@MTlS AkB`k[;z:K_ͬ#&bMZd2-X,r!| 704LI*)>˾rPȩ3 M4]O)rf3Zڙ຦k00 PN|9q> stream xWnF}W,7\ ES'hݤ-Z"bG|tYN #B̜9sfxl.M}U̞٦l3 U~cAD#Xؼw3򧿯 qm9~0 )L+cI\V&CEUj4li:W+S<ذ M6Il$22ll9"^(pq+ɨ]z#pӿyvԿQYNhsm|]IUe3ƚ?́_ [x4wHa״(jžoQ%kՆGWhJ,zr*^0BٷN/'qaF#l:>LBD~~%endstream endobj 15 0 obj 1593 endobj 4 0 obj <> /Contents 5 0 R >> endobj 13 0 obj <> /Contents 14 0 R >> endobj 3 0 obj << /Type /Pages /Kids [ 4 0 R 13 0 R ] /Count 2 >> endobj 1 0 obj <> endobj 7 0 obj <>endobj 11 0 obj <> endobj 12 0 obj <> endobj 16 0 obj <> endobj 17 0 obj <> endobj 10 0 obj <> endobj 18 0 obj <> endobj 9 0 obj <> endobj 19 0 obj <> endobj 8 0 obj <> endobj 20 0 obj <> endobj 21 0 obj <>stream 2013-03-03T22:22:02-08:00 2013-03-03T22:22:02-08:00 groff version 1.19.2 Untitled endstream endobj 2 0 obj <>endobj xref 0 22 0000000000 65535 f 0000005877 00000 n 0000008011 00000 n 0000005811 00000 n 0000005489 00000 n 0000000015 00000 n 0000003783 00000 n 0000005942 00000 n 0000006439 00000 n 0000006288 00000 n 0000006143 00000 n 0000005983 00000 n 
0000006013 00000 n 0000005649 00000 n 0000003803 00000 n 0000005468 00000 n 0000006063 00000 n 0000006093 00000 n 0000006232 00000 n 0000006373 00000 n 0000006519 00000 n 0000006588 00000 n trailer << /Size 22 /Root 1 0 R /Info 2 0 R /ID [<3233760EEA2A97C2C90BAB158FE756FA><3233760EEA2A97C2C90BAB158FE756FA>] >> startxref 8165 %%EOF pigz-2.3/pigz.spec0000644000076500000240000000166312005046640013306 0ustar madlerstaffSummary: pigz is a parallel implementation of gzip which utilizes multiple cores Name: pigz Version: 2.2.6 Release: 1 Source0: %{name}-%{version}.tar.gz License: zlib Group: Applications/Tools Packager: Duncan Brown BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) URL: http://www.zlib.net/pigz %description pigz, which stands for parallel implementation of gzip, is a fully functional replacement for gzip that exploits multiple processors and multiple cores to the hilt when compressing data. pigz was written by Mark Adler, and uses the zlib and pthread libraries. %clean rm -rf $RPM_BUILD_ROOT %prep mkdir -p $RPM_BUILD_ROOT %setup -q %build make mkdir -p ${RPM_BUILD_ROOT}/usr/bin mkdir -p ${RPM_BUILD_ROOT}/usr/man/man1 mv pigz unpigz ${RPM_BUILD_ROOT}/usr/bin mv pigz.1 ${RPM_BUILD_ROOT}/usr/man/man1 %files %defattr(-,root,root) /usr/bin/pigz /usr/bin/unpigz %doc /usr/man/man1/pigz.1 pigz-2.3/README0000644000076500000240000000421412005046640012334 0ustar madlerstaffpigz 2.2.6 (xx Oct 2012) by Mark Adler pigz, which stands for Parallel Implementation of GZip, is a fully functional replacement for gzip that exploits multiple processors and multiple cores to the hilt when compressing data. pigz was written by Mark Adler, and uses the zlib and pthread libraries. This version of pigz is written to be portable across Unix-style operating systems that provide the zlib and pthread libraries. Type "make" in this directory to build the "pigz" executable. You can then install the executable wherever you like in your path (e.g. /usr/local/bin/). 
Type "pigz" to see the command help and all of the command options. The latest version of pigz can be found at http://zlib.net/pigz/ . You need zlib version 1.2.3 or later to compile pigz. You can find the latest version of zlib at http://zlib.net/ . You can look in pigz.c for the change history. Questions, comments, bug reports, fixes, etc. can be emailed to Mark at his address in the license below. The license from pigz.c is copied here: This software is provided 'as-is', without any express or implied warranty. In no event will the author be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. Mark Adler madler@alumni.caltech.edu Mark accepts donations for providing this software. Donations are not required or expected. Any amount that you feel is appropriate would be appreciated. You can use this link: https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=536055 pigz-2.3/yarn.c0000644000076500000240000002550711704133221012576 0ustar madlerstaff/* yarn.c -- generic thread operations implemented using pthread functions * Copyright (C) 2008, 2012 Mark Adler * Version 1.3 13 Jan 2012 Mark Adler * For conditions of distribution and use, see copyright notice in yarn.h */ /* Basic thread operations implemented using the POSIX pthread library. 
All pthread references are isolated within this module to allow alternate implementations with other thread libraries. See yarn.h for the description of these operations. */ /* Version history: 1.0 19 Oct 2008 First version 1.1 26 Oct 2008 No need to set the stack size -- remove Add yarn_abort() function for clean-up on error exit 1.2 19 Dec 2011 (changes reversed in 1.3) 1.3 13 Jan 2012 Add large file #define for consistency with pigz.c Update thread portability #defines per IEEE 1003.1-2008 Fix documentation in yarn.h for yarn_prefix */ /* for thread portability */ #define _XOPEN_SOURCE 700 #define _POSIX_C_SOURCE 200809L #define _THREAD_SAFE /* use large file functions if available */ #define _FILE_OFFSET_BITS 64 /* external libraries and entities referenced */ #include /* fprintf(), stderr */ #include /* exit(), malloc(), free(), NULL */ #include /* pthread_t, pthread_create(), pthread_join(), */ /* pthread_attr_t, pthread_attr_init(), pthread_attr_destroy(), PTHREAD_CREATE_JOINABLE, pthread_attr_setdetachstate(), pthread_self(), pthread_equal(), pthread_mutex_t, PTHREAD_MUTEX_INITIALIZER, pthread_mutex_init(), pthread_mutex_lock(), pthread_mutex_unlock(), pthread_mutex_destroy(), pthread_cond_t, PTHREAD_COND_INITIALIZER, pthread_cond_init(), pthread_cond_broadcast(), pthread_cond_wait(), pthread_cond_destroy() */ #include /* ENOMEM, EAGAIN, EINVAL */ /* interface definition */ #include "yarn.h" /* constants */ #define local static /* for non-exported functions and globals */ /* error handling external globals, resettable by application */ char *yarn_prefix = "yarn"; void (*yarn_abort)(int) = NULL; /* immediately exit -- use for errors that shouldn't ever happen */ local void fail(int err) { fprintf(stderr, "%s: %s (%d) -- aborting\n", yarn_prefix, err == ENOMEM ? "out of memory" : "internal pthread error", err); if (yarn_abort != NULL) yarn_abort(err); exit(err == ENOMEM || err == EAGAIN ? 
err : EINVAL); } /* memory handling routines provided by user -- if none are provided, malloc() and free() are used, which are therefore assumed to be thread-safe */ typedef void *(*malloc_t)(size_t); typedef void (*free_t)(void *); local malloc_t my_malloc_f = malloc; local free_t my_free = free; /* use user-supplied allocation routines instead of malloc() and free() */ void yarn_mem(malloc_t lease, free_t vacate) { my_malloc_f = lease; my_free = vacate; } /* memory allocation that cannot fail (from the point of view of the caller) */ local void *my_malloc(size_t size) { void *block; if ((block = my_malloc_f(size)) == NULL) fail(ENOMEM); return block; } /* -- lock functions -- */ struct lock_s { pthread_mutex_t mutex; pthread_cond_t cond; long value; }; lock *new_lock(long initial) { int ret; lock *bolt; bolt = my_malloc(sizeof(struct lock_s)); if ((ret = pthread_mutex_init(&(bolt->mutex), NULL)) || (ret = pthread_cond_init(&(bolt->cond), NULL))) fail(ret); bolt->value = initial; return bolt; } void possess(lock *bolt) { int ret; if ((ret = pthread_mutex_lock(&(bolt->mutex))) != 0) fail(ret); } void release(lock *bolt) { int ret; if ((ret = pthread_mutex_unlock(&(bolt->mutex))) != 0) fail(ret); } void twist(lock *bolt, enum twist_op op, long val) { int ret; if (op == TO) bolt->value = val; else if (op == BY) bolt->value += val; if ((ret = pthread_cond_broadcast(&(bolt->cond))) || (ret = pthread_mutex_unlock(&(bolt->mutex)))) fail(ret); } #define until(a) while(!(a)) void wait_for(lock *bolt, enum wait_op op, long val) { int ret; switch (op) { case TO_BE: until (bolt->value == val) if ((ret = pthread_cond_wait(&(bolt->cond), &(bolt->mutex))) != 0) fail(ret); break; case NOT_TO_BE: until (bolt->value != val) if ((ret = pthread_cond_wait(&(bolt->cond), &(bolt->mutex))) != 0) fail(ret); break; case TO_BE_MORE_THAN: until (bolt->value > val) if ((ret = pthread_cond_wait(&(bolt->cond), &(bolt->mutex))) != 0) fail(ret); break; case TO_BE_LESS_THAN: until (bolt->value < 
val) if ((ret = pthread_cond_wait(&(bolt->cond), &(bolt->mutex))) != 0) fail(ret); } } long peek_lock(lock *bolt) { return bolt->value; } void free_lock(lock *bolt) { int ret; if ((ret = pthread_cond_destroy(&(bolt->cond))) || (ret = pthread_mutex_destroy(&(bolt->mutex)))) fail(ret); my_free(bolt); } /* -- thread functions (uses lock functions above) -- */ struct thread_s { pthread_t id; int done; /* true if this thread has exited */ thread *next; /* for list of all launched threads */ }; /* list of threads launched but not joined, count of threads exited but not joined (incremented by ignition() just before exiting) */ local lock threads_lock = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0 /* number of threads exited but not joined */ }; local thread *threads = NULL; /* list of extant threads */ /* structure in which to pass the probe and its payload to ignition() */ struct capsule { void (*probe)(void *); void *payload; }; /* mark the calling thread as done and alert join_all() */ local void reenter(void *dummy) { thread *match, **prior; pthread_t me; (void)dummy; /* find this thread in the threads list by matching the thread id */ me = pthread_self(); possess(&(threads_lock)); prior = &(threads); while ((match = *prior) != NULL) { if (pthread_equal(match->id, me)) break; prior = &(match->next); } if (match == NULL) fail(EINVAL); /* mark this thread as done and move it to the head of the list */ match->done = 1; if (threads != match) { *prior = match->next; match->next = threads; threads = match; } /* update the count of threads to be joined and alert join_all() */ twist(&(threads_lock), BY, +1); } /* all threads go through this routine so that just before the thread exits, it marks itself as done in the threads list and alerts join_all() so that the thread resources can be released -- use cleanup stack so that the marking occurs even if the thread is cancelled */ local void *ignition(void *arg) { struct capsule *capsule = arg; /* run reenter() before 
leaving */ pthread_cleanup_push(reenter, NULL); /* execute the requested function with argument */ capsule->probe(capsule->payload); my_free(capsule); /* mark this thread as done and let join_all() know */ pthread_cleanup_pop(1); /* exit thread */ return NULL; } /* not all POSIX implementations create threads as joinable by default, so that is made explicit here */ thread *launch(void (*probe)(void *), void *payload) { int ret; thread *th; struct capsule *capsule; pthread_attr_t attr; /* construct the requested call and argument for the ignition() routine (allocated instead of automatic so that we're sure this will still be there when ignition() actually starts up -- ignition() will free this allocation) */ capsule = my_malloc(sizeof(struct capsule)); capsule->probe = probe; capsule->payload = payload; /* assure this thread is in the list before join_all() or ignition() looks for it */ possess(&(threads_lock)); /* create the thread and call ignition() from that thread */ th = my_malloc(sizeof(struct thread_s)); if ((ret = pthread_attr_init(&attr)) || (ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE)) || (ret = pthread_create(&(th->id), &attr, ignition, capsule)) || (ret = pthread_attr_destroy(&attr))) fail(ret); /* put the thread in the threads list for join_all() */ th->done = 0; th->next = threads; threads = th; release(&(threads_lock)); return th; } void join(thread *ally) { int ret; thread *match, **prior; /* wait for thread to exit and return its resources */ if ((ret = pthread_join(ally->id, NULL)) != 0) fail(ret); /* find the thread in the threads list */ possess(&(threads_lock)); prior = &(threads); while ((match = *prior) != NULL) { if (match == ally) break; prior = &(match->next); } if (match == NULL) fail(EINVAL); /* remove thread from list and update exited count, free thread */ if (match->done) threads_lock.value--; *prior = match->next; release(&(threads_lock)); my_free(ally); } /* This implementation of join_all() only attempts to 
join threads that have announced that they have exited (see ignition()). When there are many threads, this is faster than waiting for some random thread to exit while a bunch of other threads have already exited. */ int join_all(void) { int ret, count; thread *match, **prior; /* grab the threads list and initialize the joined count */ count = 0; possess(&(threads_lock)); /* do until threads list is empty */ while (threads != NULL) { /* wait until at least one thread has reentered */ wait_for(&(threads_lock), NOT_TO_BE, 0); /* find the first thread marked done (should be at or near the top) */ prior = &(threads); while ((match = *prior) != NULL) { if (match->done) break; prior = &(match->next); } if (match == NULL) fail(EINVAL); /* join the thread (will be almost immediate), remove from the threads list, update the reenter count, and free the thread */ if ((ret = pthread_join(match->id, NULL)) != 0) fail(ret); threads_lock.value--; *prior = match->next; my_free(match); count++; } /* let go of the threads list and return the number of threads joined */ release(&(threads_lock)); return count; } /* cancel and join the thread -- the thread will cancel when it gets to a file operation, a sleep or pause, or a condition wait */ void destruct(thread *off_course) { int ret; if ((ret = pthread_cancel(off_course->id)) != 0) fail(ret); join(off_course); } pigz-2.3/yarn.h0000644000076500000240000001432611704133221012600 0ustar madlerstaff/* yarn.h -- generic interface for thread operations * Copyright (C) 2008, 2011 Mark Adler * Version 1.3 13 Jan 2012 Mark Adler */ /* This software is provided 'as-is', without any express or implied warranty. In no event will the author be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. Mark Adler madler@alumni.caltech.edu */ /* Basic thread operations This interface isolates the local operating system implementation of threads from the application in order to facilitate platform independent use of threads. All of the implementation details are deliberately hidden. Assuming adequate system resources and proper use, none of these functions can fail. As a result, any errors encountered will cause an exit() to be executed. These functions allow the simple launching and joining of threads, and the locking of objects and synchronization of changes of objects. The latter is implemented with a single lock type that contains an integer value. The value can be ignored for simple exclusive access to an object, or the value can be used to signal and wait for changes to an object. 
-- Arguments -- thread *thread; identifier for launched thread, used by join void probe(void *); pointer to function "probe", run when thread starts void *payload; single argument passed to the probe function lock *lock; a lock with a value -- used for exclusive access to an object and to synchronize threads waiting for changes to an object long val; value to set lock, increment lock, or wait for int n; number of threads joined -- Thread functions -- thread = launch(probe, payload) - launch a thread -- exit via probe() return join(thread) - join a thread and by joining end it, waiting for the thread to exit if it hasn't already -- will free the resources allocated by launch() (don't try to join the same thread more than once) n = join_all() - join all threads launched by launch() that are not joined yet and free the resources allocated by the launches, usually to clean up when the thread processing is done -- join_all() returns an int with the count of the number of threads joined (join_all() should only be called from the main thread, and should only be called after any calls of join() have completed) destruct(thread) - terminate the thread in mid-execution and join it (depending on the implementation, the termination may not be immediate, but may wait for the thread to execute certain thread or file i/o operations) -- Lock functions -- lock = new_lock(val) - create a new lock with initial value val (lock is created in the released state) possess(lock) - acquire exclusive possession of a lock, waiting if necessary twist(lock, [TO | BY], val) - set lock to or increment lock by val, signal all threads waiting on this lock and then release the lock -- must possess the lock before calling (twist releases, so don't do a release() after a twist() on the same lock) wait_for(lock, [TO_BE | NOT_TO_BE | TO_BE_MORE_THAN | TO_BE_LESS_THAN], val) - wait on lock value to be, not to be, be greater than, or be less than val -- must possess the lock before calling, will possess 
the lock on return but the lock is released while waiting to permit other threads to use twist() to change the value and signal the change (so make sure that the object is in a usable state when waiting) release(lock) - release a possessed lock (do not try to release a lock that the current thread does not possess) val = peek_lock(lock) - return the value of the lock (assumes that lock is already possessed, no possess or release is done by peek_lock()) free_lock(lock) - free the resources allocated by new_lock() (application must assure that the lock is released before calling free_lock()) -- Memory allocation --- yarn_mem(better_malloc, better_free) - set the memory allocation and free routines for use by the yarn routines where the supplied routines have the same interface and operation as malloc() and free(), and may be provided in order to supply thread-safe memory allocation routines or for any other reason -- by default malloc() and free() will be used -- Error control -- yarn_prefix - a char pointer to a string that will be the prefix for any error messages that these routines generate before exiting -- if not changed by the application, "yarn" will be used yarn_abort - an external function that will be executed when there is an internal yarn error, due to out of memory or misuse -- this function may exit to abort the application, or if it returns, the yarn error handler will exit (set to NULL by default for no action) */ extern char *yarn_prefix; extern void (*yarn_abort)(int); void yarn_mem(void *(*)(size_t), void (*)(void *)); typedef struct thread_s thread; thread *launch(void (*)(void *), void *); void join(thread *); int join_all(void); void destruct(thread *); typedef struct lock_s lock; lock *new_lock(long); void possess(lock *); void release(lock *); enum twist_op { TO, BY }; void twist(lock *, enum twist_op, long); enum wait_op { TO_BE, /* or */ NOT_TO_BE, /* that is the question */ TO_BE_MORE_THAN, TO_BE_LESS_THAN }; void wait_for(lock *, enum 
wait_op, long);
long peek_lock(lock *);
void free_lock(lock *);
pigz-2.3/zopfli/0000755000076500000240000000000012115033074012755 5ustar madlerstaffpigz-2.3/zopfli/blocksplitter.c0000644000076500000240000002212012114721072016000 0ustar madlerstaff/*
Copyright 2011 Google Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Author: lode.vandevenne@gmail.com (Lode Vandevenne)
Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
*/

#include "blocksplitter.h"

/* NOTE(review): the header names of the next three includes are missing in
   this copy of the file -- presumably <assert.h>, <stdio.h> and <stdlib.h>,
   since assert(), fprintf() and malloc()/free()/exit() are used below;
   confirm against a pristine copy. */
#include
#include
#include

#include "deflate.h"
#include "lz77.h"
#include "squeeze.h"
#include "tree.h"

/*
The "f" for the FindMinimum function below.
i: the current parameter of f(i)
context: for your implementation
*/
typedef double FindMinimumFun(size_t i, void* context);

/*
Finds minimum of function f(i) where i is of type size_t, f(i) is of type
double, i is in range start-end (excluding end).

For ranges shorter than 1024, f is evaluated at every i and the first index
with the smallest value is returned. For longer ranges, f is sampled at NUM
evenly spaced points and the range is narrowed around the best sample, until
the range holds at most NUM values or the best sample is worse than the
previous round's best. The result in that case is the best sample of the last
completed round (or start if no round completed), not necessarily the true
minimum.
*/
static size_t FindMinimum(FindMinimumFun f, void* context,
                          size_t start, size_t end) {
  if (end - start < 1024) {
    /* Small range: exhaustive scan. */
    double best = LARGE_FLOAT;
    size_t result = start;
    size_t i;
    for (i = start; i < end; i++) {
      double v = f(i, context);
      if (v < best) {
        best = v;
        result = i;
      }
    }
    return result;
  } else {
    /* Try to find minimum faster by recursively checking multiple points. */
#define NUM 9  /* Good value: 9. */
    size_t i;
    size_t p[NUM];    /* sample positions of the current round */
    double vp[NUM];   /* f() at each sample position */
    size_t besti;
    double best;
    double lastbest = LARGE_FLOAT;
    size_t pos = start;

    for (;;) {
      if (end - start <= NUM) break;

      /* Sample NUM interior points, evenly spaced over start..end. */
      for (i = 0; i < NUM; i++) {
        p[i] = start + (i + 1) * ((end - start) / (NUM + 1));
        vp[i] = f(p[i], context);
      }
      besti = 0;
      best = vp[0];
      for (i = 1; i < NUM; i++) {
        if (vp[i] < best) {
          best = vp[i];
          besti = i;
        }
      }
      /* Stop once a round no longer improves on the previous one. */
      if (best > lastbest) break;

      /* Narrow the range to the neighbours of the best sample. */
      start = besti == 0 ? start : p[besti - 1];
      end = besti == NUM - 1 ? end : p[besti + 1];

      pos = p[besti];
      lastbest = best;
    }
    return pos;
#undef NUM
  }
}

/*
Returns estimated cost of a block in bits.  It includes the size to encode the
tree and the size to encode all literal, length and distance symbols and their
extra bits.

litlens: lz77 lit/lengths
dists: lz77 distances
lstart: start of block
lend: end of block (not inclusive)

NOTE(review): the last argument 2 passed to CalculateBlockSize is presumably
the deflate block type (dynamic Huffman) -- confirm against deflate.h.
*/
double EstimateCost(const unsigned short* litlens,
                    const unsigned short* dists,
                    size_t lstart, size_t lend) {
  return CalculateBlockSize(litlens, dists, lstart, lend, 2);
}

/* Context handed to SplitCost() through FindMinimum(): the LZ77 data and the
bounds of the block that is being split. */
typedef struct SplitCostContext {
  const unsigned short* litlens;
  const unsigned short* dists;
  size_t llsize;
  size_t start;
  size_t end;
} SplitCostContext;


/*
Gets the cost which is the sum of the cost of the left and the right section
of the data.
type: FindMinimumFun
*/
static double SplitCost(size_t i, void* context) {
  SplitCostContext* c = (SplitCostContext*)context;
  return EstimateCost(c->litlens, c->dists, c->start, i) +
      EstimateCost(c->litlens, c->dists, i, c->end);
}

/*
Appends value to the dynamic array *out and then moves it in front of the
first existing element that is greater than it, shifting the elements after
that position up by one. An array that was in ascending order stays in
ascending order.
*/
static void AddSorted(size_t value, size_t** out, size_t* outsize) {
  size_t i;
  APPEND_DATA(value, out, outsize);
  if (*outsize > 0) {
    /* The last slot holds the value just appended; only scan those before. */
    for (i = 0; i < *outsize - 1; i++) {
      if ((*out)[i] > value) {
        size_t j;
        for (j = *outsize - 1; j > i; j--) {
          (*out)[j] = (*out)[j - 1];
        }
        (*out)[i] = value;
        break;
      }
    }
  }
}

/*
Prints the block split points as decimal and hex values in the terminal.
*/ static void PrintBlockSplitPoints(const unsigned short* litlens, const unsigned short* dists, size_t llsize, const size_t* lz77splitpoints, size_t nlz77points) { size_t* splitpoints = 0; size_t npoints = 0; size_t i; /* The input is given as lz77 indices, but we want to see the uncompressed index values. */ size_t pos = 0; if (nlz77points > 0) { for (i = 0; i < llsize; i++) { size_t length = dists[i] == 0 ? 1 : litlens[i]; if (lz77splitpoints[npoints] == i) { APPEND_DATA(pos, &splitpoints, &npoints); if (npoints == nlz77points) break; } pos += length; } } assert(npoints == nlz77points); fprintf(stderr, "block split points: "); for (i = 0; i < npoints; i++) { fprintf(stderr, "%d ", (int)splitpoints[i]); } fprintf(stderr, "(hex:"); for (i = 0; i < npoints; i++) { fprintf(stderr, " %x", (int)splitpoints[i]); } fprintf(stderr, ")\n"); free(splitpoints); } /* Finds next block to try to split, the largest of the available ones. The largest is chosen to make sure that if only a limited amount of blocks is requested, their sizes are spread evenly. llsize: the size of the LL77 data, which is the size of the done array here. done: array indicating which blocks starting at that position are no longer splittable (splitting them increases rather than decreases cost). splitpoints: the splitpoints found so far. npoints: the amount of splitpoints found so far. lstart: output variable, giving start of block. lend: output variable, giving end of block. returns 1 if a block was found, 0 if no block found (all are done). */ static int FindLargestSplittableBlock( size_t llsize, const unsigned char* done, const size_t* splitpoints, size_t npoints, size_t* lstart, size_t* lend) { size_t longest = 0; int found = 0; size_t i; for (i = 0; i <= npoints; i++) { size_t start = i == 0 ? 0 : splitpoints[i - 1]; size_t end = i == npoints ? 
llsize - 1 : splitpoints[i]; if (!done[start] && end - start > longest) { *lstart = start; *lend = end; found = 1; longest = end - start; } } return found; } void BlockSplitLZ77(const Options* options, const unsigned short* litlens, const unsigned short* dists, size_t llsize, size_t maxblocks, size_t** splitpoints, size_t* npoints) { size_t lstart, lend; size_t i; size_t llpos = 0; size_t numblocks = 1; unsigned char* done; double splitcost, origcost; if (llsize < 10) return; /* This code fails on tiny files. */ done = (unsigned char*)malloc(llsize); if (!done) exit(-1); /* Allocation failed. */ for (i = 0; i < llsize; i++) done[i] = 0; lstart = 0; lend = llsize; for (;;) { SplitCostContext c; if (maxblocks > 0 && numblocks >= maxblocks) { break; } c.litlens = litlens; c.dists = dists; c.llsize = llsize; c.start = lstart; c.end = lend; assert(lstart < lend); llpos = FindMinimum(SplitCost, &c, lstart + 1, lend); assert(llpos > lstart); assert(llpos < lend); splitcost = EstimateCost(litlens, dists, lstart, llpos) + EstimateCost(litlens, dists, llpos, lend); origcost = EstimateCost(litlens, dists, lstart, lend); if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) { done[lstart] = 1; } else { AddSorted(llpos, splitpoints, npoints); numblocks++; } if (!FindLargestSplittableBlock( llsize, done, *splitpoints, *npoints, &lstart, &lend)) { break; /* No further split will probably reduce compression. 
*/ } if (lend - lstart < 10) { break; } } if (options->verbose) { PrintBlockSplitPoints(litlens, dists, llsize, *splitpoints, *npoints); } free(done); } void BlockSplit(const Options* options, const unsigned char* in, size_t instart, size_t inend, size_t maxblocks, size_t** splitpoints, size_t* npoints) { size_t pos = 0; size_t i; BlockState s; size_t* lz77splitpoints = 0; size_t nlz77points = 0; LZ77Store store; InitLZ77Store(&store); s.options = options; s.blockstart = instart; s.blockend = inend; #ifdef USE_LONGEST_MATCH_CACHE s.lmc = 0; #endif *npoints = 0; *splitpoints = 0; /* Unintuitively, Using a simple LZ77 method here instead of LZ77Optimal results in better blocks. */ LZ77Greedy(&s, in, instart, inend, &store); BlockSplitLZ77(options, store.litlens, store.dists, store.size, maxblocks, &lz77splitpoints, &nlz77points); /* Convert LZ77 positions to positions in the uncompressed input. */ pos = instart; if (nlz77points > 0) { for (i = 0; i < store.size; i++) { size_t length = store.dists[i] == 0 ? 1 : store.litlens[i]; if (lz77splitpoints[*npoints] == i) { APPEND_DATA(pos, splitpoints, npoints); if (*npoints == nlz77points) break; } pos += length; } } assert(*npoints == nlz77points); free(lz77splitpoints); CleanLZ77Store(&store); } void BlockSplitSimple(const unsigned char* in, size_t instart, size_t inend, size_t blocksize, size_t** splitpoints, size_t* npoints) { size_t i = instart; while (i < inend) { APPEND_DATA(i, splitpoints, npoints); i += blocksize; } (void)in; } pigz-2.3/zopfli/blocksplitter.h0000644000076500000240000000515412114721072016015 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ /* Functions to choose good boundaries for block splitting. Deflate allows encoding the data in multiple blocks, with a separate Huffman tree for each block. The Huffman tree itself requires some bytes to encode, so by choosing certain blocks, you can either hurt, or enhance compression. These functions choose good ones that enhance it. */ #ifndef ZOPFLI_BLOCKSPLITTER_H_ #define ZOPFLI_BLOCKSPLITTER_H_ #include #include "util.h" /* Does blocksplitting on LZ77 data. The output splitpoints are indices in the LZ77 data. litlens: lz77 lit/lengths dists: lz77 distances llsize: size of litlens and dists maxblocks: set a limit to the amount of blocks. Set to 0 to mean no limit. */ void BlockSplitLZ77(const Options* options, const unsigned short* litlens, const unsigned short* dists, size_t llsize, size_t maxblocks, size_t** splitpoints, size_t* npoints); /* Does blocksplitting on uncompressed data. The output splitpoints are indices in the uncompressed bytes. options: general program options. in: uncompressed input data instart: where to start splitting inend: where to end splitting (not inclusive) maxblocks: maximum amount of blocks to split into, or 0 for no limit splitpoints: dynamic array to put the resulting split point coordinates into. The coordinates are indices in the input array. npoints: pointer to amount of splitpoints, for the dynamic array. The amount of blocks is the amount of splitpoints + 1. 
*/ void BlockSplit(const Options* options, const unsigned char* in, size_t instart, size_t inend, size_t maxblocks, size_t** splitpoints, size_t* npoints); /* Divides the input into equal blocks, does not even take LZ77 lengths into account. */ void BlockSplitSimple(const unsigned char* in, size_t instart, size_t inend, size_t blocksize, size_t** splitpoints, size_t* npoints); #endif /* ZOPFLI_BLOCKSPLITTER_H_ */ pigz-2.3/zopfli/cache.c0000644000076500000240000000677612114721072014205 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ #include "cache.h" #include #include #include #ifdef USE_LONGEST_MATCH_CACHE void InitLongestMatchCache(size_t blocksize, LongestMatchCache* lmc) { size_t i; lmc->length = (unsigned short*)malloc(sizeof(unsigned short) * blocksize); lmc->dist = (unsigned short*)malloc(sizeof(unsigned short) * blocksize); /* Rather large amount of memory. */ lmc->sublen = (unsigned char*)malloc(NUM_CACHED_LENGTHS * 3 * blocksize); /* length > 0 and dist 0 is invalid combination, which indicates on purpose that this cache value is not filled in yet. 
*/ for (i = 0; i < blocksize; i++) lmc->length[i] = 1; for (i = 0; i < blocksize; i++) lmc->dist[i] = 0; for (i = 0; i < NUM_CACHED_LENGTHS * blocksize * 3; i++) lmc->sublen[i] = 0; } void CleanLongestMatchCache(LongestMatchCache* lmc) { free(lmc->length); free(lmc->dist); free(lmc->sublen); } void SublenToCache(const unsigned short* sublen, size_t pos, size_t length, LongestMatchCache* lmc) { size_t i; size_t j = 0; unsigned bestlength = 0; unsigned char* cache; #if NUM_CACHED_LENGTHS == 0 return; #endif cache = &lmc->sublen[NUM_CACHED_LENGTHS * pos * 3]; if (length < 3) return; for (i = 3; i <= length; i++) { if (i == length || sublen[i] != sublen[i + 1]) { cache[j * 3] = i - 3; cache[j * 3 + 1] = sublen[i] % 256; cache[j * 3 + 2] = (sublen[i] >> 8) % 256; bestlength = i; j++; if (j >= NUM_CACHED_LENGTHS) break; } } if (j < NUM_CACHED_LENGTHS) { assert(bestlength == length); cache[(NUM_CACHED_LENGTHS - 1) * 3] = bestlength - 3; } else { assert(bestlength <= length); } assert(bestlength == MaxCachedSublen(lmc, pos, length)); } void CacheToSublen(const LongestMatchCache* lmc, size_t pos, size_t length, unsigned short* sublen) { size_t i, j; unsigned maxlength = MaxCachedSublen(lmc, pos, length); unsigned prevlength = 0; unsigned char* cache; #if NUM_CACHED_LENGTHS == 0 return; #endif if (length < 3) return; cache = &lmc->sublen[NUM_CACHED_LENGTHS * pos * 3]; for (j = 0; j < NUM_CACHED_LENGTHS; j++) { unsigned length = cache[j * 3] + 3; unsigned dist = cache[j * 3 + 1] + 256 * cache[j * 3 + 2]; for (i = prevlength; i <= length; i++) { sublen[i] = dist; } if (length == maxlength) break; prevlength = length + 1; } } /* Returns the length up to which could be stored in the cache. */ unsigned MaxCachedSublen(const LongestMatchCache* lmc, size_t pos, size_t length) { unsigned char* cache; #if NUM_CACHED_LENGTHS == 0 return 0; #endif cache = &lmc->sublen[NUM_CACHED_LENGTHS * pos * 3]; (void)length; if (cache[1] == 0 && cache[2] == 0) return 0; /* No sublen cached. 
*/ return cache[(NUM_CACHED_LENGTHS - 1) * 3] + 3; } #endif /* USE_LONGEST_MATCH_CACHE */ pigz-2.3/zopfli/cache.h0000644000076500000240000000414212114721072014173 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ /* The cache that speeds up FindLongestMatch of lz77.c. */ #ifndef ZOPFLI_CACHE_H_ #define ZOPFLI_CACHE_H_ #include "util.h" #ifdef USE_LONGEST_MATCH_CACHE /* Cache used by FindLongestMatch to remember previously found length/dist values. This is needed because the squeeze runs will ask these values multiple times for the same position. Uses large amounts of memory, since it has to remember the distance belonging to every possible shorter-than-the-best length (the so called "sublen" array). */ typedef struct LongestMatchCache { unsigned short* length; unsigned short* dist; unsigned char* sublen; /* For each length, the distance */ } LongestMatchCache; /* Initializes the LongestMatchCache. */ void InitLongestMatchCache(size_t blocksize, LongestMatchCache* lmc); /* Frees up the memory of the LongestMatchCache. */ void CleanLongestMatchCache(LongestMatchCache* lmc); /* Stores sublen array in the cache. */ void SublenToCache(const unsigned short* sublen, size_t pos, size_t length, LongestMatchCache* lmc); /* Extracts sublen array from the cache. 
*/ void CacheToSublen(const LongestMatchCache* lmc, size_t pos, size_t length, unsigned short* sublen); /* Returns the length up to which could be stored in the cache. */ unsigned MaxCachedSublen(const LongestMatchCache* lmc, size_t pos, size_t length); #endif /* USE_LONGEST_MATCH_CACHE */ #endif /* ZOPFLI_CACHE_H_ */ pigz-2.3/zopfli/COPYING0000644000076500000240000002611512114721072014016 0ustar madlerstaff Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2011 Google Inc. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. pigz-2.3/zopfli/deflate.c0000644000076500000240000005512312115033074014533 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ /* Modified by madler@alumni.caltech.edu (Mark Adler) Exposed DeflatePart() as an external function. */ #include "deflate.h" #include #include #include #include "blocksplitter.h" #include "lz77.h" #include "squeeze.h" #include "tree.h" static void AddBit(int bit, unsigned char* bp, unsigned char** out, size_t* outsize) { if (((*bp) & 7) == 0) APPEND_DATA(0, out, outsize); (*out)[*outsize - 1] |= bit << ((*bp) & 7); (*bp)++; } static void AddBits(unsigned symbol, unsigned length, unsigned char* bp, unsigned char** out, size_t* outsize) { /* TODO(lode): make more efficient (add more bits at once). 
*/ unsigned i; for (i = 0; i < length; i++) { unsigned bit = (symbol >> i) & 1; if (((*bp) & 7) == 0) APPEND_DATA(0, out, outsize); (*out)[*outsize - 1] |= bit << ((*bp) & 7); (*bp)++; } } /* Adds bits, like AddBits, but the order is inverted. The deflate specification uses both orders in one standard. */ static void AddHuffmanBits(unsigned symbol, unsigned length, unsigned char* bp, unsigned char** out, size_t* outsize) { /* TODO(lode): make more efficient (add more bits at once). */ unsigned i; for (i = 0; i < length; i++) { unsigned bit = (symbol >> (length - i - 1)) & 1; if (((*bp) & 7) == 0) APPEND_DATA(0, out, outsize); (*out)[*outsize - 1] |= bit << ((*bp) & 7); (*bp)++; } } /* Ensures there are at least 2 distance codes to support buggy decoders. Zlib 1.2.1 and below have a bug where it fails if there isn't at least 1 distance code (with length > 0), even though it's valid according to the deflate spec to have 0 distance codes. On top of that, some mobile phones require at least two distance codes. To support these decoders too (but potentially at the cost of a few bytes), add dummy code lengths of 1. References to this bug can be found in the changelog of Zlib 1.2.2 and here: http://www.jonof.id.au/forum/index.php?topic=515.0. d_lengths: the 32 lengths of the distance codes. */ static void PatchDistanceCodesForBuggyDecoders(unsigned* d_lengths) { int num_dist_codes = 0; /* Amount of non-zero distance codes */ int i; for (i = 0; i < 30 /* Ignore the two unused codes from the spec */; i++) { if (d_lengths[i]) num_dist_codes++; if (num_dist_codes >= 2) return; /* Two or more codes is fine. */ } if (num_dist_codes == 0) { d_lengths[0] = d_lengths[1] = 1; } else if (num_dist_codes == 1) { d_lengths[d_lengths[0] ? 1 : 0] = 1; } } /* Gives the exact size of the tree, in bits, as it will be encoded in DEFLATE. 
*/ size_t CalculateTreeSize(const unsigned* ll_lengths, const unsigned* d_lengths, size_t* ll_counts, size_t* d_counts) { unsigned char* dummy = 0; size_t dummysize = 0; unsigned char bp = 0; (void)ll_counts; (void)d_counts; AddDynamicTree(ll_lengths, d_lengths, &bp, &dummy, &dummysize); free(dummy); return dummysize * 8 + (bp & 7); } void AddDynamicTree(const unsigned* ll_lengths, const unsigned* d_lengths, unsigned char* bp, unsigned char** out, size_t* outsize) { unsigned* lld_lengths = 0; /* All litlen and dist lengthts with ending zeros trimmed together in one array. */ unsigned lld_total; /* Size of lld_lengths. */ unsigned* rle = 0; /* Runlength encoded version of lengths of litlen and dist trees. */ unsigned* rle_bits = 0; /* Extra bits for rle values 16, 17 and 18. */ size_t rle_size = 0; /* Size of rle array. */ size_t rle_bits_size = 0; /* Should have same value as rle_size. */ unsigned hlit = 29; /* 286 - 257 */ unsigned hdist = 29; /* 32 - 1, but gzip does not like hdist > 29.*/ unsigned hclen; size_t i, j; size_t clcounts[19]; unsigned clcl[19]; /* Code length code lengths. */ unsigned clsymbols[19]; /* The order in which code length code lengths are encoded as per deflate. */ unsigned order[19] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; /* Trim zeros. */ while (hlit > 0 && ll_lengths[257 + hlit - 1] == 0) hlit--; while (hdist > 0 && d_lengths[1 + hdist - 1] == 0) hdist--; lld_total = hlit + 257 + hdist + 1; lld_lengths = (unsigned*)malloc(sizeof(*lld_lengths) * lld_total); if (!lld_lengths) exit(-1); /* Allocation failed. */ for (i = 0; i < lld_total; i++) { lld_lengths[i] = i < 257 + hlit ? 
ll_lengths[i] : d_lengths[i - 257 - hlit]; assert(lld_lengths[i] < 16); } for (i = 0; i < lld_total; i++) { size_t count = 0; for (j = i; j < lld_total && lld_lengths[i] == lld_lengths[j]; j++) { count++; } if (count >= 4 || (count >= 3 && lld_lengths[i] == 0)) { if (lld_lengths[i] == 0) { if (count > 10) { if (count > 138) count = 138; APPEND_DATA(18, &rle, &rle_size); APPEND_DATA(count - 11, &rle_bits, &rle_bits_size); } else { APPEND_DATA(17, &rle, &rle_size); APPEND_DATA(count - 3, &rle_bits, &rle_bits_size); } } else { unsigned repeat = count - 1; /* Since the first one is hardcoded. */ APPEND_DATA(lld_lengths[i], &rle, &rle_size); APPEND_DATA(0, &rle_bits, &rle_bits_size); while (repeat >= 6) { APPEND_DATA(16, &rle, &rle_size); APPEND_DATA(6 - 3, &rle_bits, &rle_bits_size); repeat -= 6; } if (repeat >= 3) { APPEND_DATA(16, &rle, &rle_size); APPEND_DATA(3 - 3, &rle_bits, &rle_bits_size); repeat -= 3; } while (repeat != 0) { APPEND_DATA(lld_lengths[i], &rle, &rle_size); APPEND_DATA(0, &rle_bits, &rle_bits_size); repeat--; } } i += count - 1; } else { APPEND_DATA(lld_lengths[i], &rle, &rle_size); APPEND_DATA(0, &rle_bits, &rle_bits_size); } assert(rle[rle_size - 1] <= 18); } for (i = 0; i < 19; i++) { clcounts[i] = 0; } for (i = 0; i < rle_size; i++) { clcounts[rle[i]]++; } CalculateBitLengths(clcounts, 19, 7, clcl); LengthsToSymbols(clcl, 19, 7, clsymbols); hclen = 15; /* Trim zeros. */ while (hclen > 0 && clcounts[order[hclen + 4 - 1]] == 0) hclen--; AddBits(hlit, 5, bp, out, outsize); AddBits(hdist, 5, bp, out, outsize); AddBits(hclen, 4, bp, out, outsize); for (i = 0; i < hclen + 4; i++) { AddBits(clcl[order[i]], 3, bp, out, outsize); } for (i = 0; i < rle_size; i++) { unsigned symbol = clsymbols[rle[i]]; AddHuffmanBits(symbol, clcl[rle[i]], bp, out, outsize); /* Extra bits. 
*/ if (rle[i] == 16) AddBits(rle_bits[i], 2, bp, out, outsize); else if (rle[i] == 17) AddBits(rle_bits[i], 3, bp, out, outsize); else if (rle[i] == 18) AddBits(rle_bits[i], 7, bp, out, outsize); } free(lld_lengths); free(rle); free(rle_bits); } /* Adds all lit/len and dist codes from the lists as huffman symbols. Does not add end code 256. expected_data_size is the uncompressed block size, used for assert, but you can set it to 0 to not do the assertion. */ void AddLZ77Data(const unsigned short* litlens, const unsigned short* dists, size_t lstart, size_t lend, size_t expected_data_size, const unsigned* ll_symbols, const unsigned* ll_lengths, const unsigned* d_symbols, const unsigned* d_lengths, unsigned char* bp, unsigned char** out, size_t* outsize) { size_t testlength = 0; size_t i; for (i = lstart; i < lend; i++) { unsigned dist = dists[i]; unsigned litlen = litlens[i]; if (dist == 0) { assert(litlen < 256); assert(ll_lengths[litlen] > 0); AddHuffmanBits(ll_symbols[litlen], ll_lengths[litlen], bp, out, outsize); testlength++; } else { unsigned lls = GetLengthSymbol(litlen); unsigned ds = GetDistSymbol(dist); assert(litlen >= 3 && litlen <= 288); assert(ll_lengths[lls] > 0); assert(d_lengths[ds] > 0); AddHuffmanBits(ll_symbols[lls], ll_lengths[lls], bp, out, outsize); AddBits(GetLengthExtraBitsValue(litlen), GetLengthExtraBits(litlen), bp, out, outsize); AddHuffmanBits(d_symbols[ds], d_lengths[ds], bp, out, outsize); AddBits(GetDistExtraBitsValue(dist), GetDistExtraBits(dist), bp, out, outsize); testlength += litlen; } } assert(expected_data_size == 0 || testlength == expected_data_size); } void GetFixedTree(unsigned* ll_lengths, unsigned* d_lengths) { size_t i; for (i = 0; i < 144; i++) ll_lengths[i] = 8; for (i = 144; i < 256; i++) ll_lengths[i] = 9; for (i = 256; i < 280; i++) ll_lengths[i] = 7; for (i = 280; i < 288; i++) ll_lengths[i] = 8; for (i = 0; i < 32; i++) d_lengths[i] = 5; } /* Calculates size of the part after the header and tree of an LZ77 block, 
in bits. */ size_t CalculateBlockSymbolSize(const unsigned* ll_lengths, const unsigned* d_lengths, const unsigned short* litlens, const unsigned short* dists, size_t lstart, size_t lend) { size_t result = 0; size_t i; for (i = lstart; i < lend; i++) { if (dists[i] == 0) { result += ll_lengths[litlens[i]]; } else { result += ll_lengths[GetLengthSymbol(litlens[i])]; result += d_lengths[GetDistSymbol(dists[i])]; result += GetLengthExtraBits(litlens[i]); result += GetDistExtraBits(dists[i]); } } result += ll_lengths[256]; /*end symbol*/ return result; } double CalculateBlockSize( const unsigned short* litlens, const unsigned short* dists, size_t lstart, size_t lend, int btype) { size_t ll_counts[288]; size_t d_counts[32]; unsigned ll_lengths[288]; unsigned d_lengths[32]; double result = 3; /*bfinal and btype bits*/ assert(btype == 1 || btype == 2); /* This is not for uncompressed blocks. */ if(btype == 1) { GetFixedTree(ll_lengths, d_lengths); } else { GetLZ77Counts(litlens, dists, lstart, lend, ll_counts, d_counts); CalculateBitLengths(ll_counts, 288, 15, ll_lengths); CalculateBitLengths(d_counts, 32, 15, d_lengths); PatchDistanceCodesForBuggyDecoders(d_lengths); result += CalculateTreeSize(ll_lengths, d_lengths, ll_counts, d_counts); } result += CalculateBlockSymbolSize( ll_lengths, d_lengths, litlens, dists, lstart, lend); return result; } /* Adds a deflate block with the given LZ77 data to the output. options: global program options btype: the block type, must be 1 or 2 final: whether to set the "final" bit on this block, must be the last block litlens: literal/length array of the LZ77 data, in the same format as in LZ77Store. dists: distance array of the LZ77 data, in the same format as in LZ77Store. lstart: where to start in the LZ77 data lend: where to end in the LZ77 data (not inclusive) expected_data_size: the uncompressed block size, used for assert, but you can set it to 0 to not do the assertion. 
bp: output bit pointer out: dynamic output array to append to outsize: dynamic output array size */ void AddLZ77Block(const Options* options, int btype, int final, const unsigned short* litlens, const unsigned short* dists, size_t lstart, size_t lend, size_t expected_data_size, unsigned char* bp, unsigned char** out, size_t* outsize) { size_t ll_counts[288]; size_t d_counts[32]; unsigned ll_lengths[288]; unsigned d_lengths[32]; unsigned ll_symbols[288]; unsigned d_symbols[32]; size_t detect_block_size = *outsize; size_t compressed_size; size_t uncompressed_size = 0; size_t i; AddBit(final, bp, out, outsize); AddBit(btype & 1, bp, out, outsize); AddBit((btype & 2) >> 1, bp, out, outsize); if (btype == 1) { /* Fixed block. */ GetFixedTree(ll_lengths, d_lengths); } else { /* Dynamic block. */ unsigned detect_tree_size; assert(btype == 2); GetLZ77Counts(litlens, dists, lstart, lend, ll_counts, d_counts); CalculateBitLengths(ll_counts, 288, 15, ll_lengths); CalculateBitLengths(d_counts, 32, 15, d_lengths); PatchDistanceCodesForBuggyDecoders(d_lengths); detect_tree_size = *outsize; AddDynamicTree(ll_lengths, d_lengths, bp, out, outsize); if (options->verbose) { fprintf(stderr, "treesize: %d\n", (int)(*outsize - detect_tree_size)); } /* Assert that for every present symbol, the code length is non-zero. */ /* TODO(lode): remove this in release version. */ for (i = 0; i < 288; i++) assert(ll_counts[i] == 0 || ll_lengths[i] > 0); for (i = 0; i < 32; i++) assert(d_counts[i] == 0 || d_lengths[i] > 0); } LengthsToSymbols(ll_lengths, 288, 15, ll_symbols); LengthsToSymbols(d_lengths, 32, 15, d_symbols); detect_block_size = *outsize; AddLZ77Data(litlens, dists, lstart, lend, expected_data_size, ll_symbols, ll_lengths, d_symbols, d_lengths, bp, out, outsize); /* End symbol. */ AddHuffmanBits(ll_symbols[256], ll_lengths[256], bp, out, outsize); for (i = lstart; i < lend; i++) { uncompressed_size += dists[i] == 0 ? 
1 : litlens[i]; } compressed_size = *outsize - detect_block_size; if (options->verbose) { fprintf(stderr, "compressed block size: %d (%dk) (unc: %d)\n", (int)compressed_size, (int)(compressed_size / 1024), (int)(uncompressed_size)); } } void DeflateDynamicBlock(const Options* options, int final, const unsigned char* in, size_t instart, size_t inend, unsigned char* bp, unsigned char** out, size_t* outsize) { BlockState s; size_t blocksize = inend - instart; LZ77Store store; int btype = 2; InitLZ77Store(&store); s.options = options; s.blockstart = instart; s.blockend = inend; #ifdef USE_LONGEST_MATCH_CACHE s.lmc = (LongestMatchCache*)malloc(sizeof(LongestMatchCache)); InitLongestMatchCache(blocksize, s.lmc); #endif LZ77Optimal(&s, in, instart, inend, &store); /* For small block, encoding with fixed tree can be smaller. For large block, don't bother doing this expensive test, dynamic tree will be better.*/ if (store.size < 1000) { double dyncost, fixedcost; LZ77Store fixedstore; InitLZ77Store(&fixedstore); LZ77OptimalFixed(&s, in, instart, inend, &fixedstore); dyncost = CalculateBlockSize(store.litlens, store.dists, 0, store.size, 2); fixedcost = CalculateBlockSize(fixedstore.litlens, fixedstore.dists, 0, fixedstore.size, 1); if (fixedcost < dyncost) { btype = 1; CleanLZ77Store(&store); store = fixedstore; } else { CleanLZ77Store(&fixedstore); } } AddLZ77Block(s.options, btype, final, store.litlens, store.dists, 0, store.size, blocksize, bp, out, outsize); #ifdef USE_LONGEST_MATCH_CACHE CleanLongestMatchCache(s.lmc); free(s.lmc); #endif CleanLZ77Store(&store); } void DeflateFixedBlock(const Options* options, int final, const unsigned char* in, size_t instart, size_t inend, unsigned char* bp, unsigned char** out, size_t* outsize) { BlockState s; size_t blocksize = inend - instart; LZ77Store store; InitLZ77Store(&store); s.options = options; s.blockstart = instart; s.blockend = inend; #ifdef USE_LONGEST_MATCH_CACHE s.lmc = 
(LongestMatchCache*)malloc(sizeof(LongestMatchCache)); InitLongestMatchCache(blocksize, s.lmc); #endif LZ77OptimalFixed(&s, in, instart, inend, &store); AddLZ77Block(s.options, 1, final, store.litlens, store.dists, 0, store.size, blocksize, bp, out, outsize); #ifdef USE_LONGEST_MATCH_CACHE CleanLongestMatchCache(s.lmc); free(s.lmc); #endif CleanLZ77Store(&store); } void DeflateNonCompressedBlock(const Options* options, int final, const unsigned char* in, size_t instart, size_t inend, unsigned char* bp, unsigned char** out, size_t* outsize) { size_t i; size_t blocksize = inend - instart; unsigned short nlen = ~blocksize; (void)options; assert(blocksize < 65536); /* Non compressed blocks are max this size. */ AddBit(final, bp, out, outsize); /* BTYPE 00 */ AddBit(0, bp, out, outsize); AddBit(0, bp, out, outsize); /* Any bits of input up to the next byte boundary are ignored. */ *bp = 0; APPEND_DATA(blocksize % 256, out, outsize); APPEND_DATA((blocksize / 256) % 256, out, outsize); APPEND_DATA(nlen % 256, out, outsize); APPEND_DATA((nlen / 256) % 256, out, outsize); for (i = instart; i < inend; i++) { APPEND_DATA(in[i], out, outsize); } } void DeflateBlock(const Options* options, int btype, int final, const unsigned char* in, size_t instart, size_t inend, unsigned char* bp, unsigned char** out, size_t* outsize) { if (btype == 0) { DeflateNonCompressedBlock( options, final, in, instart, inend, bp, out, outsize); } else if (btype == 1) { DeflateFixedBlock(options, final, in, instart, inend, bp, out, outsize); } else { assert (btype == 2); DeflateDynamicBlock(options, final, in, instart, inend, bp, out, outsize); } } /* Does squeeze strategy where first block splitting is done, then each block is squeezed. Parameters: see description of the Deflate function. 
*/ void DeflateSplittingFirst(const Options* options, int btype, int final, const unsigned char* in, size_t instart, size_t inend, unsigned char* bp, unsigned char** out, size_t* outsize) { size_t i; size_t* splitpoints = 0; size_t npoints = 0; if (btype == 0) { BlockSplitSimple(in, instart, inend, 65535, &splitpoints, &npoints); } else if (btype == 1) { /* If all blocks are fixed tree, splitting into separate blocks only increases the total size. Leave npoints at 0, this represents 1 block. */ } else { BlockSplit(options, in, instart, inend, options->blocksplittingmax, &splitpoints, &npoints); } for (i = 0; i <= npoints; i++) { size_t start = i == 0 ? instart : splitpoints[i - 1]; size_t end = i == npoints ? inend : splitpoints[i]; DeflateBlock(options, btype, i == npoints && final, in, start, end, bp, out, outsize); } free(splitpoints); } /* Does squeeze strategy where first the best possible lz77 is done, and then based on that data, block splitting is done. Parameters: see description of the Deflate function. */ void DeflateSplittingLast(const Options* options, int btype, int final, const unsigned char* in, size_t instart, size_t inend, unsigned char* bp, unsigned char** out, size_t* outsize) { size_t i; BlockState s; LZ77Store store; size_t* splitpoints = 0; size_t npoints = 0; if (btype == 0) { /* This function only supports LZ77 compression. DeflateSplittingFirst supports the special case of noncompressed data. Punt it to that one. 
*/ DeflateSplittingFirst(options, btype, final, in, instart, inend, bp, out, outsize); } assert(btype == 1 || btype == 2); InitLZ77Store(&store); s.options = options; s.blockstart = instart; s.blockend = inend; #ifdef USE_LONGEST_MATCH_CACHE s.lmc = (LongestMatchCache*)malloc(sizeof(LongestMatchCache)); InitLongestMatchCache(inend - instart, s.lmc); #endif if (btype == 2) { LZ77Optimal(&s, in, instart, inend, &store); } else { assert (btype == 1); LZ77OptimalFixed(&s, in, instart, inend, &store); } if (btype == 1) { /* If all blocks are fixed tree, splitting into separate blocks only increases the total size. Leave npoints at 0, this represents 1 block. */ } else { BlockSplitLZ77(options, store.litlens, store.dists, store.size, options->blocksplittingmax, &splitpoints, &npoints); } for (i = 0; i <= npoints; i++) { size_t start = i == 0 ? 0 : splitpoints[i - 1]; size_t end = i == npoints ? store.size : splitpoints[i]; AddLZ77Block(options, btype, i == npoints && final, store.litlens, store.dists, start, end, 0, bp, out, outsize); } #ifdef USE_LONGEST_MATCH_CACHE CleanLongestMatchCache(s.lmc); free(s.lmc); #endif CleanLZ77Store(&store); } /* Deflate a part, to allow Deflate() to use multiple master blocks if needed. It is possible to call this function multiple times in a row, shifting instart and inend to next bytes of the data. If instart is larger than 0, then previous bytes are used as the initial dictionary for LZ77. This function will usually output multiple deflate blocks. If final is 1, then the final bit will be set on the last block. 
*/ void DeflatePart(const Options* options, int btype, int final, const unsigned char* in, size_t instart, size_t inend, unsigned char* bp, unsigned char** out, size_t* outsize) { if (options->blocksplitting) { if (options->blocksplittinglast) { DeflateSplittingLast(options, btype, final, in, instart, inend, bp, out, outsize); } else { DeflateSplittingFirst(options, btype, final, in, instart, inend, bp, out, outsize); } } else { DeflateBlock(options, btype, final, in, instart, inend, bp, out, outsize); } } void Deflate(const Options* options, int btype, int final, const unsigned char* in, size_t insize, unsigned char* bp, unsigned char** out, size_t* outsize) { #if MASTER_BLOCK_SIZE == 0 DeflatePart(options, btype, final, in, 0, insize, bp, out, outsize); #else size_t i = 0; while (i < insize) { int masterfinal = (i + MASTER_BLOCK_SIZE >= insize); int final2 = final && masterfinal; size_t size = masterfinal ? insize - i : MASTER_BLOCK_SIZE; DeflatePart(options, btype, final2, in, i, i + size, bp, out, outsize); i += size; } #endif } pigz-2.3/zopfli/deflate.h0000644000076500000240000000610212115033074014531 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ /* Modified by madler@alumni.caltech.edu (Mark Adler) Exposed DeflatePart() as an external function. 
*/ #ifndef ZOPFLI_DEFLATE_H_ #define ZOPFLI_DEFLATE_H_ /* Functions to compress compatible with the deflate specification. */ #include "util.h" /* Compresses according to the deflate specification and append the compressed result to the output. This function will usually output multiple deflate blocks. If final is 1, then the final bit will be set on the last block. options: global program options btype: the deflate block type. Use 2 for best compression. -0: non compressed blocks (00) -1: blocks with fixed tree (01) -2: blocks with dynamic tree (10) final: whether this is the last section of the input, sets the final bit to the last deflate block. in: the input bytes insize (Deflate() only): number of input bytes instart (DeflatePart() only): offset of the start of the data to compress at in if instart is not zero, then the data preceding instart will be used as the LZ77 dictionary inend (DeflatePart() only): offset + 1 of the end of the data to compress at in bp: bit pointer for the output array. This must initially be 0, and for consecutive calls must be reused (it can have values from 0-7). This is because deflate appends blocks as bit-based data, rather than on byte boundaries. out: pointer to the dynamic output array to which the result is appended. Must be freed after use. outsize: pointer to the dynamic output array size. */ void Deflate(const Options* options, int btype, int final, const unsigned char* in, size_t insize, unsigned char* bp, unsigned char** out, size_t* outsize); void DeflatePart(const Options* options, int btype, int final, const unsigned char* in, size_t instart, size_t inend, unsigned char* bp, unsigned char** out, size_t* outsize); /* Outputs the tree to a dynamic block (btype 10) according to the deflate specification. */ void AddDynamicTree(const unsigned* ll_lengths, const unsigned* d_lengths, unsigned char* bp, unsigned char** out, size_t* outsize); /* Calculates block size in bits. 
litlens: lz77 lit/lengths dists: ll77 distances lstart: start of block lend: end of block (not inclusive) */ double CalculateBlockSize( const unsigned short* litlens, const unsigned short* dists, size_t lstart, size_t lend, int btype); #endif /* ZOPFLI_DEFLATE_H_ */ pigz-2.3/zopfli/gzip_container.c0000644000076500000240000000642612114721072016145 0ustar madlerstaff/* Copyright 2013 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ #include "gzip_container.h" #include #include "deflate.h" /* Table of CRCs of all 8-bit messages. */ static unsigned long crc_table[256]; /* Flag: has the table been computed? Initially false. */ static int crc_table_computed = 0; /* Makes the table for a fast CRC. */ void MakeCRCTable() { unsigned long c; int n, k; for (n = 0; n < 256; n++) { c = (unsigned long) n; for (k = 0; k < 8; k++) { if (c & 1) { c = 0xedb88320L ^ (c >> 1); } else { c = c >> 1; } } crc_table[n] = c; } crc_table_computed = 1; } /* Updates a running crc with the bytes buf[0..len-1] and returns the updated crc. The crc should be initialized to zero. */ unsigned long UpdateCRC(unsigned long crc, const unsigned char *buf, size_t len) { unsigned long c = crc ^ 0xffffffffL; unsigned n; if (!crc_table_computed) MakeCRCTable(); for (n = 0; n < len; n++) { c = crc_table[(c ^ buf[n]) & 0xff] ^ (c >> 8); } return c ^ 0xffffffffL; } /* Returns the CRC of the bytes buf[0..len-1]. 
*/ unsigned long CRC(const unsigned char* buf, int len) { return UpdateCRC(0L, buf, len); } /* Compresses the data according to the gzip specification. */ void GzipCompress(const Options* options, const unsigned char* in, size_t insize, unsigned char** out, size_t* outsize) { unsigned long crcvalue = CRC(in, insize); unsigned char bp = 0; APPEND_DATA(31, out, outsize); /* ID1 */ APPEND_DATA(139, out, outsize); /* ID2 */ APPEND_DATA(8, out, outsize); /* CM */ APPEND_DATA(0, out, outsize); /* FLG */ /* MTIME */ APPEND_DATA(0, out, outsize); APPEND_DATA(0, out, outsize); APPEND_DATA(0, out, outsize); APPEND_DATA(0, out, outsize); APPEND_DATA(2, out, outsize); /* XFL, 2 indicates best compression. */ APPEND_DATA(3, out, outsize); /* OS follows Unix conventions. */ Deflate(options, 2 /* Dynamic block */, 1, in, insize, &bp, out, outsize); /* CRC */ APPEND_DATA(crcvalue % 256, out, outsize); APPEND_DATA((crcvalue >> 8) % 256, out, outsize); APPEND_DATA((crcvalue >> 16) % 256, out, outsize); APPEND_DATA((crcvalue >> 24) % 256, out, outsize); /* ISIZE */ APPEND_DATA(insize % 256, out, outsize); APPEND_DATA((insize >> 8) % 256, out, outsize); APPEND_DATA((insize >> 16) % 256, out, outsize); APPEND_DATA((insize >> 24) % 256, out, outsize); if (options->verbose) { fprintf(stderr, "Original Size: %d, Compressed: %d, Compression: %f%% Removed\n", (int)insize, (int)*outsize, 100.0f * (float)(insize - *outsize) / (float)insize); } } pigz-2.3/zopfli/gzip_container.h0000644000076500000240000000241412114721072016143 0ustar madlerstaff/* Copyright 2013 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ #ifndef ZOPFLI_GZIP_H_ #define ZOPFLI_GZIP_H_ /* Functions to compress according to the Gzip specification. */ #include "util.h" /* Compresses according to the gzip specification and append the compressed result to the output. options: global program options out: pointer to the dynamic output array to which the result is appended. Must be freed after use. outsize: pointer to the dynamic output array size. */ void GzipCompress(const Options* options, const unsigned char* in, size_t insize, unsigned char** out, size_t* outsize); #endif /* ZOPFLI_GZIP_H_ */ pigz-2.3/zopfli/hash.c0000644000076500000240000000712212114721072014047 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ #include "hash.h" #include #include #include #define HASH_SHIFT 5 #define HASH_MASK 32767 void InitHash(size_t window_size, Hash* h) { size_t i; h->val = 0; h->head = (int*)malloc(sizeof(*h->head) * 65536); h->prev = (unsigned short*)malloc(sizeof(*h->prev) * window_size); h->hashval = (int*)malloc(sizeof(*h->hashval) * window_size); for (i = 0; i < 65536; i++) { h->head[i] = -1; /* -1 indicates no head so far. */ } for (i = 0; i < window_size; i++) { h->prev[i] = i; /* If prev[j] == j, then prev[j] is uninitialized. */ h->hashval[i] = -1; } #ifdef USE_HASH_SAME h->same = (unsigned short*)malloc(sizeof(*h->same) * window_size); for (i = 0; i < window_size; i++) { h->same[i] = 0; } #endif #ifdef USE_HASH_SAME_HASH h->val2 = 0; h->head2 = (int*)malloc(sizeof(*h->head2) * 65536); h->prev2 = (unsigned short*)malloc(sizeof(*h->prev2) * window_size); h->hashval2 = (int*)malloc(sizeof(*h->hashval2) * window_size); for (i = 0; i < 65536; i++) { h->head2[i] = -1; } for (i = 0; i < window_size; i++) { h->prev2[i] = i; h->hashval2[i] = -1; } #endif } void CleanHash(Hash* h) { free(h->head); free(h->prev); free(h->hashval); #ifdef USE_HASH_SAME_HASH free(h->head2); free(h->prev2); free(h->hashval2); #endif #ifdef USE_HASH_SAME free(h->same); #endif } /* Update the sliding hash value with the given byte. All calls to this function must be made on consecutive input characters. Since the hash value exists out of multiple input bytes, a few warmups with this function are needed initially. */ static void UpdateHashValue(Hash* h, unsigned char c) { h->val = (((h->val) << HASH_SHIFT) ^ (c)) & HASH_MASK; } void UpdateHash(const unsigned char* array, size_t pos, size_t end, Hash* h) { unsigned short hpos = pos & WINDOW_MASK; #ifdef USE_HASH_SAME size_t amount = 0; #endif UpdateHashValue(h, pos + MIN_MATCH <= end ? 
array[pos + MIN_MATCH - 1] : 0); h->hashval[hpos] = h->val; if (h->head[h->val] != -1 && h->hashval[h->head[h->val]] == h->val) { h->prev[hpos] = h->head[h->val]; } else h->prev[hpos] = hpos; h->head[h->val] = hpos; #ifdef USE_HASH_SAME /* Update "same". */ if (h->same[(pos - 1) & WINDOW_MASK] > 1) { amount = h->same[(pos - 1) & WINDOW_MASK] - 1; } while (pos + amount + 1 < end && array[pos] == array[pos + amount + 1] && amount < (unsigned short)(-1)) { amount++; } h->same[hpos] = amount; #endif #ifdef USE_HASH_SAME_HASH h->val2 = ((h->same[hpos] - MIN_MATCH) & 255) ^ h->val; h->hashval2[hpos] = h->val2; if (h->head2[h->val2] != -1 && h->hashval2[h->head2[h->val2]] == h->val2) { h->prev2[hpos] = h->head2[h->val2]; } else h->prev2[hpos] = hpos; h->head2[h->val2] = hpos; #endif } void WarmupHash(const unsigned char* array, size_t pos, size_t end, Hash* h) { (void)end; UpdateHashValue(h, array[pos + 0]); UpdateHashValue(h, array[pos + 1]); } pigz-2.3/zopfli/hash.h0000644000076500000240000000417312114721072014057 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ /* The hash for FindLongestMatch of lz77.c. */ #ifndef ZOPFLI_HASH_H_ #define ZOPFLI_HASH_H_ #include "util.h" typedef struct Hash { int* head; /* Hash value to index of its most recent occurance. */ unsigned short* prev; /* Index to index of prev. occurance of same hash. 
*/ int* hashval; /* Index to hash value at this index. */ int val; /* Current hash value. */ #ifdef USE_HASH_SAME_HASH /* Fields with similar purpose as the above hash, but for the second hash with a value that is calculated differently. */ int* head2; /* Hash value to index of its most recent occurance. */ unsigned short* prev2; /* Index to index of prev. occurance of same hash. */ int* hashval2; /* Index to hash value at this index. */ int val2; /* Current hash value. */ #endif #ifdef USE_HASH_SAME unsigned short* same; /* Amount of repetitions of same byte after this .*/ #endif } Hash; /* Allocates and initializes all fields of Hash. */ void InitHash(size_t window_size, Hash* h); /* Frees all fields of Hash. */ void CleanHash(Hash* h); /* Updates the hash values based on the current position in the array. All calls to this must be made for consecutive bytes. */ void UpdateHash(const unsigned char* array, size_t pos, size_t end, Hash* h); /* Prepopulates hash: Fills in the initial values in the hash, before UpdateHash can be used correctly. */ void WarmupHash(const unsigned char* array, size_t pos, size_t end, Hash* h); #endif /* ZOPFLI_HASH_H_ */ pigz-2.3/zopfli/katajainen.c0000644000076500000240000001735112114721072015236 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ /* Bounded package merge algorithm, based on the paper "A Fast and Space-Economical Algorithm for Length-Limited Coding Jyrki Katajainen, Alistair Moffat, Andrew Turpin". */ #include "katajainen.h" #include #include typedef struct Node Node; /* Nodes forming chains. Also used to represent leaves. */ struct Node { size_t weight; /* Total weight (symbol count) of this chain. */ Node* tail; /* Previous node(s) of this chain, or 0 if none. */ int count; /* Leaf symbol index, or number of leaves before this chain. */ char inuse; /* Tracking for garbage collection. */ }; /* Memory pool for nodes. */ typedef struct NodePool { Node* nodes; /* The pool. */ Node* next; /* Pointer to a possibly free node in the pool. */ int size; /* Size of the memory pool. */ } NodePool; /* Initializes a chain node with the given values and marks it as in use. */ static void InitNode(size_t weight, int count, Node* tail, Node* node) { node->weight = weight; node->count = count; node->tail = tail; node->inuse = 1; } /* Finds a free location in the memory pool. Performs garbage collection if needed. lists: If given, used to mark in-use nodes during garbage collection. maxbits: Size of lists. pool: Memory pool to get free node from. */ static Node* GetFreeNode(Node* (*lists)[2], int maxbits, NodePool* pool) { for (;;) { if (pool->next >= &pool->nodes[pool->size]) { /* Garbage collection. */ int i; for (i = 0; i < pool->size; i++) { pool->nodes[i].inuse = 0; } if (lists) { for (i = 0; i < maxbits * 2; i++) { Node* node; for (node = lists[i / 2][i % 2]; node; node = node->tail) { node->inuse = 1; } } } pool->next = &pool->nodes[0]; } if (!pool->next->inuse) break; /* Found one. */ pool->next++; } return pool->next++; } /* Performs a Boundary Package-Merge step. Puts a new chain in the given list. 
The new chain is, depending on the weights, a leaf or a combination of two chains from the previous list. lists: The lists of chains. maxbits: Number of lists. leaves: The leaves, one per symbol. numsymbols: Number of leaves. pool: the node memory pool. index: The index of the list in which a new chain or leaf is required. final: Whether this is the last time this function is called. If it is then it is no more needed to recursively call self. */ static void BoundaryPM(Node* (*lists)[2], int maxbits, Node* leaves, int numsymbols, NodePool* pool, int index, char final) { Node* newchain; Node* oldchain; int lastcount = lists[index][1]->count; /* Count of last chain of list. */ if (index == 0 && lastcount >= numsymbols) return; newchain = GetFreeNode(lists, maxbits, pool); oldchain = lists[index][1]; /* These are set up before the recursive calls below, so that there is a list pointing to the new node, to let the garbage collection know it's in use. */ lists[index][0] = oldchain; lists[index][1] = newchain; if (index == 0) { /* New leaf node in list 0. */ InitNode(leaves[lastcount].weight, lastcount + 1, 0, newchain); } else { size_t sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight; if (lastcount < numsymbols && sum > leaves[lastcount].weight) { /* New leaf inserted in list, so count is incremented. */ InitNode(leaves[lastcount].weight, lastcount + 1, oldchain->tail, newchain); } else { InitNode(sum, lastcount, lists[index - 1][1], newchain); if (!final) { /* Two lookahead chains of previous list used up, create new ones. */ BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0); BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0); } } } } /* Initializes each list with as lookahead chains the two leaves with lowest weights. 
*/ static void InitLists( NodePool* pool, const Node* leaves, int maxbits, Node* (*lists)[2]) { int i; Node* node0 = GetFreeNode(0, maxbits, pool); Node* node1 = GetFreeNode(0, maxbits, pool); InitNode(leaves[0].weight, 1, 0, node0); InitNode(leaves[1].weight, 2, 0, node1); for (i = 0; i < maxbits; i++) { lists[i][0] = node0; lists[i][1] = node1; } } /* Converts result of boundary package-merge to the bitlengths. The result in the last chain of the last list contains the amount of active leaves in each list. chain: Chain to extract the bit length from (last chain from last list). */ static void ExtractBitLengths(Node* chain, Node* leaves, unsigned* bitlengths) { Node* node; for (node = chain; node; node = node->tail) { int i; for (i = 0; i < node->count; i++) { bitlengths[leaves[i].count]++; } } } /* Comparator for sorting the leaves. Has the function signature for qsort. */ static int LeafComparator(const void* a, const void* b) { return ((const Node*)a)->weight - ((const Node*)b)->weight; } int LengthLimitedCodeLengths( const size_t* frequencies, int n, int maxbits, unsigned* bitlengths) { NodePool pool; int i; int numsymbols = 0; /* Amount of symbols with frequency > 0. */ int numBoundaryPMRuns; /* Array of lists of chains. Each list requires only two lookahead chains at a time, so each list is a array of two Node*'s. */ Node* (*lists)[2]; /* One leaf per symbol. Only numsymbols leaves will be used. */ Node* leaves = (Node*)malloc(n * sizeof(*leaves)); /* Initialize all bitlengths at 0. */ for (i = 0; i < n; i++) { bitlengths[i] = 0; } /* Count used symbols and place them in the leaves. */ for (i = 0; i < n; i++) { if (frequencies[i]) { leaves[numsymbols].weight = frequencies[i]; leaves[numsymbols].count = i; /* Index of symbol this leaf represents. */ numsymbols++; } } /* Check special cases and error conditions. */ if ((1 << maxbits) < numsymbols) { free(leaves); return 1; /* Error, too few maxbits to represent symbols. 
*/ } if (numsymbols == 0) { free(leaves); return 0; /* No symbols at all. OK. */ } if (numsymbols == 1) { bitlengths[leaves[0].count] = 1; free(leaves); return 0; /* Only one symbol, give it bitlength 1, not 0. OK. */ } /* Sort the leaves from lightest to heaviest. */ qsort(leaves, numsymbols, sizeof(Node), LeafComparator); /* Initialize node memory pool. */ pool.size = 2 * maxbits * (maxbits + 1); pool.nodes = (Node*)malloc(pool.size * sizeof(*pool.nodes)); pool.next = pool.nodes; for (i = 0; i < pool.size; i++) { pool.nodes[i].inuse = 0; } lists = (Node* (*)[2])malloc(maxbits * sizeof(*lists)); InitLists(&pool, leaves, maxbits, lists); /* In the last list, 2 * numsymbols - 2 active chains need to be created. Two are already created in the initialization. Each BoundaryPM run creates one. */ numBoundaryPMRuns = 2 * numsymbols - 4; for (i = 0; i < numBoundaryPMRuns; i++) { char final = i == numBoundaryPMRuns - 1; BoundaryPM(lists, maxbits, leaves, numsymbols, &pool, maxbits - 1, final); } ExtractBitLengths(lists[maxbits - 1][1], leaves, bitlengths); free(lists); free(leaves); free(pool.nodes); return 0; /* OK. */ } pigz-2.3/zopfli/katajainen.h0000644000076500000240000000272212114721072015237 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ #ifndef ZOPFLI_KATAJAINEN_H_ #define ZOPFLI_KATAJAINEN_H_ #include /* Outputs minimum-redundancy length-limited code bitlengths for symbols with the given counts. The bitlengths are limited by maxbits. The output is tailored for DEFLATE: symbols that never occur, get a bit length of 0, and if only a single symbol occurs at least once, its bitlength will be 1, and not 0 as would theoretically be needed for a single symbol. frequencies: The amount of occurances of each symbol. n: The amount of symbols. maxbits: Maximum bit length, inclusive. bitlengths: Output, the bitlengths for the symbol prefix codes. return: 0 for OK, non-0 for error. */ int LengthLimitedCodeLengths( const size_t* frequencies, int n, int maxbits, unsigned* bitlengths); #endif /* ZOPFLI_KATAJAINEN_H_ */ pigz-2.3/zopfli/lz77.c0000644000076500000240000003276312114721072013740 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ #include "lz77.h" #include #include #include void InitLZ77Store(LZ77Store* store) { store->size = 0; store->litlens = 0; store->dists = 0; } void CleanLZ77Store(LZ77Store* store) { free(store->litlens); free(store->dists); } void CopyLZ77Store( const LZ77Store* source, LZ77Store* dest) { size_t i; CleanLZ77Store(dest); dest->litlens = (unsigned short*)malloc(sizeof(*dest->litlens) * source->size); dest->dists = (unsigned short*)malloc(sizeof(*dest->dists) * source->size); if (!dest->litlens || !dest->dists) exit(-1); /* Allocation failed. */ dest->size = source->size; for (i = 0; i < source->size; i++) { dest->litlens[i] = source->litlens[i]; dest->dists[i] = source->dists[i]; } } /* Appends the length and distance to the LZ77 arrays of the LZ77Store. context must be a LZ77Store*. */ void StoreLitLenDist(unsigned short length, unsigned short dist, LZ77Store* store) { size_t size2 = store->size; /* Needed for using APPEND_DATA twice. */ APPEND_DATA(length, &store->litlens, &store->size); APPEND_DATA(dist, &store->dists, &size2); } /* Gets the value of the length given the distance. Typically, the value of the length is the length, but if the distance is very long, decrease the value of the length a bit to make up for the fact that long distances use large amounts of extra bits. */ static int GetLengthValue(int length, int distance) { /* At distance > 1024, using length 3 is no longer good, due to the large amount of extra bits for the distance code. distance > 1024 uses 9+ extra bits, and this seems to be the sweet spot. */ return distance > 1024 ? length - 1 : length; } void VerifyLenDist(const unsigned char* data, size_t datasize, size_t pos, unsigned short dist, unsigned short length) { /* TODO(lode): make this only run in a debug compile, it's for assert only. 
*/ size_t i; assert(pos + length <= datasize); for (i = 0; i < length; i++) { if (data[pos - dist + i] != data[pos + i]) { assert(data[pos - dist + i] == data[pos + i]); break; } } } /* Finds how long the match of scan and match is. Can be used to find how many bytes starting from scan, and from match, are equal. Returns the last byte after scan, which is still equal to the correspondinb byte after match. scan is the position to compare match is the earlier position to compare. end is the last possible byte, beyond which to stop looking. safe_end is a few (8) bytes before end, for comparing multiple bytes at once. */ static const unsigned char* GetMatch(const unsigned char* scan, const unsigned char* match, const unsigned char* end, const unsigned char* safe_end) { if (sizeof(size_t) == 8) { /* 8 checks at once per array bounds check (size_t is 64-bit). */ while (scan < safe_end && *((size_t*)scan) == *((size_t*)match)) { scan += 8; match += 8; } } else if (sizeof(unsigned int) == 4) { /* 4 checks at once per array bounds check (unsigned int is 32-bit). */ while (scan < safe_end && *((unsigned int*)scan) == *((unsigned int*)match)) { scan += 4; match += 4; } } else { /* do 8 checks at once per array bounds check. */ while (scan < safe_end && *scan == *match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match) { scan++; match++; } } /* The remaining few bytes. */ while (scan != end && *scan == *match) { scan++; match++; } return scan; } #ifdef USE_LONGEST_MATCH_CACHE /* Gets distance, length and sublen values from the cache if possible. Returns 1 if it got the values from the cache, 0 if not. Updates the limit value to a smaller one if possible with more limited information from the cache. 
*/ int TryGetFromLongestMatchCache(BlockState* s, size_t pos, size_t* limit, unsigned short* sublen, unsigned short* distance, unsigned short* length) { /* The LMC cache starts at the beginning of the block rather than the beginning of the whole array. */ size_t lmcpos = pos - s->blockstart; /* Length > 0 and dist 0 is invalid combination, which indicates on purpose that this cache value is not filled in yet. */ unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 || s->lmc->dist[lmcpos] != 0); unsigned char limit_ok_for_cache = cache_available && (*limit == MAX_MATCH || s->lmc->length[lmcpos] <= *limit || (sublen && MaxCachedSublen(s->lmc, lmcpos, s->lmc->length[lmcpos]) >= *limit)); if (s->lmc && limit_ok_for_cache && cache_available) { if (!sublen || s->lmc->length[lmcpos] <= MaxCachedSublen(s->lmc, lmcpos, s->lmc->length[lmcpos])) { *length = s->lmc->length[lmcpos]; if (*length > *limit) *length = *limit; if (sublen) { CacheToSublen(s->lmc, lmcpos, *length, sublen); *distance = sublen[*length]; if (*limit == MAX_MATCH && *length >= MIN_MATCH) { assert(sublen[*length] == s->lmc->dist[lmcpos]); } } else { *distance = s->lmc->dist[lmcpos]; } return 1; } /* Can't use much of the cache, since the "sublens" need to be calculated, but at least we already know when to stop. */ *limit = s->lmc->length[lmcpos]; } return 0; } /* Stores the found sublen, distance and length in the longest match cache, if possible. */ void StoreInLongestMatchCache(BlockState* s, size_t pos, size_t limit, const unsigned short* sublen, unsigned short distance, unsigned short length) { /* The LMC cache starts at the beginning of the block rather than the beginning of the whole array. */ size_t lmcpos = pos - s->blockstart; /* Length > 0 and dist 0 is invalid combination, which indicates on purpose that this cache value is not filled in yet. 
*/ unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 || s->lmc->dist[lmcpos] != 0); if (s->lmc && limit == MAX_MATCH && sublen && !cache_available) { assert(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0); s->lmc->dist[lmcpos] = length < MIN_MATCH ? 0 : distance; s->lmc->length[lmcpos] = length < MIN_MATCH ? 0 : length; assert(!(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0)); SublenToCache(sublen, lmcpos, length, s->lmc); } } #endif void FindLongestMatch(BlockState* s, const Hash* h, const unsigned char* array, size_t pos, size_t size, size_t limit, unsigned short* sublen, unsigned short* distance, unsigned short* length) { unsigned short hpos = pos & WINDOW_MASK, p, pp; unsigned short bestdist = 0; unsigned short bestlength = 1; const unsigned char* scan; const unsigned char* match; const unsigned char* arrayend; const unsigned char* arrayend_safe; #if MAX_CHAIN_HITS < WINDOW_SIZE int chain_counter = MAX_CHAIN_HITS; /* For quitting early. */ #endif unsigned dist = 0; /* Not unsigned short on purpose. */ int* hhead = h->head; unsigned short* hprev = h->prev; int* hhashval = h->hashval; int hval = h->val; #ifdef USE_LONGEST_MATCH_CACHE if (TryGetFromLongestMatchCache(s, pos, &limit, sublen, distance, length)) { assert(pos + *length <= size); return; } #endif assert(limit <= MAX_MATCH); assert(limit >= MIN_MATCH); assert(pos < size); if (size - pos < MIN_MATCH) { /* The rest of the code assumes there are at least MIN_MATCH bytes to try. */ *length = 0; *distance = 0; return; } if (pos + limit > size) { limit = size - pos; } arrayend = &array[pos] + limit; arrayend_safe = arrayend - 8; assert(hval < 65536); pp = hhead[hval]; /* During the whole loop, p == hprev[pp]. */ p = hprev[pp]; assert(pp == hpos); dist = p < pp ? pp - p : ((WINDOW_SIZE - p) + pp); /* Go through all distances. 
*/ while (dist < WINDOW_SIZE) { unsigned short currentlength = 0; assert(p < WINDOW_SIZE); assert(p == hprev[pp]); assert(hhashval[p] == hval); if (dist > 0) { assert(pos < size); assert(dist <= pos); scan = &array[pos]; match = &array[pos - dist]; /* Testing the byte at position bestlength first, goes slightly faster. */ if (pos + bestlength >= size || *(scan + bestlength) == *(match + bestlength)) { #ifdef USE_HASH_SAME unsigned short same0 = h->same[pos & WINDOW_MASK]; if (same0 > 2 && *scan == *match) { unsigned short same1 = h->same[(pos - dist) & WINDOW_MASK]; unsigned short same = same0 < same1 ? same0 : same1; if (same > limit) same = limit; scan += same; match += same; } #endif scan = GetMatch(scan, match, arrayend, arrayend_safe); currentlength = scan - &array[pos]; /* The found length. */ } if (currentlength > bestlength) { if (sublen) { unsigned short j; for (j = bestlength + 1; j <= currentlength; j++) { sublen[j] = dist; } } bestdist = dist; bestlength = currentlength; if (currentlength >= limit) break; } } #ifdef USE_HASH_SAME_HASH /* Switch to the other hash once this will be more efficient. */ if (hhead != h->head2 && bestlength >= h->same[hpos] && h->val2 == h->hashval2[p]) { /* Now use the hash that encodes the length and first byte. */ hhead = h->head2; hprev = h->prev2; hhashval = h->hashval2; hval = h->val2; } #endif pp = p; p = hprev[p]; if (p == pp) break; /* Uninited prev value. */ dist += p < pp ? 
pp - p : ((WINDOW_SIZE - p) + pp); #if MAX_CHAIN_HITS < WINDOW_SIZE chain_counter--; if (chain_counter <= 0) break; #endif } #ifdef USE_LONGEST_MATCH_CACHE StoreInLongestMatchCache(s, pos, limit, sublen, bestdist, bestlength); #endif assert(bestlength <= limit); *distance = bestdist; *length = bestlength; assert(pos + *length <= size); } void LZ77Greedy(BlockState* s, const unsigned char* in, size_t instart, size_t inend, LZ77Store* store) { size_t i = 0, j; unsigned short leng; unsigned short dist; int lengvalue; size_t windowstart = instart > WINDOW_SIZE ? instart - WINDOW_SIZE : 0; unsigned short dummysublen[259]; Hash hash; Hash* h = &hash; #ifdef LAZY_MATCHING /* Lazy matching. */ unsigned prev_length = 0; unsigned prev_match = 0; int prevlengvalue; int match_available = 0; #endif if (instart == inend) return; InitHash(WINDOW_SIZE, h); WarmupHash(in, windowstart, inend, h); for (i = windowstart; i < instart; i++) { UpdateHash(in, i, inend, h); } for (i = instart; i < inend; i++) { UpdateHash(in, i, inend, h); FindLongestMatch(s, h, in, i, inend, MAX_MATCH, dummysublen, &dist, &leng); lengvalue = GetLengthValue(leng, dist); #ifdef LAZY_MATCHING /* Lazy matching. */ prevlengvalue = GetLengthValue(prev_length, prev_match); if (match_available) { match_available = 0; if (lengvalue > prevlengvalue + 1) { StoreLitLenDist(in[i - 1], 0, store); if (lengvalue >= MIN_MATCH && lengvalue < MAX_MATCH) { match_available = 1; prev_length = leng; prev_match = dist; continue; } } else { /* Add previous to output. */ leng = prev_length; dist = prev_match; lengvalue = prevlengvalue; /* Add to output. */ VerifyLenDist(in, inend, i - 1, dist, leng); StoreLitLenDist(leng, dist, store); for (j = 2; j < leng; j++) { assert(i < inend); i++; UpdateHash(in, i, inend, h); } continue; } } else if (lengvalue >= MIN_MATCH && leng < MAX_MATCH) { match_available = 1; prev_length = leng; prev_match = dist; continue; } /* End of lazy matching. */ #endif /* Add to output. 
*/ if (lengvalue >= MIN_MATCH) { VerifyLenDist(in, inend, i, dist, leng); StoreLitLenDist(leng, dist, store); } else { leng = 1; StoreLitLenDist(in[i], 0, store); } for (j = 1; j < leng; j++) { assert(i < inend); i++; UpdateHash(in, i, inend, h); } } CleanHash(h); } void GetLZ77Counts(const unsigned short* litlens, const unsigned short* dists, size_t start, size_t end, size_t* ll_count, size_t* d_count) { size_t i; for (i = 0; i < 288; i++) { ll_count[i] = 0; } for (i = 0; i < 32; i++) { d_count[i] = 0; } for (i = start; i < end; i++) { if (dists[i] == 0) { ll_count[litlens[i]]++; } else { ll_count[GetLengthSymbol(litlens[i])]++; d_count[GetDistSymbol(dists[i])]++; } } ll_count[256] = 1; /* End symbol. */ } pigz-2.3/zopfli/lz77.h0000644000076500000240000001042112114721072013730 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ /* Functions for basic LZ77 compression and utilities for the "squeeze" LZ77 compression. */ #ifndef ZOPFLI_LZ77_H_ #define ZOPFLI_LZ77_H_ #include #include "cache.h" #include "hash.h" #include "util.h" /* Stores lit/length and dist pairs for LZ77. litlens: Contains the literal symbols or length values. dists: Indicates the distance, or 0 to indicate that there is no distance and litlens contains a literal instead of a length. litlens and dists both have the same size. 
*/ typedef struct LZ77Store { unsigned short* litlens; /* Lit or len. */ unsigned short* dists; /* If 0: indicates literal in corresponding litlens, if > 0: length in corresponding litlens, this is the distance. */ size_t size; } LZ77Store; void InitLZ77Store(LZ77Store* store); void CleanLZ77Store(LZ77Store* store); void CopyLZ77Store(const LZ77Store* source, LZ77Store* dest); void StoreLitLenDist(unsigned short length, unsigned short dist, LZ77Store* store); /* Some state information for compressing a block. This is currently a bit under-used (with mainly only the longest match cache), but is kept for easy future expansion. */ typedef struct BlockState { const Options* options; #ifdef USE_LONGEST_MATCH_CACHE /* Cache for length/distance pairs found so far. */ LongestMatchCache* lmc; #endif /* The start (inclusive) and end (not inclusive) of the current block. */ size_t blockstart; size_t blockend; } BlockState; /* Finds the longest match (length and corresponding distance) for LZ77 compression. Even when not using "sublen", it can be more efficient to provide an array, because only then the caching is used. array: the data pos: position in the data to find the match for size: size of the data limit: limit length to maximum this value (default should be 258). This allows finding a shorter dist for that length (= less extra bits). Must be in the range [MIN_MATCH, MAX_MATCH]. sublen: output array of 259 elements, or null. Has, for each length, the smallest distance required to reach this length. Only 256 of its 259 values are used, the first 3 are ignored (the shortest length is 3. It is purely for convenience that the array is made 3 longer). */ void FindLongestMatch( BlockState *s, const Hash* h, const unsigned char* array, size_t pos, size_t size, size_t limit, unsigned short* sublen, unsigned short* distance, unsigned short* length); /* Verifies if length and dist are indeed valid, only used for assertion. 
*/ void VerifyLenDist(const unsigned char* data, size_t datasize, size_t pos, unsigned short dist, unsigned short length); /* Counts the number of literal, length and distance symbols in the given lz77 arrays. litlens: lz77 lit/lengths dists: ll77 distances start: where to begin counting in litlens and dists end: where to stop counting in litlens and dists (not inclusive) ll_count: count of each lit/len symbol, must have size 288 (see deflate standard) d_count: count of each dist symbol, must have size 32 (see deflate standard) */ void GetLZ77Counts(const unsigned short* litlens, const unsigned short* dists, size_t start, size_t end, size_t* ll_count, size_t* d_count); /* Does LZ77 using an algorithm similar to gzip, with lazy matching, rather than with the slow but better "squeeze" implementation. The result is placed in the LZ77Store. If instart is larger than 0, it uses values before instart as starting dictionary. */ void LZ77Greedy(BlockState* s, const unsigned char* in, size_t instart, size_t inend, LZ77Store* store); #endif /* ZOPFLI_LZ77_H_ */ pigz-2.3/zopfli/makefile0000644000076500000240000000014512114721072014456 0ustar madlerstaffmake: gcc *.c -O2 -W -Wall -Wextra -ansi -pedantic -lm -o zopfli debug: gcc *.c -g3 -lm -o zopfli pigz-2.3/zopfli/README0000644000076500000240000000203412114721072013635 0ustar madlerstaffZopfli Compression Algorithm is a compression library programmed in C to perform very good, but slow, deflate or zlib compression. zopfli.c is separate from the library and contains an example program to create very well compressed gzip files. The basic functions to compress data are Deflate in deflate.h, ZlibCompress in zlib_container.h and GzipCompress in gzip_container.h. Use the Options object to set parameters that affect the speed and compression. Use the InitOptions function to place the default values in the Options first. 
Deflate creates a valid deflate stream in memory, see: http://www.ietf.org/rfc/rfc1951.txt ZlibCompress creates a valid zlib stream in memory, see: http://www.ietf.org/rfc/rfc1950.txt GzipCompress creates a valid gzip stream in memory, see: http://www.ietf.org/rfc/rfc1952.txt This library can only compress, not decompress. Existing zlib or deflate libraries can decompress the data. Zopfli Compression Algorithm was created by Lode Vandevenne and Jyrki Alakuijala, based on an algorithm by Jyrki Alakuijala. pigz-2.3/zopfli/squeeze.c0000644000076500000240000004113012114721072014602 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ #include "squeeze.h" #include #include #include #include "blocksplitter.h" #include "deflate.h" #include "tree.h" #include "util.h" typedef struct SymbolStats { /* The literal and length symbols. */ size_t litlens[288]; /* The 32 unique dist symbols, not the 32768 possible dists. */ size_t dists[32]; double ll_symbols[288]; /* Length of each lit/len symbol in bits. */ double d_symbols[32]; /* Length of each dist symbol in bits. */ } SymbolStats; /* Sets everything to 0. 
*/ static void InitStats(SymbolStats* stats) { memset(stats->litlens, 0, 288 * sizeof(stats->litlens[0])); memset(stats->dists, 0, 32 * sizeof(stats->dists[0])); memset(stats->ll_symbols, 0, 288 * sizeof(stats->ll_symbols[0])); memset(stats->d_symbols, 0, 32 * sizeof(stats->d_symbols[0])); } static void CopyStats(SymbolStats* source, SymbolStats* dest) { memcpy(dest->litlens, source->litlens, 288 * sizeof(dest->litlens[0])); memcpy(dest->dists, source->dists, 32 * sizeof(dest->dists[0])); memcpy(dest->ll_symbols, source->ll_symbols, 288 * sizeof(dest->ll_symbols[0])); memcpy(dest->d_symbols, source->d_symbols, 32 * sizeof(dest->d_symbols[0])); } /* Adds the bit lengths. */ static void AddWeighedStatFreqs(const SymbolStats* stats1, double w1, const SymbolStats* stats2, double w2, SymbolStats* result) { size_t i; for (i = 0; i < 288; i++) { result->litlens[i] = (size_t) (stats1->litlens[i] * w1 + stats2->litlens[i] * w2); } for (i = 0; i < 32; i++) { result->dists[i] = (size_t) (stats1->dists[i] * w1 + stats2->dists[i] * w2); } result->litlens[256] = 1; /* End symbol. */ } /* Get random number: "Multiply-With-Carry" generator of G. Marsaglia */ static unsigned int Ran() { static unsigned int m_w = 1; static unsigned int m_z = 2; m_z = 36969 * (m_z & 65535) + (m_z >> 16); m_w = 18000 * (m_w & 65535) + (m_w >> 16); return (m_z << 16) + m_w; /* 32-bit result. */ } static void RandomizeFreqs(size_t* freqs, int n) { int i; for (i = 0; i < n; i++) { if ((Ran() >> 4) % 3 == 0) freqs[i] = freqs[Ran() % n]; } } static void RandomizeStatFreqs(SymbolStats* stats) { RandomizeFreqs(stats->litlens, 288); RandomizeFreqs(stats->dists, 32); stats->litlens[256] = 1; /* End symbol. */ } static void ClearStatFreqs(SymbolStats* stats) { size_t i; for (i = 0; i < 288; i++) stats->litlens[i] = 0; for (i = 0; i < 32; i++) stats->dists[i] = 0; } /* Function that calculates a cost based on a model for the given LZ77 symbol. litlen: means literal symbol if dist is 0, length otherwise. 
*/ typedef double CostModelFun(unsigned litlen, unsigned dist, void* context); /* Cost model which should exactly match fixed tree. type: CostModelFun */ static double GetCostFixed(unsigned litlen, unsigned dist, void* unused) { (void)unused; if (dist == 0) { if (litlen <= 143) return 8; else return 9; } else { int dbits = GetDistExtraBits(dist); int lbits = GetLengthExtraBits(litlen); int lsym = GetLengthSymbol(litlen); double cost = 0; if (lsym <= 279) cost += 7; else cost += 8; cost += 5; /* Every dist symbol has length 5. */ return cost + dbits + lbits; } } /* Cost model based on symbol statistics. type: CostModelFun */ static double GetCostStat(unsigned litlen, unsigned dist, void* context) { SymbolStats* stats = (SymbolStats*)context; if (dist == 0) { return stats->ll_symbols[litlen]; } else { int lsym = GetLengthSymbol(litlen); int lbits = GetLengthExtraBits(litlen); int dsym = GetDistSymbol(dist); int dbits = GetDistExtraBits(dist); return stats->ll_symbols[lsym] + lbits + stats->d_symbols[dsym] + dbits; } } /* Finds the minimum possible cost this cost model can return for valid length and distance symbols. */ static double GetCostModelMinCost(CostModelFun* costmodel, void* costcontext) { double mincost; int bestlength = 0; /* length that has lowest cost in the cost model */ int bestdist = 0; /* distance that has lowest cost in the cost model */ int i; /* Table of distances that have a different distance symbol in the deflate specification. Each value is the first distance that has a new symbol. Only different symbols affect the cost model so only these need to be checked. See RFC 1951 section 3.2.5. Compressed blocks (length and distance codes). 
*/ static const int dsymbols[30] = { 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 }; mincost = LARGE_FLOAT; for (i = 3; i < 259; i++) { double c = costmodel(i, 1, costcontext); if (c < mincost) { bestlength = i; mincost = c; } } mincost = LARGE_FLOAT; for (i = 0; i < 30; i++) { double c = costmodel(3, dsymbols[i], costcontext); if (c < mincost) { bestdist = dsymbols[i]; mincost = c; } } return costmodel(bestlength, bestdist, costcontext); } /* Performs the forward pass for "squeeze". Gets the most optimal length to reach every byte from a previous byte, using cost calculations. s: the BlockState in: the input data array instart: where to start inend: where to stop (not inclusive) costmodel: function to calculate the cost of some lit/len/dist pair. costcontext: abstract context for the costmodel function length_array: output array of size (inend - instart) which will receive the best length to reach this byte from a previous byte. returns the cost that was, according to the costmodel, needed to get to the end. */ static double GetBestLengths(BlockState *s, const unsigned char* in, size_t instart, size_t inend, CostModelFun* costmodel, void* costcontext, unsigned short* length_array) { /* Best cost to get here so far. */ size_t blocksize = inend - instart; float* costs; size_t i = 0, k; unsigned short leng; unsigned short dist; unsigned short sublen[259]; size_t windowstart = instart > WINDOW_SIZE ? instart - WINDOW_SIZE : 0; Hash hash; Hash* h = &hash; double result; double mincost = GetCostModelMinCost(costmodel, costcontext); if (instart == inend) return 0; costs = (float*)malloc(sizeof(float) * (blocksize + 1)); if (!costs) exit(-1); /* Allocation failed. 
*/ InitHash(WINDOW_SIZE, h); WarmupHash(in, windowstart, inend, h); for (i = windowstart; i < instart; i++) { UpdateHash(in, i, inend, h); } for (i = 1; i < blocksize + 1; i++) costs[i] = LARGE_FLOAT; costs[0] = 0; /* Because it's the start. */ length_array[0] = 0; for (i = instart; i < inend; i++) { size_t j = i - instart; /* Index in the costs array and length_array. */ UpdateHash(in, i, inend, h); #ifdef SHORTCUT_LONG_REPETITIONS /* If we're in a long repetition of the same character and have more than MAX_MATCH characters before and after our position. */ if (h->same[i & WINDOW_MASK] > MAX_MATCH * 2 && i > instart + MAX_MATCH + 1 && i + MAX_MATCH * 2 + 1 < inend && h->same[(i - MAX_MATCH) & WINDOW_MASK] > MAX_MATCH) { double symbolcost = costmodel(MAX_MATCH, 1, costcontext); /* Set the length to reach each one to MAX_MATCH, and the cost to the cost corresponding to that length. Doing this, we skip MAX_MATCH values to avoid calling FindLongestMatch. */ for (k = 0; k < MAX_MATCH; k++) { costs[j + MAX_MATCH] = costs[j] + symbolcost; length_array[j + MAX_MATCH] = MAX_MATCH; i++; j++; UpdateHash(in, i, inend, h); } } #endif FindLongestMatch(s, h, in, i, inend, MAX_MATCH, sublen, &dist, &leng); /* Literal. */ if (i + 1 <= inend) { double newCost = costs[j] + costmodel(in[i], 0, costcontext); assert(newCost >= 0); if (newCost < costs[j + 1]) { costs[j + 1] = newCost; length_array[j + 1] = 1; } } /* Lengths. */ for (k = 3; k <= leng && i + k <= inend; k++) { double newCost; /* Calling the cost model is expensive, avoid this if we are already at the minimum possible cost that it can return. 
*/ if (costs[j + k] - costs[j] <= mincost) continue; newCost = costs[j] + costmodel(k, sublen[k], costcontext); assert(newCost >= 0); if (newCost < costs[j + k]) { assert(k <= MAX_MATCH); costs[j + k] = newCost; length_array[j + k] = k; } } } assert(costs[blocksize] >= 0); result = costs[blocksize]; CleanHash(h); free(costs); return result; } /* Calculates the optimal path of lz77 lengths to use, from the calculated length_array. The length_array must contain the optimal length to reach that byte. The path will be filled with the lengths to use, so its data size will be the amount of lz77 symbols. */ static void TraceBackwards(size_t size, const unsigned short* length_array, unsigned short** path, size_t* pathsize) { size_t index = size; if (size == 0) return; for (;;) { APPEND_DATA(length_array[index], path, pathsize); assert(length_array[index] <= index); assert(length_array[index] <= MAX_MATCH); assert(length_array[index] != 0); index -= length_array[index]; if (index == 0) break; } /* Mirror result. */ for (index = 0; index < *pathsize / 2; index++) { unsigned short temp = (*path)[index]; (*path)[index] = (*path)[*pathsize - index - 1]; (*path)[*pathsize - index - 1] = temp; } } static void FollowPath(BlockState* s, const unsigned char* in, size_t instart, size_t inend, unsigned short* path, size_t pathsize, LZ77Store* store) { size_t i, j, pos = 0; size_t windowstart = instart > WINDOW_SIZE ? instart - WINDOW_SIZE : 0; size_t total_length_test = 0; Hash hash; Hash* h = &hash; if (instart == inend) return; InitHash(WINDOW_SIZE, h); WarmupHash(in, windowstart, inend, h); for (i = windowstart; i < instart; i++) { UpdateHash(in, i, inend, h); } pos = instart; for (i = 0; i < pathsize; i++) { unsigned short length = path[i]; unsigned short dummy_length; unsigned short dist; assert(pos < inend); UpdateHash(in, pos, inend, h); /* Add to output. */ if (length >= MIN_MATCH) { /* Get the distance by recalculating longest match. 
The found length should match the length from the path. */ FindLongestMatch(s, h, in, pos, inend, length, 0, &dist, &dummy_length); assert(!(dummy_length != length && length > 2 && dummy_length > 2)); VerifyLenDist(in, inend, pos, dist, length); StoreLitLenDist(length, dist, store); total_length_test += length; } else { length = 1; StoreLitLenDist(in[pos], 0, store); total_length_test++; } assert(pos + length <= inend); for (j = 1; j < length; j++) { UpdateHash(in, pos + j, inend, h); } pos += length; } CleanHash(h); } /* Calculates the entropy of the statistics */ static void CalculateStatistics(SymbolStats* stats) { CalculateEntropy(stats->litlens, 288, stats->ll_symbols); CalculateEntropy(stats->dists, 32, stats->d_symbols); } /* Appends the symbol statistics from the store. */ static void GetStatistics(const LZ77Store* store, SymbolStats* stats) { size_t i; for (i = 0; i < store->size; i++) { if (store->dists[i] == 0) { stats->litlens[store->litlens[i]]++; } else { stats->litlens[GetLengthSymbol(store->litlens[i])]++; stats->dists[GetDistSymbol(store->dists[i])]++; } } stats->litlens[256] = 1; /* End symbol. */ CalculateStatistics(stats); } /* Does a single run for LZ77Optimal. For good compression, repeated runs with updated statistics should be performed. s: the block state in: the input data array instart: where to start inend: where to stop (not inclusive) path: pointer to dynamically allocated memory to store the path pathsize: pointer to the size of the dynamic path array length_array: array if size (inend - instart) used to store lengths costmodel: function to use as the cost model for this squeeze run costcontext: abstract context for the costmodel function store: place to output the LZ77 data returns the cost that was, according to the costmodel, needed to get to the end. This is not the actual cost. 
*/ static double LZ77OptimalRun(BlockState* s, const unsigned char* in, size_t instart, size_t inend, unsigned short** path, size_t* pathsize, unsigned short* length_array, CostModelFun* costmodel, void* costcontext, LZ77Store* store) { double cost = GetBestLengths( s, in, instart, inend, costmodel, costcontext, length_array); free(*path); *path = 0; *pathsize = 0; TraceBackwards(inend - instart, length_array, path, pathsize); FollowPath(s, in, instart, inend, *path, *pathsize, store); assert(cost < LARGE_FLOAT); return cost; } void LZ77Optimal(BlockState *s, const unsigned char* in, size_t instart, size_t inend, LZ77Store* store) { /* Dist to get to here with smallest cost. */ size_t blocksize = inend - instart; unsigned short* length_array = (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1)); unsigned short* path = 0; size_t pathsize = 0; LZ77Store currentstore; SymbolStats stats, beststats, laststats; int i; double cost; double bestcost = LARGE_FLOAT; double lastcost = 0; /* Try randomizing the costs a bit once the size stabilizes. */ int lastrandomstep = -1; if (!length_array) exit(-1); /* Allocation failed. */ InitStats(&stats); InitLZ77Store(¤tstore); /* Do regular deflate, then loop multiple shortest path runs, each time using the statistics of the previous run. */ /* Initial run. */ LZ77Greedy(s, in, instart, inend, ¤tstore); GetStatistics(¤tstore, &stats); /* Repeat statistics with each time the cost model from the previous stat run. */ for (i = 0; i < s->options->numiterations; i++) { CleanLZ77Store(¤tstore); InitLZ77Store(¤tstore); LZ77OptimalRun(s, in, instart, inend, &path, &pathsize, length_array, GetCostStat, (void*)&stats, ¤tstore); cost = CalculateBlockSize(currentstore.litlens, currentstore.dists, 0, currentstore.size, 2); if (cost < bestcost) { /* Copy to the output store. 
*/ CopyLZ77Store(¤tstore, store); CopyStats(&stats, &beststats); bestcost = cost; } CopyStats(&stats, &laststats); ClearStatFreqs(&stats); GetStatistics(¤tstore, &stats); if (lastrandomstep != -1) { /* This makes it converge slower but better. Do it only once the randomness kicks in so that if the user does few iterations, it gives a better result sooner. */ AddWeighedStatFreqs(&stats, 1.0, &laststats, 0.5, &stats); CalculateStatistics(&stats); } if (i > 5 && cost == lastcost) { CopyStats(&beststats, &stats); RandomizeStatFreqs(&stats); CalculateStatistics(&stats); lastrandomstep = i; } lastcost = cost; } free(length_array); free(path); CleanLZ77Store(¤tstore); } void LZ77OptimalFixed(BlockState *s, const unsigned char* in, size_t instart, size_t inend, LZ77Store* store) { /* Dist to get to here with smallest cost. */ size_t blocksize = inend - instart; unsigned short* length_array = (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1)); unsigned short* path = 0; size_t pathsize = 0; if (!length_array) exit(-1); /* Allocation failed. */ s->blockstart = instart; s->blockend = inend; /* Shortest path for fixed tree This one should give the shortest possible result for fixed tree, no repeated runs are needed since the tree is known. */ LZ77OptimalRun(s, in, instart, inend, &path, &pathsize, length_array, GetCostFixed, 0, store); free(length_array); free(path); } pigz-2.3/zopfli/squeeze.h0000644000076500000240000000404212114721072014610 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ /* The squeeze functions do enhanced LZ77 compression by optimal parsing with a cost model, rather than greedily choosing the longest length or using a single step of lazy matching like regular implementations. Since the cost model is based on the Huffman tree that can only be calculated after the LZ77 data is generated, there is a chicken and egg problem, and multiple runs are done with updated cost models to converge to a better solution. */ #ifndef ZOPFLI_SQUEEZE_H_ #define ZOPFLI_SQUEEZE_H_ #include "lz77.h" /* Calculates lit/len and dist pairs for given data. If instart is larger than 0, it uses values before instart as starting dictionary. */ void LZ77Optimal(BlockState *s, const unsigned char* in, size_t instart, size_t inend, LZ77Store* store); /* Does the same as LZ77Optimal, but optimized for the fixed tree of the deflate standard. The fixed tree rarely gives the best compression, but this gives the best LZ77 encoding possible with the fixed tree. This does not create or output any fixed tree, only LZ77 data optimized for use with a fixed tree. If instart is larger than 0, it uses values before instart as starting dictionary. */ void LZ77OptimalFixed(BlockState *s, const unsigned char* in, size_t instart, size_t inend, LZ77Store* store); #endif /* ZOPFLI_SQUEEZE_H_ */ pigz-2.3/zopfli/tree.c0000644000076500000240000000641312114721072014065 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ #include "tree.h" #include #include #include #include #include "katajainen.h" #include "util.h" void LengthsToSymbols(const unsigned* lengths, size_t n, unsigned maxbits, unsigned* symbols) { size_t* bl_count = (size_t*)malloc(sizeof(size_t) * (maxbits + 1)); size_t* next_code = (size_t*)malloc(sizeof(size_t) * (maxbits + 1)); unsigned bits, i; unsigned code; for (i = 0; i < n; i++) { symbols[i] = 0; } /* 1) Count the number of codes for each code length. Let bl_count[N] be the number of codes of length N, N >= 1. */ for (bits = 0; bits <= maxbits; bits++) { bl_count[bits] = 0; } for (i = 0; i < n; i++) { assert(lengths[i] <= maxbits); bl_count[lengths[i]]++; } /* 2) Find the numerical value of the smallest code for each code length. */ code = 0; bl_count[0] = 0; for (bits = 1; bits <= maxbits; bits++) { code = (code + bl_count[bits-1]) << 1; next_code[bits] = code; } /* 3) Assign numerical values to all codes, using consecutive values for all codes of the same length with the base values determined at step 2. */ for (i = 0; i < n; i++) { unsigned len = lengths[i]; if (len != 0) { symbols[i] = next_code[len]; next_code[len]++; } } free(bl_count); free(next_code); } void CalculateEntropy(const size_t* count, size_t n, double* bitlengths) { static const double kInvLog2 = 1.4426950408889; /* 1.0 / log(2.0) */ unsigned sum = 0; unsigned i; double log2sum; for (i = 0; i < n; ++i) { sum += count[i]; } log2sum = (sum == 0 ? 
log(n) : log(sum)) * kInvLog2; for (i = 0; i < n; ++i) { /* When the count of the symbol is 0, but its cost is requested anyway, it means the symbol will appear at least once anyway, so give it the cost as if its count is 1.*/ if (count[i] == 0) bitlengths[i] = log2sum; else bitlengths[i] = log2sum - log(count[i]) * kInvLog2; /* Depending on compiler and architecture, the above subtraction of two floating point numbers may give a negative result very close to zero instead of zero (e.g. -5.973954e-17 with gcc 4.1.2 on Ubuntu 11.4). Clamp it to zero. These floating point imprecisions do not affect the cost model significantly so this is ok. */ if (bitlengths[i] < 0 && bitlengths[i] > -1e-5) bitlengths[i] = 0; assert(bitlengths[i] >= 0); } } void CalculateBitLengths(const size_t* count, size_t n, int maxbits, unsigned* bitlengths) { int error = LengthLimitedCodeLengths(count, n, maxbits, bitlengths); (void) error; assert(!error); } pigz-2.3/zopfli/tree.h0000644000076500000240000000315112114721072014066 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ /* Utilities for creating and using Huffman trees. */ #ifndef ZOPFLI_TREE_H_ #define ZOPFLI_TREE_H_ #include /* Calculates the bitlengths for the Huffman tree, based on the counts of each symbol. 
*/ void CalculateBitLengths(const size_t* count, size_t n, int maxbits, unsigned *bitlengths); /* Converts a series of Huffman tree bitlengths, to the bit values of the symbols. */ void LengthsToSymbols(const unsigned* lengths, size_t n, unsigned maxbits, unsigned* symbols); /* Calculates the entropy of each symbol, based on the counts of each symbol. The result is similar to the result of CalculateBitLengths, but with the actual theoretical bit lengths according to the entropy. Since the resulting values are fractional, they cannot be used to encode the tree specified by DEFLATE. */ void CalculateEntropy(const size_t* count, size_t n, double* bitlengths); #endif /* ZOPFLI_TREE_H_ */ pigz-2.3/zopfli/util.c0000644000076500000240000001632012114721072014101 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ #include "util.h" #include #include #include int GetDistExtraBits(int dist) { #ifdef __GNUC__ if (dist < 5) return 0; return (31 ^ __builtin_clz(dist - 1)) - 1; /* log2(dist - 1) - 1 */ #else if (dist < 5) return 0; else if (dist < 9) return 1; else if (dist < 17) return 2; else if (dist < 33) return 3; else if (dist < 65) return 4; else if (dist < 129) return 5; else if (dist < 257) return 6; else if (dist < 513) return 7; else if (dist < 1025) return 8; else if (dist < 2049) return 9; else if (dist < 4097) return 10; else if (dist < 8193) return 11; else if (dist < 16385) return 12; else return 13; #endif } int GetDistExtraBitsValue(int dist) { #ifdef __GNUC__ if (dist < 5) { return 0; } else { int l = 31 ^ __builtin_clz(dist - 1); /* log2(dist - 1) */ return (dist - (1 + (1 << l))) & ((1 << (l - 1)) - 1); } #else if (dist < 5) return 0; else if (dist < 9) return (dist - 5) & 1; else if (dist < 17) return (dist - 9) & 3; else if (dist < 33) return (dist - 17) & 7; else if (dist < 65) return (dist - 33) & 15; else if (dist < 129) return (dist - 65) & 31; else if (dist < 257) return (dist - 129) & 63; else if (dist < 513) return (dist - 257) & 127; else if (dist < 1025) return (dist - 513) & 255; else if (dist < 2049) return (dist - 1025) & 511; else if (dist < 4097) return (dist - 2049) & 1023; else if (dist < 8193) return (dist - 4097) & 2047; else if (dist < 16385) return (dist - 8193) & 4095; else return (dist - 16385) & 8191; #endif } int GetDistSymbol(int dist) { #ifdef __GNUC__ if (dist < 5) { return dist - 1; } else { int l = (31 ^ __builtin_clz(dist - 1)); /* log2(dist - 1) */ int r = ((dist - 1) >> (l - 1)) & 1; return l * 2 + r; } #else if (dist < 193) { if (dist < 13) { /* dist 0..13. */ if (dist < 5) return dist - 1; else if (dist < 7) return 4; else if (dist < 9) return 5; else return 6; } else { /* dist 13..193. 
*/ if (dist < 17) return 7; else if (dist < 25) return 8; else if (dist < 33) return 9; else if (dist < 49) return 10; else if (dist < 65) return 11; else if (dist < 97) return 12; else if (dist < 129) return 13; else return 14; } } else { if (dist < 2049) { /* dist 193..2049. */ if (dist < 257) return 15; else if (dist < 385) return 16; else if (dist < 513) return 17; else if (dist < 769) return 18; else if (dist < 1025) return 19; else if (dist < 1537) return 20; else return 21; } else { /* dist 2049..32768. */ if (dist < 3073) return 22; else if (dist < 4097) return 23; else if (dist < 6145) return 24; else if (dist < 8193) return 25; else if (dist < 12289) return 26; else if (dist < 16385) return 27; else if (dist < 24577) return 28; else return 29; } } #endif } int GetLengthExtraBits(int l) { static const int table[259] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0 }; return table[l]; } int GetLengthExtraBitsValue(int l) { static const int table[259] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0 }; return table[l]; } /* Returns symbol in range [257-285] (inclusive). */ int GetLengthSymbol(int l) { static const int table[259] = { 0, 0, 0, 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272, 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, 274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, 276, 276, 276, 276, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 
284, 284, 284, 284, 285 }; return table[l]; } void InitOptions(Options* options) { options->verbose = 0; options->numiterations = 15; options->blocksplitting = 1; options->blocksplittinglast = 0; options->blocksplittingmax = 15; } pigz-2.3/zopfli/util.h0000644000076500000240000001573412114721072014116 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ /* Several utilities, including: #defines to try different compression results, basic deflate specification values and generic program options. */ #ifndef ZOPFLI_UTIL_H_ #define ZOPFLI_UTIL_H_ #include #include /* Minimum and maximum length that can be encoded in deflate. */ #define MAX_MATCH 258 #define MIN_MATCH 3 /* The window size for deflate. Must be a power of two. This should be 32768, the maximum possible by the deflate spec. Anything less hurts compression more than speed. */ #define WINDOW_SIZE 32768 /* The window mask used to wrap indices into the window. This is why the window size must be a power of two. */ #define WINDOW_MASK (WINDOW_SIZE - 1) /* A block structure of huge, non-smart, blocks to divide the input into, to allow operating on huge files without exceeding memory, such as the 1GB wiki9 corpus. The whole compression algorithm, including the smarter block splitting, will be executed independently on each huge block. 
Dividing into huge blocks hurts compression, but not much relative to the size. Set this to, for example, 20MB (20000000). Set it to 0 to disable master blocks. */ #define MASTER_BLOCK_SIZE 20000000 /* Used to initialize costs for example */ #define LARGE_FLOAT 1e30 /* For longest match cache. max 256. Uses huge amounts of memory but makes it faster. Uses this many times three bytes per single byte of the input data. This is so because longest match finding has to find the exact distance that belongs to each length for the best lz77 strategy. Good values: e.g. 5, 8. */ #define NUM_CACHED_LENGTHS 8 /* limit the max hash chain hits for this hash value. This has an effect only on files where the hash value is the same very often. On these files, this gives worse compression (the value should ideally be 32768, which is the WINDOW_SIZE, while zlib uses 4096 even for best level), but makes it faster on some specific files. Good value: e.g. 8192. */ #define MAX_CHAIN_HITS 8192 /* Whether to use the longest match cache for FindLongestMatch. This cache consumes a lot of memory but speeds it up. No effect on compression size. */ #define USE_LONGEST_MATCH_CACHE /* Enable to remember amount of successive identical bytes in the hash chain for finding longest match required for USE_HASH_SAME_HASH and SHORTCUT_LONG_REPETITIONS This has no effect on the compression result, and enabling it increases speed. */ #define USE_HASH_SAME /* Switch to a faster hash based on the info from USE_HASH_SAME once the best length so far is long enough. This is way faster for files with lots of identical bytes, on which the compressor is otherwise too slow. Regular files are unaffected or maybe a tiny bit slower. This has no effect on the compression result, only on speed. */ #define USE_HASH_SAME_HASH /* Enable this, to avoid slowness for files which are a repetition of the same character more than a multiple of MAX_MATCH times. This should not affect the compression result. 
*/ #define SHORTCUT_LONG_REPETITIONS /* Whether to use lazy matching in the greedy LZ77 implementation. This gives a better result of LZ77Greedy, but the effect this has on the optimal LZ77 varies from file to file. */ #define LAZY_MATCHING /* Gets the symbol for the given length, cfr. the DEFLATE spec. Returns the symbol in the range [257-285] (inclusive) */ int GetLengthSymbol(int l); /* Gets the amount of extra bits for the given length, cfr. the DEFLATE spec. */ int GetLengthExtraBits(int l); /* Gets value of the extra bits for the given length, cfr. the DEFLATE spec. */ int GetLengthExtraBitsValue(int l); /* Gets the symbol for the given dist, cfr. the DEFLATE spec. */ int GetDistSymbol(int dist); /* Gets the amount of extra bits for the given dist, cfr. the DEFLATE spec. */ int GetDistExtraBits(int dist); /* Gets value of the extra bits for the given dist, cfr. the DEFLATE spec. */ int GetDistExtraBitsValue(int dist); /* Options used throughout the program. */ typedef struct Options { /* Whether to print output */ int verbose; /* Maximum amount of times to rerun forward and backward pass to optimize LZ77 compression cost. Good values: 10, 15 for small files, 5 for files over several MB in size or it will be too slow. */ int numiterations; /* If true, splits the data in multiple deflate blocks with optimal choice for the block boundaries. Block splitting gives better compression. Default: true (1). */ int blocksplitting; /* If true, chooses the optimal block split points only after doing the iterative LZ77 compression. If false, chooses the block split points first, then does iterative LZ77 on each individual block. Depending on the file, either first or last gives the best compression. Default: false (0). */ int blocksplittinglast; /* Maximum amount of blocks to split into (0 for unlimited, but this can give extreme results that hurt compression on some files). Default value: 15. */ int blocksplittingmax; } Options; /* Initializes options with default values. 
*/ void InitOptions(Options* options); /* Appends value to dynamically allocated memory, doubling its allocation size whenever needed. value: the value to append, type T data: pointer to the dynamic array to append to, type T** size: pointer to the size of the array to append to, type size_t*. This is the size that you consider the array to be, not the internal allocation size. Precondition: allocated size of data is at least a power of two greater than or equal than *size. */ #ifdef __cplusplus /* C++ cannot assign void* from malloc to *data */ #define APPEND_DATA(/* T */ value, /* T** */ data, /* size_t* */ size) {\ if (!((*size) & ((*size) - 1))) {\ /*double alloc size if it's a power of two*/\ void** data_void = reinterpret_cast(data);\ *data_void = (*size) == 0 ? malloc(sizeof(**data))\ : realloc((*data), (*size) * 2 * sizeof(**data));\ }\ (*data)[(*size)] = (value);\ (*size)++;\ } #else /* C gives problems with strict-aliasing rules for (void**) cast */ #define APPEND_DATA(/* T */ value, /* T** */ data, /* size_t* */ size) {\ if (!((*size) & ((*size) - 1))) {\ /*double alloc size if it's a power of two*/\ (*data) = (*size) == 0 ? malloc(sizeof(**data))\ : realloc((*data), (*size) * 2 * sizeof(**data));\ }\ (*data)[(*size)] = (value);\ (*size)++;\ } #endif #endif /* ZOPFLI_UTIL_H_ */ pigz-2.3/zopfli/zlib_container.c0000644000076500000240000000442012114721072016124 0ustar madlerstaff/* Copyright 2013 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ #include "zlib_container.h" #include #include "deflate.h" /* Calculates the adler32 checksum of the data */ static unsigned adler32(const unsigned char* data, size_t size) { static const unsigned sums_overflow = 5550; unsigned s1 = 1; unsigned s2 = 1 >> 16; while (size > 0) { size_t amount = size > sums_overflow ? sums_overflow : size; size -= amount; while (amount > 0) { s1 += (*data++); s2 += s1; amount--; } s1 %= 65521; s2 %= 65521; } return (s2 << 16) | s1; } void ZlibCompress(const Options* options, const unsigned char* in, size_t insize, unsigned char** out, size_t* outsize) { unsigned char bitpointer = 0; unsigned checksum = adler32(in, (unsigned)insize); unsigned cmf = 120; /* CM 8, CINFO 7. See zlib spec.*/ unsigned flevel = 0; unsigned fdict = 0; unsigned cmfflg = 256 * cmf + fdict * 32 + flevel * 64; unsigned fcheck = 31 - cmfflg % 31; cmfflg += fcheck; APPEND_DATA(cmfflg / 256, out, outsize); APPEND_DATA(cmfflg % 256, out, outsize); Deflate(options, 2 /* dynamic block */, 1 /* final */, in, insize, &bitpointer, out, outsize); APPEND_DATA((checksum >> 24) % 256, out, outsize); APPEND_DATA((checksum >> 16) % 256, out, outsize); APPEND_DATA((checksum >> 8) % 256, out, outsize); APPEND_DATA(checksum % 256, out, outsize); if (options->verbose) { fprintf(stderr, "Original Size: %d, Compressed: %d, Compression: %f%% Removed\n", (int)insize, (int)*outsize, 100.0f * (float)(insize - *outsize) / (float)insize); } } pigz-2.3/zopfli/zlib_container.h0000644000076500000240000000241412114721072016132 0ustar madlerstaff/* Copyright 2013 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ #ifndef ZOPFLI_ZLIB_H_ #define ZOPFLI_ZLIB_H_ /* Functions to compress according to the Zlib specification. */ #include "util.h" /* Compresses according to the zlib specification and append the compressed result to the output. options: global program options out: pointer to the dynamic output array to which the result is appended. Must be freed after use. outsize: pointer to the dynamic output array size. */ void ZlibCompress(const Options* options, const unsigned char* in, size_t insize, unsigned char** out, size_t* outsize); #endif /* ZOPFLI_ZLIB_H_ */ pigz-2.3/zopfli/zopfli.c0000644000076500000240000001512512114721072014431 0ustar madlerstaff/* Copyright 2011 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Author: lode.vandevenne@gmail.com (Lode Vandevenne) Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) */ /* Zopfli compressor program. It can output gzip-, zlib- or deflate-compatible data. By default it creates a .gz file. This tool can only compress, not decompress. 
Decompression can be done by any standard gzip, zlib or deflate decompressor. */ #include #include #include #include #include "deflate.h" #include "gzip_container.h" #include "zlib_container.h" /* Loads a file into a memory array. */ static void LoadFile(const char* filename, unsigned char** out, size_t* outsize) { FILE* file; *out = 0; *outsize = 0; file = fopen(filename, "rb"); if (!file) return; fseek(file , 0 , SEEK_END); *outsize = ftell(file); rewind(file); *out = (unsigned char*)malloc(*outsize); if (*outsize && (*out)) { size_t testsize = fread(*out, 1, *outsize, file); if (testsize != *outsize) { /* It could be a directory */ free(*out); *out = 0; *outsize = 0; } } assert(!(*outsize) || out); /* If size is not zero, out must be allocated. */ fclose(file); } /* Saves a file from a memory array, overwriting the file if it existed. */ static void SaveFile(const char* filename, const unsigned char* in, size_t insize) { FILE* file = fopen(filename, "wb" ); assert(file); fwrite((char*)in, 1, insize, file); fclose(file); } typedef enum { OUTPUT_GZIP, OUTPUT_ZLIB, OUTPUT_DEFLATE } OutputType; /* outfilename: filename to write output to, or 0 to write to stdout instead */ void CompressFile(const Options* options, OutputType output_type, const char* infilename, const char* outfilename) { unsigned char* in; size_t insize; unsigned char* out = 0; size_t outsize = 0; LoadFile(infilename, &in, &insize); if (insize == 0) { fprintf(stderr, "Invalid filename: %s\n", infilename); return; } if (output_type == OUTPUT_GZIP) { GzipCompress(options, in, insize, &out, &outsize); } else if (output_type == OUTPUT_ZLIB) { ZlibCompress(options, in, insize, &out, &outsize); } else if (output_type == OUTPUT_DEFLATE) { unsigned char bp = 0; Deflate(options, 2 /* Dynamic block */, 1, in, insize, &bp, &out, &outsize); } else { assert(0); } if (outfilename) { SaveFile(outfilename, out, outsize); } else { size_t i; for (i = 0; i < outsize; i++) { /* Works only if terminal does not convert 
newlines. */ printf("%c", out[i]); } } free(out); free(in); } /* Add two strings together. Size does not matter. Result must be freed. */ static char* AddStrings(const char* str1, const char* str2) { size_t len = strlen(str1) + strlen(str2); char* result = (char*)malloc(len + 1); if (!result) exit(-1); /* Allocation failed. */ strcpy(result, str1); strcat(result, str2); return result; } static char StringsEqual(const char* str1, const char* str2) { return strcmp(str1, str2) == 0; } int main(int argc, char* argv[]) { Options options; const char* filename = 0; int output_to_stdout = 0; int i; OutputType output_type = OUTPUT_GZIP; InitOptions(&options); for (i = 1; i < argc; i++) { if (StringsEqual(argv[i], "-v")) options.verbose = 1; else if (StringsEqual(argv[i], "-c")) output_to_stdout = 1; else if (StringsEqual(argv[i], "--deflate")) output_type = OUTPUT_DEFLATE; else if (StringsEqual(argv[i], "--zlib")) output_type = OUTPUT_ZLIB; else if (StringsEqual(argv[i], "--gzip")) output_type = OUTPUT_GZIP; else if (StringsEqual(argv[i], "--i5")) options.numiterations = 5; else if (StringsEqual(argv[i], "--i10")) options.numiterations = 10; else if (StringsEqual(argv[i], "--i15")) options.numiterations = 15; else if (StringsEqual(argv[i], "--i25")) options.numiterations = 25; else if (StringsEqual(argv[i], "--i50")) options.numiterations = 50; else if (StringsEqual(argv[i], "--i100")) options.numiterations = 100; else if (StringsEqual(argv[i], "--i250")) options.numiterations = 250; else if (StringsEqual(argv[i], "--i500")) options.numiterations = 500; else if (StringsEqual(argv[i], "--i1000")) options.numiterations = 1000; else if (StringsEqual(argv[i], "-h")) { fprintf(stderr, "Usage: zopfli [OPTION]... 
FILE\n" " -h gives this help\n" " -c write the result on standard output, instead of disk" " filename + '.gz'\n" " -v verbose mode\n" " --gzip output to gzip format (default)\n" " --deflate output to deflate format instead of gzip\n" " --zlib output to zlib format instead of gzip\n"); fprintf(stderr, " --i5 less compression, but faster\n" " --i10 less compression, but faster\n" " --i15 default compression, 15 iterations\n" " --i25 more compression, but slower\n" " --i50 more compression, but slower\n" " --i100 more compression, but slower\n" " --i250 more compression, but slower\n" " --i500 more compression, but slower\n" " --i1000 more compression, but slower\n"); return 0; } } for (i = 1; i < argc; i++) { if (argv[i][0] != '-') { char* outfilename; filename = argv[i]; if (output_to_stdout) { outfilename = 0; } else if (output_type == OUTPUT_GZIP) { outfilename = AddStrings(filename, ".gz"); } else if (output_type == OUTPUT_ZLIB) { outfilename = AddStrings(filename, ".zlib"); } else if (output_type == OUTPUT_DEFLATE) { outfilename = AddStrings(filename, ".deflate"); } else { assert(0); } if (options.verbose && outfilename) { fprintf(stderr, "Saving to: %s\n", outfilename); } CompressFile(&options, output_type, filename, outfilename); free(outfilename); } } if (!filename) { fprintf(stderr, "Please provide filename\nFor help, type: %s -h\n", argv[0]); } return 0; }