pixz-1.0.2/000755 000765 000024 00000000000 12073170466 012617 5ustar00vasistaff000000 000000 pixz-1.0.2/common.c000644 000765 000024 00000036040 12073170437 014254 0ustar00vasistaff000000 000000 #include "pixz.h" #include #include #pragma mark UTILS FILE *gInFile = NULL; lzma_stream gStream = LZMA_STREAM_INIT; void die(const char *fmt, ...) { va_list args; va_start(args, fmt); vfprintf(stderr, fmt, args); fprintf(stderr, "\n"); fflush(stderr); va_end(args); exit(1); } char *xstrdup(const char *s) { if (!s) return NULL; size_t len = strlen(s); char *r = malloc(len + 1); if (!r) return NULL; return memcpy(r, s, len + 1); } bool is_multi_header(const char *name) { size_t i = strlen(name); while (i != 0 && name[i - 1] != '/') --i; return strncmp(name + i, "._", 2) == 0; } #pragma mark INDEX lzma_index *gIndex = NULL; file_index_t *gFileIndex = NULL, *gLastFile = NULL; static uint8_t *gFileIndexBuf = NULL; static size_t gFIBSize = CHUNKSIZE, gFIBPos = 0; static lzma_ret gFIBErr = LZMA_OK; static uint8_t gFIBInputBuf[CHUNKSIZE]; static size_t gMoved = 0; static void *decode_file_index_start(off_t block_seek, lzma_check check); static lzma_vli find_file_index(void **bdatap); static char *read_file_index_name(void); static void read_file_index_make_space(void); static void read_file_index_data(void); void dump_file_index(FILE *out, bool verbose) { for (file_index_t *f = gFileIndex; f != NULL; f = f->next) { if (verbose) { fprintf(out, "%10"PRIuMAX" %s\n", (uintmax_t)f->offset, f->name ? f->name : ""); } else { if (f->name) fprintf(out, "%s\n", f->name); } } } void free_file_index(void) { for (file_index_t *f = gFileIndex; f != NULL; ) { file_index_t *next = f->next; free(f->name); free(f); f = next; } gFileIndex = gLastFile = NULL; } typedef struct { lzma_block block; lzma_filter filters[LZMA_FILTERS_MAX + 1]; } block_wrapper_t; static void *decode_file_index_start(off_t block_seek, lzma_check check) { if (fseeko(gInFile, block_seek, SEEK_SET) == -1) die("Error seeking to block"); // Some memory in which to keep the discovered filters safe block_wrapper_t *bw = malloc(sizeof(block_wrapper_t)); bw->block = (lzma_block){ .check = check, .filters = bw->filters, .version = 0 }; int b = fgetc(gInFile); if (b == EOF || b == 0) die("Error reading block size"); bw->block.header_size = lzma_block_header_size_decode(b); uint8_t hdrbuf[bw->block.header_size]; hdrbuf[0] = (uint8_t)b; if (fread(hdrbuf + 1, bw->block.header_size - 1, 1, gInFile) != 1) die("Error reading block header"); if (lzma_block_header_decode(&bw->block, NULL, hdrbuf) != LZMA_OK) die("Error decoding file index block header"); if (lzma_block_decoder(&gStream, &bw->block) != LZMA_OK) die("Error initializing file index stream"); return bw; } static lzma_vli find_file_index(void **bdatap) { if (!gIndex) decode_index(); // find the last block lzma_index_iter iter; lzma_index_iter_init(&iter, gIndex); lzma_vli loc = lzma_index_uncompressed_size(gIndex) - 1; if (lzma_index_iter_locate(&iter, loc)) die("Can't locate file index block"); if (iter.stream.number != 1) return 0; // Too many streams for one file index void *bdata = decode_file_index_start(iter.block.compressed_file_offset, iter.stream.flags->check); gFileIndexBuf = malloc(gFIBSize); gStream.avail_out = gFIBSize; gStream.avail_in = 0; // Check if this is really an index read_file_index_data(); lzma_vli ret = iter.block.compressed_file_offset; if (xle64dec(gFileIndexBuf + gFIBPos) != PIXZ_INDEX_MAGIC) ret = 0; gFIBPos += sizeof(uint64_t); if (bdatap && ret) { *bdatap = bdata; } else { // 
Just looking, don't keep things around if (bdatap) *bdatap = NULL; free(bdata); free(gFileIndexBuf); gLastFile = gFileIndex = NULL; lzma_end(&gStream); } return ret; } lzma_vli read_file_index() { void *bdata = NULL; lzma_vli offset = find_file_index(&bdata); if (!offset) return 0; while (true) { char *name = read_file_index_name(); if (!name) break; file_index_t *f = malloc(sizeof(file_index_t)); f->name = strlen(name) ? xstrdup(name) : NULL; f->offset = xle64dec(gFileIndexBuf + gFIBPos); gFIBPos += sizeof(uint64_t); if (gLastFile) { gLastFile->next = f; } else { gFileIndex = f; } gLastFile = f; } free(gFileIndexBuf); lzma_end(&gStream); free(bdata); return offset; } static char *read_file_index_name(void) { while (true) { // find a nul that ends a name uint8_t *eos, *haystack = gFileIndexBuf + gFIBPos; ssize_t len = gFIBSize - gStream.avail_out - gFIBPos - sizeof(uint64_t); if (len > 0 && (eos = memchr(haystack, '\0', len))) { // found it gFIBPos += eos - haystack + 1; return (char*)haystack; } else if (gFIBErr == LZMA_STREAM_END) { // nothing left return NULL; } else { // need more data if (gStream.avail_out == 0) read_file_index_make_space(); read_file_index_data(); } } } static void read_file_index_make_space(void) { bool expand = (gFIBPos == 0); if (gFIBPos != 0) { // clear more space size_t move = gFIBSize - gStream.avail_out - gFIBPos; memmove(gFileIndexBuf, gFileIndexBuf + gFIBPos, move); gMoved += move; gStream.avail_out += gFIBPos; gFIBPos = 0; } // Try to reduce number of moves by expanding proactively if (expand || gMoved >= gFIBSize) { // malloc more space gStream.avail_out += gFIBSize; gFIBSize *= 2; gFileIndexBuf = realloc(gFileIndexBuf, gFIBSize); } } static void read_file_index_data(void) { gStream.next_out = gFileIndexBuf + gFIBSize - gStream.avail_out; while (gFIBErr != LZMA_STREAM_END && gStream.avail_out) { if (gStream.avail_in == 0) { // It's ok to read past the end of the block, we'll still // get LZMA_STREAM_END at the right place gStream.avail_in = fread(gFIBInputBuf, 1, CHUNKSIZE, gInFile); if (ferror(gInFile)) die("Error reading file index data"); gStream.next_in = gFIBInputBuf; } gFIBErr = lzma_code(&gStream, LZMA_RUN); if (gFIBErr != LZMA_OK && gFIBErr != LZMA_STREAM_END) die("Error decoding file index data"); } } #define BWCHUNK 512 typedef struct { uint8_t buf[BWCHUNK]; off_t pos; size_t size; } bw; static uint32_t *bw_read(bw *b) { size_t sz = sizeof(uint32_t); if (b->size < sz) { if (b->pos < sz) return NULL; // EOF b->size = (b->pos > BWCHUNK) ? 
BWCHUNK : b->pos; b->pos -= b->size; if (fseeko(gInFile, b->pos, SEEK_SET) == -1) return NULL; if (fread(b->buf, b->size, 1, gInFile) != 1) return NULL; } b->size -= sz; return &((uint32_t*)b->buf)[b->size / sz]; } static off_t stream_padding(bw *b, off_t pos) { b->pos = pos; b->size = 0; for (off_t pad = 0; true; pad += sizeof(uint32_t)) { uint32_t *i = bw_read(b); if (!i) die("Error reading stream padding"); if (*i != 0) { b->size += sizeof(uint32_t); return pad; } } } static void stream_footer(bw *b, lzma_stream_flags *flags) { uint8_t ftr[LZMA_STREAM_HEADER_SIZE]; for (int i = sizeof(ftr) / sizeof(uint32_t) - 1; i >= 0; --i) { uint32_t *p = bw_read(b); if (!p) die("Error reading stream footer"); *((uint32_t*)ftr + i) = *p; } if (lzma_stream_footer_decode(flags, ftr) != LZMA_OK) die("Error decoding stream footer"); } static lzma_index *next_index(off_t *pos) { bw b; off_t pad = stream_padding(&b, *pos); off_t eos = *pos - pad; lzma_stream_flags flags; stream_footer(&b, &flags); *pos = eos - LZMA_STREAM_HEADER_SIZE - flags.backward_size; if (fseeko(gInFile, *pos, SEEK_SET) == -1) die("Error seeking to index"); lzma_stream strm = LZMA_STREAM_INIT; lzma_index *index; if (lzma_index_decoder(&strm, &index, MEMLIMIT) != LZMA_OK) die("Error creating index decoder"); uint8_t ibuf[CHUNKSIZE]; strm.avail_in = 0; lzma_ret err = LZMA_OK; while (err != LZMA_STREAM_END) { if (strm.avail_in == 0) { strm.avail_in = fread(ibuf, 1, CHUNKSIZE, gInFile); if (ferror(gInFile)) die("Error reading index"); strm.next_in = ibuf; } err = lzma_code(&strm, LZMA_RUN); if (err != LZMA_OK && err != LZMA_STREAM_END) die("Error decoding index"); } *pos = eos - lzma_index_stream_size(index); if (fseeko(gInFile, *pos, SEEK_SET) == -1) die("Error seeking to beginning of stream"); if (lzma_index_stream_flags(index, &flags) != LZMA_OK) die("Error setting stream flags"); if (lzma_index_stream_padding(index, pad) != LZMA_OK) die("Error setting stream padding"); return index; } bool decode_index(void) { if (fseeko(gInFile, 0, SEEK_END) == -1) return false; // not seekable off_t pos = ftello(gInFile); gIndex = NULL; while (pos > 0) { lzma_index *index = next_index(&pos); if (gIndex && lzma_index_cat(index, gIndex, NULL) != LZMA_OK) die("Error concatenating indices"); gIndex = index; } return (gIndex != NULL); } #pragma mark QUEUE queue_t *queue_new(queue_free_t freer) { queue_t *q = malloc(sizeof(queue_t)); q->first = q->last = NULL; q->freer = freer; pthread_mutex_init(&q->mutex, NULL); pthread_cond_init(&q->pop_cond, NULL); return q; } void queue_free(queue_t *q) { for (queue_item_t *i = q->first; i; ) { queue_item_t *tmp = i->next; if (q->freer) q->freer(i->type, i->data); free(i); i = tmp; } pthread_mutex_destroy(&q->mutex); pthread_cond_destroy(&q->pop_cond); free(q); } void queue_push(queue_t *q, int type, void *data) { pthread_mutex_lock(&q->mutex); queue_item_t *i = malloc(sizeof(queue_item_t)); i->type = type; i->data = data; i->next = NULL; if (q->last) { q->last->next = i; } else { q->first = i; } q->last = i; pthread_cond_signal(&q->pop_cond); pthread_mutex_unlock(&q->mutex); } int queue_pop(queue_t *q, void **datap) { pthread_mutex_lock(&q->mutex); while (!q->first) pthread_cond_wait(&q->pop_cond, &q->mutex); queue_item_t *i = q->first; q->first = i->next; if (!q->first) q->last = NULL; *datap = i->data; int type = i->type; free(i); pthread_mutex_unlock(&q->mutex); return type; } #pragma mark PIPELINE queue_t *gPipelineStartQ = NULL, *gPipelineSplitQ = NULL, *gPipelineMergeQ = NULL; size_t gPipelineProcessMax = 0; 
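/* gPipelineProcessMax (above) and gPipelineQSize (below) are tuning knobs
 * filled in from pixz.c's -p and -q options before pipeline_create() runs;
 * a value of zero means "use the defaults computed in pipeline_create()". */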
size_t gPipelineQSize = 0; pipeline_data_free_t gPLFreer = NULL; pipeline_split_t gPLSplit = NULL; pipeline_process_t gPLProcess = NULL; size_t gPLProcessCount = 0; pthread_t *gPLProcessThreads = NULL; pthread_t gPLSplitThread; ssize_t gPLSplitSeq = 0; ssize_t gPLMergeSeq = 0; pipeline_item_t *gPLMergedItems = NULL; static void pipeline_qfree(int type, void *p); static void *pipeline_thread_split(void *); static void *pipeline_thread_process(void *arg); void pipeline_create( pipeline_data_create_t create, pipeline_data_free_t destroy, pipeline_split_t split, pipeline_process_t process) { gPLFreer = destroy; gPLSplit = split; gPLProcess = process; gPipelineStartQ = queue_new(pipeline_qfree); gPipelineSplitQ = queue_new(pipeline_qfree); gPipelineMergeQ = queue_new(pipeline_qfree); gPLSplitSeq = 0; gPLMergeSeq = 0; gPLMergedItems = NULL; gPLProcessCount = num_threads(); if (gPipelineProcessMax > 0 && gPipelineProcessMax < gPLProcessCount) gPLProcessCount = gPipelineProcessMax; gPLProcessThreads = malloc(gPLProcessCount * sizeof(pthread_t)); int qsize = gPipelineQSize ? gPipelineQSize : ceil(gPLProcessCount * 1.3 + 1); if (qsize < gPLProcessCount) { fprintf(stderr, "Warning: queue size is less than thread count, " "performance will suffer!\n"); } for (size_t i = 0; i < qsize; ++i) { // create blocks, including a margin of error pipeline_item_t *item = malloc(sizeof(pipeline_item_t)); item->data = create(); // seq and next are garbage queue_push(gPipelineStartQ, PIPELINE_ITEM, item); } for (size_t i = 0; i < gPLProcessCount; ++i) { if (pthread_create(&gPLProcessThreads[i], NULL, &pipeline_thread_process, (void*)(uintptr_t)i)) die("Error creating encode thread"); } if (pthread_create(&gPLSplitThread, NULL, &pipeline_thread_split, NULL)) die("Error creating read thread"); } static void pipeline_qfree(int type, void *p) { switch (type) { case PIPELINE_ITEM: { pipeline_item_t *item = (pipeline_item_t*)p; gPLFreer(item->data); free(item); break; } case PIPELINE_STOP: break; default: die("Unknown msg type %d", type); } } static void *pipeline_thread_split(void *ignore) { gPLSplit(); return NULL; } static void *pipeline_thread_process(void *arg) { size_t thnum = (uintptr_t)arg; gPLProcess(thnum); return NULL; } void pipeline_stop(void) { // ask the other threads to stop for (size_t i = 0; i < gPLProcessCount; ++i) queue_push(gPipelineSplitQ, PIPELINE_STOP, NULL); for (size_t i = 0; i < gPLProcessCount; ++i) { if (pthread_join(gPLProcessThreads[i], NULL)) die("Error joining processing thread"); } queue_push(gPipelineMergeQ, PIPELINE_STOP, NULL); } void pipeline_destroy(void) { if (pthread_join(gPLSplitThread, NULL)) die("Error joining splitter thread"); queue_free(gPipelineStartQ); queue_free(gPipelineSplitQ); queue_free(gPipelineMergeQ); free(gPLProcessThreads); } void pipeline_dispatch(pipeline_item_t *item, queue_t *q) { item->seq = gPLSplitSeq++; item->next = NULL; queue_push(q, PIPELINE_ITEM, item); } void pipeline_split(pipeline_item_t *item) { pipeline_dispatch(item, gPipelineSplitQ); } pipeline_item_t *pipeline_merged() { pipeline_item_t *item; while (!gPLMergedItems || gPLMergedItems->seq != gPLMergeSeq) { // We don't have the next item, wait for a new one pipeline_tag_t tag = queue_pop(gPipelineMergeQ, (void**)&item); if (tag == PIPELINE_STOP) return NULL; // Done processing items // Insert the item into the queue pipeline_item_t **prev = &gPLMergedItems; while (*prev && (*prev)->seq < item->seq) { prev = &(*prev)->next; } item->next = *prev; *prev = item; } // Got the next item item = 
gPLMergedItems; gPLMergedItems = item->next; ++gPLMergeSeq; return item; } pixz-1.0.2/cpu.c000644 000765 000024 00000000134 12073170437 013546 0ustar00vasistaff000000 000000 #include size_t num_threads(void) { return sysconf(_SC_NPROCESSORS_ONLN); } pixz-1.0.2/endian.c000644 000765 000024 00000001477 12073170437 014230 0ustar00vasistaff000000 000000 #ifdef __APPLE__ #include uint64_t xle64dec(const uint8_t *d) { return OSReadLittleInt64(d, 0); } void xle64enc(uint8_t *d, uint64_t n) { OSWriteLittleInt64(d, 0, n); } #elif defined(__linux__) || defined(__FreeBSD__) #include #ifdef __linux__ #include #else #include #endif uint64_t xle64dec(const uint8_t *d) { return le64toh(*(uint64_t*)d); } void xle64enc(uint8_t *d, uint64_t n) { *(uint64_t*)d = htole64(n); } #else // Platform independent #include uint64_t xle64dec(const uint8_t *d) { uint64_t r = 0; for (const uint8_t *p = d + sizeof(r) - 1; p >= d; --p) r = (r << 8) + *p; return r; } void xle64enc(uint8_t *d, uint64_t n) { for (uint8_t *p = d; p < d + sizeof(n); ++p) { *p = n & 0xff; n >>= 8; } } #endif pixz-1.0.2/LICENSE000644 000765 000024 00000002422 12073170437 013622 0ustar00vasistaff000000 000000 Copyright (c) 2009-2011 Dave Vasilevsky All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
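/*
 * Side note (not part of pixz): the xle64dec()/xle64enc() helpers in
 * endian.c above always read and write 64-bit values least-significant
 * byte first, regardless of host byte order.  A minimal round-trip sketch,
 * assuming the declarations from pixz.h are visible; xle64_roundtrip_demo
 * is a hypothetical name used only for illustration.
 */
#include <assert.h>
#include <stdint.h>

static void xle64_roundtrip_demo(void) {
    uint8_t buf[8];
    uint64_t magic = 0xDBAE14D62E324CA6ULL;    /* PIXZ_INDEX_MAGIC from pixz.h */
    xle64enc(buf, magic);                      /* stores bytes A6 4C 32 2E D6 14 AE DB */
    assert(buf[0] == 0xA6 && buf[7] == 0xDB);  /* little-endian layout on any host */
    assert(xle64dec(buf) == magic);            /* decodes back to the original value */
}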
pixz-1.0.2/list.c000644 000765 000024 00000001204 12073170437 013731 0ustar00vasistaff000000 000000 #include "pixz.h" #pragma mark FUNCTION DEFINITIONS void pixz_list(bool tar) { if (!decode_index()) die("Can't list non-seekable input"); lzma_index_iter iter; lzma_index_iter_init(&iter, gIndex); if (tar && read_file_index()) { dump_file_index(stdout, false); free_file_index(); } else { while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { printf("%9"PRIuMAX" / %9"PRIuMAX"\n", (uintmax_t)iter.block.unpadded_size, (uintmax_t)iter.block.uncompressed_size); } } lzma_index_end(gIndex, NULL); lzma_end(&gStream); } pixz-1.0.2/Makefile000644 000765 000024 00000002157 12073170437 014262 0ustar00vasistaff000000 000000 VERSION = 1.0.2 DISTNAME = pixz-$(VERSION) TARBALL = $(DISTNAME).tgz ifneq ($(shell gcc -v 2>&1 | grep 'Apple Inc'),) APPLE=1 endif OPT = -g -O0 MYCFLAGS = $(patsubst %,-I%/include,$(LIBPREFIX)) $(OPT) -std=gnu99 \ -Wall -Wno-unknown-pragmas -DPIXZ_VERSION='"$(VERSION)"' MYLDFLAGS = $(patsubst %,-L%/lib,$(LIBPREFIX)) $(OPT) -Wall THREADS = -lpthread LIBADD = $(THREADS) -llzma -larchive CC = gcc COMPILE = $(CC) $(MYCFLAGS) $(CFLAGS) -c -o LD = $(CC) $(MYLDFLAGS) $(LDFLAGS) -o ifdef APPLE ifeq ($(CC),gcc) MYLDFLAGS += -search_paths_first endif endif PROGS = pixz MANPAGE = pixz.1 COMMON = common.o endian.o cpu.o read.o write.o list.o all: $(PROGS) %.o: %.c pixz.h $(COMPILE) $@ $< $(PROGS): %: %.o $(COMMON) $(LD) $@ $^ $(LIBADD) clean: rm -rf *.o $(PROGS) $(MANPAGE) $(TARBALL) dist $(MANPAGE): pixz.1.asciidoc a2x -a manversion=$(VERSION) -f manpage $< dist: rm -rf dist mkdir -p dist git archive --prefix=$(DISTNAME)/ --format=tar HEAD | tar -x -C dist $(TARBALL): $(MANPAGE) dist cp pixz.1 dist/$(DISTNAME)/ tar -czf $(TARBALL) -C dist $(DISTNAME) tarball: $(TARBALL) .PHONY: all clean tarball dist pixz-1.0.2/NEWS000644 000765 000024 00000000506 12073170437 013315 0ustar00vasistaff000000 000000 1.0.2 - Jan 8, 2013 * Fix a crashing bug when input is incompressible https://github.com/vasi/pixz/issues/10 1.0.1 - Dec 22, 2012 * Add a man page * Add tuning options -e, -q, -f 1.0 - Nov 21, 2012 * Support streaming decompression * Prevent accidental truncation * Don't spew binary output to a terminal pixz-1.0.2/pixz.1000644 000765 000024 00000010375 12073170466 013701 0ustar00vasistaff000000 000000 '\" t .\" Title: pixz .\" Author: [see the "AUTHOR" section] .\" Generator: DocBook XSL Stylesheets v1.78.0 .\" Date: 01/08/2013 .\" Manual: \ \& .\" Source: \ \& 1.0.2 .\" Language: English .\" .TH "PIXZ" "1" "01/08/2013" "\ \& 1\&.0\&.2" "\ \&" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" ----------------------------------------------------------------- .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" http://bugs.debian.org/507673 .\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .\" ----------------------------------------------------------------- .\" * MAIN CONTENT STARTS HERE * .\" ----------------------------------------------------------------- .SH "NAME" pixz \- parallel, indexed xz compressor .SH "SYNOPSIS" .sp \fBpixz\fR 
[\fIOPTIONS\fR] [\fIINPUT\fR [\fIOUTPUT\fR]] .SH "DESCRIPTION" .sp pixz compresses and decompresses files using multiple processors\&. If the input looks like a tar(1) archive, it also creates an index of all the files in the archive\&. This allows the extraction of only a small segment of the tarball, without needing to decompress the entire archive\&. .SH "OPTIONS" .sp By default, pixz uses standard input and output, unless \fIINPUT\fR and \fIOUTPUT\fR arguments are provided\&. If pixz is provided with input but no output, it will delete the input once it\(cqs done\&. .PP \fB\-d\fR .RS 4 Decompress, instead of compress\&. .RE .PP \fB\-t\fR .RS 4 Force non\-tarball mode\&. By default, pixz auto\-detects tar data, and if found enters tarball mode\&. When compressing in non\-tarball mode, no archive index will be created\&. When decompressing, fast extraction will not be available\&. .RE .PP \fB\-l\fR .RS 4 List the archive contents\&. In tarball mode, lists the files in the tarball\&. In non\-tarball mode, lists the blocks of compressed data\&. .RE .PP \fB\-x\fR \fIPATH\fR .RS 4 Extract certain members from an archive, quickly\&. All members whose path begins with \fIPATH\fR will be extracted\&. .RE .PP \fB\-i\fR \fIINPUT\fR .RS 4 Use \fIINPUT\fR as the input\&. .RE .PP \fB\-o\fR \fIOUTPUT\fR .RS 4 Use OUTPUT as the output\&. .RE .PP \fB\-#\fR .RS 4 Set compression level, from \-0 (lowest compression, fastest) to \-9 (highest compression, slowest)\&. .RE .PP \fB\-e\fR .RS 4 Use "extreme" compression, which is much slower and only yields a marginal decrease in size\&. .RE .PP \fB\-p\fR \fICPUS\fR .RS 4 Set the number of CPU cores to use\&. By default pixz will use the number of cores on the system\&. .RE .PP \fB\-f\fR \fIFRACTION\fR .RS 4 Set the size of each compression block, relative to the LZMA dictionary size (default is 2\&.0)\&. Higher values give better compression ratios, but use more memory and make random access less efficient\&. Values less than 1\&.0 aren\(cqt very efficient\&. .RE .PP \fB\-q\fR \fISIZE\fR .RS 4 Set the number of blocks to allocate for the compression queue (default is 1\&.3 * cores + 2, rounded up)\&. Higher values give better throughput, up to a point, but use more memory\&. Values less than the number of cores will make some cores sit idle\&. .RE .PP \fB\-h\fR .RS 4 Show pixz\(cqs online help\&. .RE .SH "EXAMPLES" .PP pixz < myfile > myfile\&.xz .RS 4 Compress a file with pixz\&. .RE .PP pixz myfile .RS 4 Compress to myfile\&.pxz, removing the original\&. .RE .PP tar \-Ipixz \-cf output\&.tpxz directory .RS 4 Make tar use pixz for compression\&. .RE .PP pixz \-x path/to/file < input\&.tpxz | tar x .RS 4 Extract one file from an archive, quickly\&. .RE .SH "AUTHOR" .sp pixz is written by Dave Vasilevsky\&. .SH "RESOURCES" .sp The pixz homepage: http://github\&.com/vasi/pixz/ .sp Source downloads: http://sourceforge\&.net/projects/pixz/files/ .SH "COPYRIGHT" .sp Copyright \(co 2009\-2010 Dave Vasilevsky\&. Use of this software is granted under the FreeBSD License\&. .SH "SEE ALSO" .sp xz(1), tar(1) pixz-1.0.2/pixz.1.asciidoc000644 000765 000024 00000005442 12073170437 015453 0ustar00vasistaff000000 000000 PIXZ(1) ======= :doctype: manpage NAME ---- pixz - parallel, indexed xz compressor SYNOPSIS -------- *pixz* ['OPTIONS'] ['INPUT' ['OUTPUT']] DESCRIPTION ----------- pixz compresses and decompresses files using multiple processors. If the input looks like a tar(1) archive, it also creates an index of all the files in the archive. 
This allows the extraction of only a small segment of the tarball, without needing to decompress the entire archive. OPTIONS ------- By default, pixz uses standard input and output, unless 'INPUT' and 'OUTPUT' arguments are provided. If pixz is provided with input but no output, it will delete the input once it's done. *-d*:: Decompress, instead of compress. *-t*:: Force non-tarball mode. By default, pixz auto-detects tar data, and if found enters tarball mode. When compressing in non-tarball mode, no archive index will be created. When decompressing, fast extraction will not be available. *-l*:: List the archive contents. In tarball mode, lists the files in the tarball. In non-tarball mode, lists the blocks of compressed data. *-x* 'PATH':: Extract certain members from an archive, quickly. All members whose path begins with 'PATH' will be extracted. *-i* 'INPUT':: Use 'INPUT' as the input. *-o* 'OUTPUT':: Use OUTPUT as the output. *-#*:: Set compression level, from -0 (lowest compression, fastest) to -9 (highest compression, slowest). *-e*:: Use "extreme" compression, which is much slower and only yields a marginal decrease in size. *-p* 'CPUS':: Set the number of CPU cores to use. By default pixz will use the number of cores on the system. *-f* 'FRACTION':: Set the size of each compression block, relative to the LZMA dictionary size (default is 2.0). Higher values give better compression ratios, but use more memory and make random access less efficient. Values less than 1.0 aren't very efficient. *-q* 'SIZE':: Set the number of blocks to allocate for the compression queue (default is 1.3 * cores + 2, rounded up). Higher values give better throughput, up to a point, but use more memory. Values less than the number of cores will make some cores sit idle. *-h*:: Show pixz's online help. EXAMPLES -------- `pixz < myfile > myfile.xz`:: Compress a file with pixz. `pixz myfile`:: Compress to myfile.pxz, removing the original. `tar -Ipixz -cf output.tpxz directory`:: Make tar use pixz for compression. `pixz -x path/to/file < input.tpxz | tar x`:: Extract one file from an archive, quickly. AUTHOR ------ pixz is written by Dave Vasilevsky. RESOURCES --------- The pixz homepage: Source downloads: COPYRIGHT --------- Copyright (C) 2009-2010 Dave Vasilevsky. Use of this software is granted under the FreeBSD License. 
SEE ALSO -------- xz(1), tar(1) pixz-1.0.2/pixz.c000644 000765 000024 00000012143 12073170437 013754 0ustar00vasistaff000000 000000 #include "pixz.h" #include #include typedef enum { OP_WRITE, OP_READ, OP_EXTRACT, OP_LIST } pixz_op_t; static bool strsuf(char *big, char *small); static char *subsuf(char *in, char *suf1, char *suf2); static char *auto_output(pixz_op_t op, char *in); static void usage(const char *msg) { if (msg) fprintf(stderr, "%s\n\n", msg); fprintf(stderr, "pixz: Parallel Indexing XZ compression, fully compatible with XZ\n" "\n" "Basic usage:\n" " pixz input output.pxz # Compress a file in parallel\n" " pixz -d input.pxz output # Decompress\n" "\n" "Tarballs:\n" " pixz input.tar output.tpxz # Compress and index a tarball\n" " pixz -d input.tpxz output.tar # Decompress\n" " pixz -l input.tpxz # List tarball contents very fast\n" " pixz -x path/to/file < input.tpxz | tar x # Extract one file very fast\n" " tar -Ipixz -cf output.tpxz dir # Make tar use pixz automatically\n" "\n" "Input and output:\n" " pixz < input > output.pxz # Same as `pixz input output.pxz`\n" " pixz -i input -o output.pxz # Ditto\n" " pixz [-d] input # Automatically choose output filename\n" "\n" "Other flags:\n" " -0, -1 ... -9 Set compression level, from fastest to strongest\n" " -p NUM Use a maximum of NUM CPU-intensive threads\n" " -t Don't assume input is in tar format\n" " -h Print this help\n" "\n" "pixz %s\n" "(C) 2009-2012 Dave Vasilevsky \n" "https://github.com/vasi/pixz\n" "You may use this software under the FreeBSD License\n", PIXZ_VERSION); exit(2); } int main(int argc, char **argv) { uint32_t level = LZMA_PRESET_DEFAULT; bool tar = true; pixz_op_t op = OP_WRITE; char *ipath = NULL, *opath = NULL; int ch; char *optend; long optint; double optdbl; while ((ch = getopt(argc, argv, "dxli:o:tvhp:0123456789f:q:e")) != -1) { switch (ch) { case 'd': op = OP_READ; break; case 'x': op = OP_EXTRACT; break; case 'l': op = OP_LIST; break; case 'i': ipath = optarg; break; case 'o': opath = optarg; break; case 't': tar = false; break; case 'h': usage(NULL); break; case 'e': level |= LZMA_PRESET_EXTREME; break; case 'f': optdbl = strtod(optarg, &optend); if (*optend || optdbl <= 0) usage("Need a positive floating-point argument to -f"); gBlockFraction = optdbl; break; case 'p': optint = strtol(optarg, &optend, 10); if (optint < 0 || *optend) usage("Need a non-negative integer argument to -p"); gPipelineProcessMax = optint; break; case 'q': optint = strtol(optarg, &optend, 10); if (optint <= 0 || *optend) usage("Need a positive integer argument to -q"); gPipelineQSize = optint; break; default: if (ch >= '0' && ch <= '9') { level = ch - '0'; } else { usage(""); } } } argc -= optind; argv += optind; gInFile = stdin; gOutFile = stdout; bool iremove = false; if (op != OP_EXTRACT && argc >= 1) { if (argc > 2 || (op == OP_LIST && argc == 2)) usage("Too many arguments"); if (ipath) usage("Multiple input files specified"); ipath = argv[0]; if (argc == 2) { if (opath) usage("Multiple output files specified"); opath = argv[1]; } else if (op != OP_LIST) { iremove = true; opath = auto_output(op, argv[0]); if (!opath) usage("Unknown suffix"); } } if (ipath && !(gInFile = fopen(ipath, "r"))) die("Can't open input file"); if (opath && !(gOutFile = fopen(opath, "w"))) die("Can't open output file"); switch (op) { case OP_WRITE: if (isatty(fileno(gOutFile)) == 1) usage("Refusing to output to a TTY"); pixz_write(tar, level); break; case OP_READ: pixz_read(tar, 0, NULL); break; case OP_EXTRACT: pixz_read(tar, argc, argv); 
break; case OP_LIST: pixz_list(tar); } if (iremove) unlink(ipath); return 0; } #define SUF(_op, _s1, _s2) ({ \ if (op == OP_##_op) { \ char *r = subsuf(in, _s1, _s2); \ if (r) \ return r; \ } \ }) static char *auto_output(pixz_op_t op, char *in) { SUF(READ, ".tar.xz", ".tar"); SUF(READ, ".tpxz", ".tar"); SUF(READ, ".xz", ""); SUF(WRITE, ".tar", ".tpxz"); SUF(WRITE, "", ".xz"); return NULL; } static bool strsuf(char *big, char *small) { size_t bl = strlen(big), sl = strlen(small); return strcmp(big + bl - sl, small) == 0; } static char *subsuf(char *in, char *suf1, char *suf2) { if (!strsuf(in, suf1)) return NULL; size_t li = strlen(in), l1 = strlen(suf1), l2 = strlen(suf2); char *r = malloc(li + l2 - l1 + 1); memcpy(r, in, li - l1); strcpy(r + li - l1, suf2); return r; } pixz-1.0.2/pixz.h000644 000765 000024 00000005316 12073170437 013765 0ustar00vasistaff000000 000000 #include #define __USE_LARGEFILE 1 #include #include #include #include #include #include #pragma mark DEFINES #define PIXZ_INDEX_MAGIC 0xDBAE14D62E324CA6LL #define CHECK LZMA_CHECK_CRC32 #define MEMLIMIT (64ULL * 1024 * 1024 * 1024) // crazy high #define CHUNKSIZE 4096 #ifndef DEBUG #define DEBUG 0 #endif #if DEBUG #define debug(str, ...) fprintf(stderr, str "\n", ##__VA_ARGS__) #else #define debug(...) #endif #pragma mark OPERATIONS void pixz_list(bool tar); void pixz_write(bool tar, uint32_t level); void pixz_read(bool verify, size_t nspecs, char **specs); #pragma mark UTILS FILE *gInFile, *gOutFile; lzma_stream gStream; extern lzma_index *gIndex; void die(const char *fmt, ...); char *xstrdup(const char *s); uint64_t xle64dec(const uint8_t *d); void xle64enc(uint8_t *d, uint64_t n); size_t num_threads(void); extern double gBlockFraction; #pragma mark INDEX typedef struct file_index_t file_index_t; struct file_index_t { char *name; off_t offset; file_index_t *next; }; extern file_index_t *gFileIndex, *gLastFile; bool is_multi_header(const char *name); bool decode_index(void); // true on success lzma_vli read_file_index(void); void dump_file_index(FILE *out, bool verbose); void free_file_index(void); #pragma mark QUEUE typedef struct queue_item_t queue_item_t; struct queue_item_t { int type; void *data; queue_item_t *next; }; typedef void (*queue_free_t)(int type, void *p); typedef struct { queue_item_t *first; queue_item_t *last; pthread_mutex_t mutex; pthread_cond_t pop_cond; queue_free_t freer; } queue_t; queue_t *queue_new(queue_free_t freer); void queue_free(queue_t *q); void queue_push(queue_t *q, int type, void *data); int queue_pop(queue_t *q, void **datap); #pragma mark PIPELINE extern size_t gPipelineQSize; extern size_t gPipelineProcessMax; extern queue_t *gPipelineStartQ, *gPipelineSplitQ, *gPipelineMergeQ; typedef enum { PIPELINE_ITEM, PIPELINE_STOP } pipeline_tag_t; typedef struct pipeline_item_t pipeline_item_t; struct pipeline_item_t { size_t seq; pipeline_item_t *next; void *data; }; typedef void* (*pipeline_data_create_t)(void); typedef void (*pipeline_data_free_t)(void*); typedef void (*pipeline_split_t)(void); typedef void (*pipeline_process_t)(size_t); void pipeline_create( pipeline_data_create_t create, pipeline_data_free_t destroy, pipeline_split_t split, pipeline_process_t process); void pipeline_stop(void); void pipeline_destroy(void); void pipeline_dispatch(pipeline_item_t *item, queue_t *q); void pipeline_split(pipeline_item_t *item); pipeline_item_t *pipeline_merged(); pixz-1.0.2/read.c000644 000765 000024 00000045537 12073170437 013712 0ustar00vasistaff000000 000000 #include "pixz.h" #include 
#include #pragma mark DECLARE WANTED typedef struct wanted_t wanted_t; struct wanted_t { wanted_t *next; char *name; off_t start, end; size_t size; }; static wanted_t *gWantedFiles = NULL; static bool spec_match(char *spec, char *name); static void wanted_files(size_t count, char **specs); static void wanted_free(wanted_t *w); #pragma mark DECLARE PIPELINE typedef enum { BLOCK_SIZED, BLOCK_UNSIZED, BLOCK_CONTINUATION } block_type; typedef struct { uint8_t *input, *output; size_t incap, outcap; size_t insize, outsize; off_t uoffset; // uncompressed offset lzma_check check; block_type btype; } io_block_t; static void *block_create(void); static void block_free(void *data); static void read_thread(void); static void read_thread_noindex(void); static void decode_thread(size_t thnum); #pragma mark DECLARE ARCHIVE static pipeline_item_t *gArItem = NULL, *gArLastItem = NULL; static off_t gArLastOffset; static size_t gArLastSize; static wanted_t *gArWanted = NULL; static bool gArNextItem = false; static bool gExplicitFiles = false; static int tar_ok(struct archive *ar, void *ref); static ssize_t tar_read(struct archive *ar, void *ref, const void **bufp); static bool tar_next_block(void); static void tar_write_last(void); #pragma mark DECLARE READ BUFFER #define STREAMSIZE (1024 * 1024) #define MAXSPLITSIZE ((64 * 1024 * 1024) * 2) // xz -9 blocksize * 2 static pipeline_item_t *gRbufPI = NULL; static io_block_t *gRbuf = NULL; static void block_capacity(io_block_t *ib, size_t incap, size_t outcap); typedef enum { RBUF_ERR, RBUF_EOF, RBUF_PART, RBUF_FULL } rbuf_read_status; static rbuf_read_status rbuf_read(size_t bytes); static bool rbuf_cycle(lzma_stream *stream, bool start, size_t skip); static void rbuf_consume(size_t bytes); static void rbuf_dispatch(void); static bool read_header(lzma_check *check); static bool read_block(bool force_stream, lzma_check check); static void read_streaming(lzma_block *block); static void read_index(void); static void read_footer(void); #pragma mark DECLARE UTILS static lzma_vli gFileIndexOffset = 0; static bool taste_tar(io_block_t *ib); static bool taste_file_index(io_block_t *ib); #pragma mark MAIN void pixz_read(bool verify, size_t nspecs, char **specs) { if (decode_index()) { if (verify) gFileIndexOffset = read_file_index(); wanted_files(nspecs, specs); gExplicitFiles = nspecs; } #if DEBUG for (wanted_t *w = gWantedFiles; w; w = w->next) debug("want: %s", w->name); #endif pipeline_create(block_create, block_free, gIndex ? 
read_thread : read_thread_noindex, decode_thread); if (verify && gFileIndexOffset) { gArWanted = gWantedFiles; wanted_t *w = gWantedFiles, *wlast = NULL; bool lastmulti = false; off_t lastoff = 0; struct archive *ar = archive_read_new(); archive_read_support_compression_none(ar); archive_read_support_format_tar(ar); archive_read_open(ar, NULL, tar_ok, tar_read, tar_ok); struct archive_entry *entry; while (true) { int aerr = archive_read_next_header(ar, &entry); if (aerr == ARCHIVE_EOF) { break; } else if (aerr != ARCHIVE_OK && aerr != ARCHIVE_WARN) { fprintf(stderr, "%s\n", archive_error_string(ar)); die("Error reading archive entry"); } off_t off = archive_read_header_position(ar); const char *path = archive_entry_pathname(entry); if (!lastmulti) { if (wlast && wlast->size != off - lastoff) die("Index and archive show differing sizes for %s: %d vs %d", wlast->name, wlast->size, off - lastoff); lastoff = off; } lastmulti = is_multi_header(path); if (lastmulti) continue; if (!w) die("File %s missing in index", path); if (strcmp(path, w->name) != 0) die("Index and archive differ as to next file: %s vs %s", w->name, path); wlast = w; w = w->next; } archive_read_finish(ar); if (w && w->name) die("File %s missing in archive", w->name); tar_write_last(); // write whatever's left } if (!gExplicitFiles) { /* Heuristics for detecting pixz file index: * - Input must be streaming (otherwise read_thread does this) * - Data must look tar-like * - Must have all sized blocks, followed by unsized file index */ bool start = !gIndex && verify, tar = false, all_sized = true, skipping = false; pipeline_item_t *pi; while ((pi = pipeline_merged())) { io_block_t *ib = (io_block_t*)(pi->data); if (skipping && ib->btype != BLOCK_CONTINUATION) { fprintf(stderr, "Warning: File index heuristic failed, use -t flag.\n"); skipping = false; } if (!skipping && tar && !start && all_sized && ib->btype == BLOCK_UNSIZED && taste_file_index(ib)) skipping = true; if (start) { tar = taste_tar(ib); start = false; } if (ib->btype != BLOCK_SIZED) all_sized = false; if (!skipping) fwrite(ib->output, ib->outsize, 1, gOutFile); queue_push(gPipelineStartQ, PIPELINE_ITEM, pi); } } pipeline_destroy(); wanted_free(gWantedFiles); } #pragma mark BLOCKS static void *block_create(void) { io_block_t *ib = malloc(sizeof(io_block_t)); ib->incap = ib->outcap = 0; ib->input = ib->output = NULL; return ib; } static void block_free(void* data) { io_block_t *ib = (io_block_t*)data; free(ib->input); free(ib->output); free(ib); } #pragma mark SETUP static void wanted_free(wanted_t *w) { for (wanted_t *w = gWantedFiles; w; ) { wanted_t *tmp = w->next; free(w); w = tmp; } } static bool spec_match(char *spec, char *name) { bool match = true; for (; *spec; ++spec, ++name) { if (!*name || *spec != *name) { // spec must be equal or prefix match = false; break; } } // If spec's a prefix of the file name, it must be a dir name return match && (!*name || *name == '/'); } static void wanted_files(size_t count, char **specs) { if (!gFileIndexOffset) { if (count) die("Can't filter non-tarball"); gWantedFiles = NULL; return; } // Remove trailing slashes from specs for (char **spec = specs; spec < specs + count; ++spec) { char *c = *spec; while (*c++) ; // forward to end while (--c >= *spec && *c == '/') *c = '\0'; } bool matched[count]; // for each spec, does it match? 
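/* matched[] records which specs found at least one file, so any spec that
 * matches nothing can be reported as an error after the scan below. */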
memset(matched, 0, sizeof(matched)); wanted_t *last = NULL; // Check each file in order, to see if we want it for (file_index_t *f = gFileIndex; f->name; f = f->next) { bool match = !count; for (char **spec = specs; spec < specs + count; ++spec) { if (spec_match(*spec, f->name)) { match = true; matched[spec - specs] = true; break; } } if (match) { wanted_t *w = malloc(sizeof(wanted_t)); *w = (wanted_t){ .name = f->name, .start = f->offset, .end = f->next->offset, .next = NULL }; w->size = w->end - w->start; if (last) { last->next = w; } else { gWantedFiles = w; } last = w; } } // Make sure each spec matched for (size_t i = 0; i < count; ++i) { if (!matched[i]) die("\"%s\" not found in archive", *(specs + i)); } } #pragma mark READ static void block_capacity(io_block_t *ib, size_t incap, size_t outcap) { if (incap > ib->incap) { ib->incap = incap; ib->input = realloc(ib->input, incap); } if (outcap > ib->outcap) { ib->outcap = outcap; ib->output = malloc(outcap); } } // Ensure at least this many bytes available // Return 1 on success, zero on EOF, -1 on error static rbuf_read_status rbuf_read(size_t bytes) { if (!gRbufPI) { queue_pop(gPipelineStartQ, (void**)&gRbufPI); gRbuf = (io_block_t*)(gRbufPI->data); gRbuf->insize = gRbuf->outsize = 0; } if (gRbuf->insize >= bytes) return RBUF_FULL; block_capacity(gRbuf, bytes, 0); size_t r = fread(gRbuf->input + gRbuf->insize, 1, bytes - gRbuf->insize, gInFile); gRbuf->insize += r; if (r) return (gRbuf->insize == bytes) ? RBUF_FULL : RBUF_PART; return feof(gInFile) ? RBUF_EOF : RBUF_ERR; } static bool rbuf_cycle(lzma_stream *stream, bool start, size_t skip) { if (!start) { rbuf_consume(gRbuf->insize); if (rbuf_read(CHUNKSIZE) < RBUF_PART) return false; } stream->next_in = gRbuf->input + skip; stream->avail_in = gRbuf->insize - skip; return true; } static void rbuf_consume(size_t bytes) { if (bytes < gRbuf->insize) memmove(gRbuf->input, gRbuf->input + bytes, gRbuf->insize - bytes); gRbuf->insize -= bytes; } static void rbuf_dispatch(void) { pipeline_split(gRbufPI); gRbufPI = NULL; gRbuf = NULL; } static bool read_header(lzma_check *check) { lzma_stream_flags stream_flags; rbuf_read_status st = rbuf_read(LZMA_STREAM_HEADER_SIZE); if (st == RBUF_EOF) return false; else if (st != RBUF_FULL) die("Error reading stream header"); lzma_ret err = lzma_stream_header_decode(&stream_flags, gRbuf->input); if (err == LZMA_FORMAT_ERROR) die("Not an XZ file"); else if (err != LZMA_OK) die("Error decoding XZ header"); *check = stream_flags.check; rbuf_consume(LZMA_STREAM_HEADER_SIZE); return true; } static bool read_block(bool force_stream, lzma_check check) { lzma_filter filters[LZMA_FILTERS_MAX + 1]; lzma_block block = { .filters = filters, .check = check, .version = 0 }; if (rbuf_read(1) != RBUF_FULL) die("Error reading block header size"); if (gRbuf->input[0] == 0) return false; block.header_size = lzma_block_header_size_decode(gRbuf->input[0]); if (block.header_size > LZMA_BLOCK_HEADER_SIZE_MAX) die("Block header size too large"); if (rbuf_read(block.header_size) != RBUF_FULL) die("Error reading block header"); if (lzma_block_header_decode(&block, NULL, gRbuf->input) != LZMA_OK) die("Error decoding block header"); size_t comp = block.compressed_size, outsize = block.uncompressed_size; if (force_stream || comp == LZMA_VLI_UNKNOWN || outsize == LZMA_VLI_UNKNOWN || outsize > MAXSPLITSIZE) { read_streaming(&block); } else { block_capacity(gRbuf, 0, outsize); gRbuf->outsize = outsize; gRbuf->check = check; gRbuf->btype = BLOCK_SIZED; if 
(rbuf_read(lzma_block_total_size(&block)) != RBUF_FULL) die("Error reading block contents"); rbuf_dispatch(); } return true; } static void read_streaming(lzma_block *block) { lzma_stream stream = LZMA_STREAM_INIT; if (lzma_block_decoder(&stream, block) != LZMA_OK) die("Error initializing streaming block decode"); rbuf_cycle(&stream, true, block->header_size); stream.avail_out = 0; bool first = true; pipeline_item_t *pi = NULL; io_block_t *ib = NULL; lzma_ret err = LZMA_OK; while (err != LZMA_STREAM_END) { if (err != LZMA_OK) die("Error decoding streaming block"); if (stream.avail_out == 0) { if (ib) { ib->outsize = ib->outcap; pipeline_dispatch(pi, gPipelineMergeQ); first = false; } queue_pop(gPipelineStartQ, (void**)&pi); ib = (io_block_t*)pi->data; ib->btype = (first ? BLOCK_UNSIZED : BLOCK_CONTINUATION); block_capacity(ib, 0, STREAMSIZE); stream.next_out = ib->output; stream.avail_out = ib->outcap; } if (stream.avail_in == 0 && !rbuf_cycle(&stream, false, 0)) die("Error reading streaming block"); err = lzma_code(&stream, LZMA_RUN); } if (ib && stream.avail_out != ib->outcap) { ib->outsize = ib->outcap - stream.avail_out; pipeline_dispatch(pi, gPipelineMergeQ); } rbuf_consume(gRbuf->insize - stream.avail_in); lzma_end(&stream); } static void read_index(void) { lzma_stream stream = LZMA_STREAM_INIT; lzma_index *index; if (lzma_index_decoder(&stream, &index, MEMLIMIT) != LZMA_OK) die("Error initializing index decoder"); rbuf_cycle(&stream, true, 0); lzma_ret err = LZMA_OK; while (err != LZMA_STREAM_END) { if (err != LZMA_OK) die("Error decoding index"); if (stream.avail_in == 0 && !rbuf_cycle(&stream, false, 0)) die("Error reading index"); err = lzma_code(&stream, LZMA_RUN); } rbuf_consume(gRbuf->insize - stream.avail_in); lzma_end(&stream); } static void read_footer(void) { lzma_stream_flags stream_flags; if (rbuf_read(LZMA_STREAM_HEADER_SIZE) != RBUF_FULL) die("Error reading stream footer"); if (lzma_stream_footer_decode(&stream_flags, gRbuf->input) != LZMA_OK) die("Error decoding XZ footer"); rbuf_consume(LZMA_STREAM_HEADER_SIZE); char zeros[4] = "\0\0\0\0"; while (true) { rbuf_read_status st = rbuf_read(4); if (st == RBUF_EOF) return; if (st != RBUF_FULL) die("Footer must be multiple of four bytes"); if (memcmp(zeros, gRbuf->input, 4) != 0) return; rbuf_consume(4); } } static void read_thread_noindex(void) { bool empty = true; lzma_check check = LZMA_CHECK_NONE; while (read_header(&check)) { empty = false; while (read_block(false, check)) ; // pass read_index(); read_footer(); } if (empty) die("Empty input"); pipeline_stop(); } static void read_thread(void) { off_t offset = ftello(gInFile); wanted_t *w = gWantedFiles; lzma_index_iter iter; lzma_index_iter_init(&iter, gIndex); while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { // Don't decode the file-index off_t boffset = iter.block.compressed_file_offset; size_t bsize = iter.block.total_size; if (gFileIndexOffset && boffset == gFileIndexOffset) continue; // Do we need this block? 
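/* When specific members were requested (-x), the index gives each block's
 * uncompressed range, so blocks that end before the next wanted file starts
 * can be skipped without decompressing them; otherwise every data block
 * (except the file-index block, handled above) is read. */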
if (gWantedFiles && gExplicitFiles) { off_t uend = iter.block.uncompressed_file_offset + iter.block.uncompressed_size; if (!w || w->start >= uend) { debug("read: skip %llu", iter.block.number_in_file); continue; } for ( ; w && w->end < uend; w = w->next) ; } debug("read: want %llu", iter.block.number_in_file); // Get a block to work with pipeline_item_t *pi; queue_pop(gPipelineStartQ, (void**)&pi); io_block_t *ib = (io_block_t*)(pi->data); block_capacity(ib, iter.block.unpadded_size, iter.block.uncompressed_size); // Seek if needed, and get the data if (offset != boffset) { fseeko(gInFile, boffset, SEEK_SET); offset = boffset; } if (iter.block.uncompressed_size > MAXSPLITSIZE) { // must stream if (gRbuf) rbuf_consume(gRbuf->insize); // clear read_block(true, iter.stream.flags->check); } else { ib->insize = fread(ib->input, 1, bsize, gInFile); if (ib->insize < bsize) die("Error reading block contents"); offset += bsize; ib->uoffset = iter.block.uncompressed_file_offset; ib->check = iter.stream.flags->check; ib->btype = BLOCK_SIZED; // Indexed blocks always sized pipeline_split(pi); } } pipeline_stop(); } #pragma mark DECODE static void decode_thread(size_t thnum) { lzma_stream stream = LZMA_STREAM_INIT; lzma_filter filters[LZMA_FILTERS_MAX + 1]; lzma_block block = { .filters = filters, .check = LZMA_CHECK_NONE, .version = 0 }; pipeline_item_t *pi; io_block_t *ib; while (PIPELINE_STOP != queue_pop(gPipelineSplitQ, (void**)&pi)) { ib = (io_block_t*)(pi->data); block.header_size = lzma_block_header_size_decode(*(ib->input)); block.check = ib->check; if (lzma_block_header_decode(&block, NULL, ib->input) != LZMA_OK) die("Error decoding block header"); if (lzma_block_decoder(&stream, &block) != LZMA_OK) die("Error initializing block decode"); stream.avail_in = ib->insize - block.header_size; stream.next_in = ib->input + block.header_size; stream.avail_out = ib->outcap; stream.next_out = ib->output; lzma_ret err = LZMA_OK; while (err != LZMA_STREAM_END) { if (err != LZMA_OK) die("Error decoding block"); err = lzma_code(&stream, LZMA_FINISH); } ib->outsize = stream.next_out - ib->output; queue_push(gPipelineMergeQ, PIPELINE_ITEM, pi); } lzma_end(&stream); } #pragma mark ARCHIVE static int tar_ok(struct archive *ar, void *ref) { return ARCHIVE_OK; } static bool tar_next_block(void) { if (gArItem && !gArNextItem && gArWanted && gExplicitFiles) { io_block_t *ib = (io_block_t*)(gArItem->data); if (gArWanted->start < ib->uoffset + ib->outsize) return true; // No need } if (gArLastItem) queue_push(gPipelineStartQ, PIPELINE_ITEM, gArLastItem); gArLastItem = gArItem; gArItem = pipeline_merged(); gArNextItem = false; return gArItem; } static void tar_write_last(void) { if (gArItem) { io_block_t *ib = (io_block_t*)(gArItem->data); fwrite(ib->output + gArLastOffset, gArLastSize, 1, gOutFile); gArLastSize = 0; } } static ssize_t tar_read(struct archive *ar, void *ref, const void **bufp) { // If we got here, the last bit of archive is ok to write tar_write_last(); // Write the first wanted file if (!tar_next_block()) return 0; off_t off; size_t size; io_block_t *ib = (io_block_t*)(gArItem->data); if (gWantedFiles && gExplicitFiles) { debug("tar want: %s", gArWanted->name); off = gArWanted->start - ib->uoffset; size = gArWanted->size; if (off < 0) { size += off; off = 0; } if (off + size > ib->outsize) { size = ib->outsize - off; gArNextItem = true; // force the end of this block } else { gArWanted = gArWanted->next; } } else { off = 0; size = ib->outsize; } debug("tar off = %llu, size = %zu", (unsigned long 
long)off, size); gArLastOffset = off; gArLastSize = size; if (bufp) *bufp = ib->output + off; return size; } #pragma mark UTILS static bool taste_tar(io_block_t *ib) { struct archive *ar = archive_read_new(); archive_read_support_compression_none(ar); archive_read_support_format_tar(ar); archive_read_open_memory(ar, ib->output, ib->outsize); struct archive_entry *entry; bool ok = (archive_read_next_header(ar, &entry) == ARCHIVE_OK); archive_read_finish(ar); return ok; } static bool taste_file_index(io_block_t *ib) { return xle64dec(ib->output) == PIXZ_INDEX_MAGIC; } pixz-1.0.2/README000644 000765 000024 00000006103 12073170437 013475 0ustar00vasistaff000000 000000 Pixz (pronounced 'pixie') is a parallel, indexing version of XZ Repository: https://github.com/vasi/pixz Downloads: https://sourceforge.net/projects/pixz/files/ The existing XZ Utils ( http://tukaani.org/xz/ ) provide great compression in the .xz file format, but they have two significant problems: * They are single-threaded, while most users nowadays have multi-core computers. * The .xz files they produce are just one big block of compressed data, rather than a collection of smaller blocks. This makes random access to the original data impossible. With pixz, both these problems are solved. The most useful commands: $ pixz foo.tar foo.tpxz # Compress and index a tarball, multi-core $ pixz -l foo.tpxz # Very quickly list the contents of the compressed tarball $ pixz -d foo.tpxz foo.tar # Decompress it, multi-core $ pixz -x dir/file < foo.tpxz | tar x # Very quickly extract a file, multi-core. # Also verifies that contents match index. $ tar -Ipixz -cf foo.tpxz foo # Create a tarball using pixz for multi-core compression $ pixz bar bar.xz # Compress a non-tarball, multi-core $ pixz -d bar.xz bar # Decompress it, multi-core Specifying input and output: $ pixz < foo.tar > foo.tpxz # Same as 'pixz foo.tar foo.tpxz' $ pixz -i foo.tar -o foo.tpxz # Ditto. These both work for -x, -d and -l too, eg: $ pixz -x -i foo.tpxz -o foo.tar file1 file2 ... # Extract the files from foo.tpxz into foo.tar $ pixz foo.tar # Compress it to foo.tpxz, removing the original $ pixz -d foo.tpxz # Extract it to foo.tar, removing the original Other flags: $ pixz -1 foo.tar # Faster, worse compression $ pixz -9 foo.tar # Better, slower compression $ pixz -p 2 foo.tar # Cap the number of threads at 2 $ pixz -t foo.tar # Compress but don't treat it as a tarball (don't index it) $ pixz -d -t foo.tpxz # Decompress foo, don't check that contents match index $ pixz -l -t foo.tpxz # List the xz blocks instead of files For even more tuning flags, check the manual page. 
Compare to: plzip * About equally complex, efficient * lzip format seems less-used * Version 1 is theoretically indexable...I think ChopZip * Python, much simpler * More flexible, supports arbitrary compression programs * Uses streams instead of blocks, not indexable * Splits input and then combines output, much higher disk usage pxz * Simpler code * Uses OpenMP instead of pthreads * Uses streams instead of blocks, not indexable * Uses temp files and doesn't combine them until the whole file is compressed, high disk/memory usage Comparable tools for other compression algorithms: pbzip2 * Not indexable * Appears slow * bzip2 algorithm is non-ideal pigz * Not indexable dictzip, idzip * Not parallel Requirements: * libarchive 2.8 or later * liblzma 4.999.9-beta-212 or later (from the xz distribution) pixz-1.0.2/test.sh000755 000765 000024 00000000614 12073170437 014134 0ustar00vasistaff000000 000000 #!/bin/bash tarball=$1 sample=$2 echo XZ time xz -c < "$tarball" > test.txz time xz -cd < test.txz | tar xO "$sample" | md5sum echo; echo; echo PIXZ time ./pixz < "$tarball" > test.tpxz time ./pixz -x "$sample" < test.tpxz | tar xO "$sample" | md5sum echo; echo; echo CROSS xz -cd < test.tpxz | tar xO "$sample" | md5sum echo; echo du -sh "$tarball" test.tpxz test.txz rm test.tpxz test.txz pixz-1.0.2/TODO000644 000765 000024 00000001577 12073170437 013317 0ustar00vasistaff000000 000000 See also: https://github.com/vasi/pixz/issues CLEANUP * massive cleanup * error handling * signal handling * globals * autoconf * optimized settings * memory limit * cpu number * block size, for max threads on small files BUGS * performance lags under IO? * slow input -> CPUs idle while waiting for input * safe extraction * sanity checks, from spec: - CRCs are already tested, i think? - backward size should match file - reserved flags must be zero - header vs footer flags - uncompressed size field vs actual uncompressed size - index vs actual blocks EFFICIENCY * more efficient indexing: ranges? sorted? mtree? * circular buffer > linked list? DOCUMENTATION * man pages * command help FEATURES * support multiple streams * tarball append without decompression * other archive formats: cpio? 
* lzma-like API * recovery tool (already is, kinda) pixz-1.0.2/write.c000644 000765 000024 00000036702 12073170437 014123 0ustar00vasistaff000000 000000 #include "pixz.h" #include #include #pragma mark TYPES typedef struct io_block_t io_block_t; struct io_block_t { lzma_block block; uint8_t *input, *output; size_t insize, outsize; }; #pragma mark GLOBALS #define LZMA_CHUNK_MAX (1 << 16) double gBlockFraction = 2.0; static bool gTar = true; static size_t gBlockInSize = 0, gBlockOutSize = 0; static off_t gMultiHeaderStart = 0; static bool gMultiHeader = false; static off_t gTotalRead = 0; static pipeline_item_t *gReadItem = NULL; static io_block_t *gReadBlock = NULL; static size_t gReadItemCount = 0; static lzma_filter gFilters[LZMA_FILTERS_MAX + 1]; static uint8_t gFileIndexBuf[CHUNKSIZE]; static size_t gFileIndexBufPos = 0; #pragma mark FUNCTION DECLARATIONS static void read_thread(); static void encode_thread(size_t thnum); static void encode_uncompressible(io_block_t *ib); static size_t size_uncompressible(size_t insize); static void *block_create(); static void block_free(void *data); typedef enum { BLOCK_IN = 1, BLOCK_OUT = 2, BLOCK_ALL = BLOCK_IN | BLOCK_OUT, } block_parts; static void block_alloc(io_block_t *ib, block_parts parts); static void block_dealloc(io_block_t *ib, block_parts parts); static void add_file(off_t offset, const char *name); static archive_read_callback tar_read; static archive_open_callback tar_ok; static archive_close_callback tar_ok; static void block_init(lzma_block *block, size_t insize); static void stream_edge(lzma_vli backward_size); static void write_block(pipeline_item_t *pi); static void encode_index(void); static void write_file_index(void); static void write_file_index_bytes(size_t size, uint8_t *buf); static void write_file_index_buf(lzma_action action); #pragma mark FUNCTION DEFINITIONS void pixz_write(bool tar, uint32_t level) { gTar = tar; // xz options lzma_options_lzma lzma_opts; if (lzma_lzma_preset(&lzma_opts, level)) die("Error setting lzma options"); gFilters[0] = (lzma_filter){ .id = LZMA_FILTER_LZMA2, .options = &lzma_opts }; gFilters[1] = (lzma_filter){ .id = LZMA_VLI_UNKNOWN, .options = NULL }; gBlockInSize = lzma_opts.dict_size * gBlockFraction; if (gBlockInSize <= 0) die("Block size must be positive"); gBlockOutSize = lzma_block_buffer_bound(gBlockInSize); pipeline_create(block_create, block_free, read_thread, encode_thread); debug("writer: start"); // pre-block setup: header, index if (!(gIndex = lzma_index_init(NULL))) die("Error creating index"); stream_edge(LZMA_VLI_UNKNOWN); // write blocks while (true) { pipeline_item_t *pi = pipeline_merged(); if (!pi) break; debug("writer: received %zu", pi->seq); write_block(pi); queue_push(gPipelineStartQ, PIPELINE_ITEM, pi); } // file index if (gTar) write_file_index(); free_file_index(); // post-block cleanup: index, footer encode_index(); stream_edge(lzma_index_size(gIndex)); lzma_index_end(gIndex, NULL); fclose(gOutFile); debug("writer: cleaning up reader"); pipeline_destroy(); debug("exit"); } #pragma mark READING static void read_thread() { debug("reader: start"); if (gTar) { struct archive *ar = archive_read_new(); archive_read_support_compression_none(ar); archive_read_support_format_tar(ar); archive_read_support_format_raw(ar); archive_read_open(ar, NULL, tar_ok, tar_read, tar_ok); struct archive_entry *entry; while (true) { int aerr = archive_read_next_header(ar, &entry); if (aerr == ARCHIVE_EOF) { break; } else if (aerr != ARCHIVE_OK && aerr != ARCHIVE_WARN) { // Some charset 
translations warn spuriously fprintf(stderr, "%s\n", archive_error_string(ar)); die("Error reading archive entry"); } if (archive_format(ar) == ARCHIVE_FORMAT_RAW) { gTar = false; break; } add_file(archive_read_header_position(ar), archive_entry_pathname(entry)); } if (archive_read_header_position(ar) == 0) gTar = false; // probably spuriously identified as tar archive_read_finish(ar); } if (!feof(gInFile)) { const void *dummy; while (tar_read(NULL, NULL, &dummy) != 0) ; // just keep pumping } fclose(gInFile); if (gTar) add_file(gTotalRead, NULL); // write last block, if necessary if (gReadItem) { // if this block had only one read, and it was EOF, it's waste debug("reader: handling last block %zu", gReadItemCount); if (gReadBlock->insize) pipeline_split(gReadItem); else queue_push(gPipelineStartQ, PIPELINE_ITEM, gReadItem); gReadItem = NULL; } // stop the other threads debug("reader: cleaning up encoders"); pipeline_stop(); debug("reader: end"); } static ssize_t tar_read(struct archive *ar, void *ref, const void **bufp) { if (!gReadItem) { queue_pop(gPipelineStartQ, (void**)&gReadItem); gReadBlock = (io_block_t*)(gReadItem->data); block_alloc(gReadBlock, BLOCK_IN); gReadBlock->insize = 0; debug("reader: reading %zu", gReadItemCount); } size_t space = gBlockInSize - gReadBlock->insize; if (space > CHUNKSIZE) space = CHUNKSIZE; uint8_t *buf = gReadBlock->input + gReadBlock->insize; size_t rd = fread(buf, 1, space, gInFile); if (ferror(gInFile)) die("Error reading input file"); gReadBlock->insize += rd; gTotalRead += rd; *bufp = buf; if (gReadBlock->insize == gBlockInSize) { debug("reader: sending %zu", gReadItemCount); pipeline_split(gReadItem); ++gReadItemCount; gReadItem = NULL; } return rd; } static int tar_ok(struct archive *ar, void *ref) { return ARCHIVE_OK; } static void add_file(off_t offset, const char *name) { if (name && is_multi_header(name)) { if (!gMultiHeader) gMultiHeaderStart = offset; gMultiHeader = true; return; } file_index_t *f = malloc(sizeof(file_index_t)); f->offset = gMultiHeader ? gMultiHeaderStart : offset; gMultiHeader = false; f->name = name ? 
static void add_file(off_t offset, const char *name) {
    if (name && is_multi_header(name)) {
        if (!gMultiHeader)
            gMultiHeaderStart = offset;
        gMultiHeader = true;
        return;
    }

    file_index_t *f = malloc(sizeof(file_index_t));
    f->offset = gMultiHeader ? gMultiHeaderStart : offset;
    gMultiHeader = false;
    f->name = name ? xstrdup(name) : NULL;
    f->next = NULL;

    if (gLastFile) {
        gLastFile->next = f;
    } else { // new index
        gFileIndex = f;
    }
    gLastFile = f;
}

static void block_free(void *data) {
    io_block_t *ib = (io_block_t*)data;
    free(ib->input);
    free(ib->output);
    free(ib);
}

static void *block_create() {
    io_block_t *ib = malloc(sizeof(io_block_t));
    ib->input = ib->output = NULL;
    return ib;
}

static void block_alloc(io_block_t *ib, block_parts parts) {
    if ((parts & BLOCK_IN) && !ib->input)
        ib->input = malloc(gBlockInSize);
    if ((parts & BLOCK_OUT) && !ib->output)
        ib->output = malloc(gBlockOutSize);
    // only fail on the parts that were actually requested
    if (((parts & BLOCK_IN) && !ib->input) || ((parts & BLOCK_OUT) && !ib->output))
        die("Can't allocate blocks");
}

static void block_dealloc(io_block_t *ib, block_parts parts) {
    if (parts & BLOCK_IN) {
        free(ib->input);
        ib->input = NULL;
    }
    if (parts & BLOCK_OUT) {
        free(ib->output);
        ib->output = NULL;
    }
}

#pragma mark ENCODING

static size_t size_uncompressible(size_t insize) {
    size_t chunks = insize / LZMA_CHUNK_MAX;
    if (insize % LZMA_CHUNK_MAX)
        ++chunks;

    // Per chunk (control code + 2-byte size), one byte for EOF
    size_t data_size = insize + chunks * 3 + 1;
    if (data_size % 4)
        data_size += 4 - data_size % 4; // Padding
    return data_size;
}

static void encode_uncompressible(io_block_t *ib) {
    // See http://en.wikipedia.org/wiki/Lzma#LZMA2_format
    const uint8_t control_uncomp = 1;
    const uint8_t control_end = 0;

    uint8_t *output_start = ib->output + ib->block.header_size;
    uint8_t *output = output_start;
    uint8_t *input = ib->input;
    size_t remain = ib->insize;

    while (remain) {
        size_t size = remain;
        if (size > LZMA_CHUNK_MAX)
            size = LZMA_CHUNK_MAX;

        // control byte for uncompressed block
        *output++ = control_uncomp;

        // 16-bit big endian (size - 1)
        uint16_t size_write = size - 1;
        *output++ = (size_write >> 8);
        *output++ = (size_write & 0xFF);

        // actual chunk data
        memcpy(output, input, size);
        remain -= size;
        output += size;
        input += size;
    }

    // control byte for end of block
    *output++ = control_end;

    ib->block.compressed_size = output - output_start;
    ib->block.uncompressed_size = ib->insize;

    // padding
    while ((output - output_start) % 4)
        *output++ = 0;

    // checksum (little endian)
    if (ib->block.check != LZMA_CHECK_CRC32)
        die("pixz only supports CRC-32 checksums");
    uint32_t check = lzma_crc32(ib->input, ib->insize, 0);
    *output++ = check & 0xFF;
    *output++ = (check >> 8) & 0xFF;
    *output++ = (check >> 16) & 0xFF;
    *output++ = (check >> 24);
}
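/*
 * [Editor's sketch, not part of the original pixz sources] A worked check of
 * the size_uncompressible() arithmetic above, with the constants written out:
 * for 150,000 input bytes there are 3 chunks of at most 65,536 bytes, each
 * costing a 3-byte header, plus one end-of-block byte, giving 150,010, which
 * pads up to the next multiple of 4, 150,012. The function is never called by
 * pixz; it only restates the arithmetic so it can be checked in isolation.
 */
#include <assert.h>
#include <stddef.h>

static void lzma2_uncompressed_bound_demo(void) {
    size_t insize = 150000;                 /* example input size */
    size_t chunks = insize / 65536;         /* 2 full chunks ...            */
    if (insize % 65536)
        ++chunks;                           /* ... plus 1 partial chunk = 3 */
    size_t size = insize + chunks * 3 + 1;  /* + 3 bytes/chunk + end = 150010 */
    if (size % 4)
        size += 4 - size % 4;               /* pad to multiple of 4 = 150012  */
    assert(size == 150012);
    (void)size;
}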
static void encode_thread(size_t thnum) {
    lzma_stream stream = LZMA_STREAM_INIT;

    while (true) {
        pipeline_item_t *pi;
        int msg = queue_pop(gPipelineSplitQ, (void**)&pi);
        if (msg == PIPELINE_STOP)
            break;

        debug("encoder %zu: received %zu", thnum, pi->seq);
        io_block_t *ib = (io_block_t*)(pi->data);
        block_alloc(ib, BLOCK_OUT);
        block_init(&ib->block, ib->insize);
        size_t header_size = ib->block.header_size;
        size_t uncompressible_size = size_uncompressible(ib->insize) +
            lzma_check_size(ib->block.check);

        if (lzma_block_encoder(&stream, &ib->block) != LZMA_OK)
            die("Error creating block encoder");
        stream.next_in = ib->input;
        stream.avail_in = ib->insize;
        stream.next_out = ib->output + header_size;
        stream.avail_out = uncompressible_size;
        ib->block.uncompressed_size = LZMA_VLI_UNKNOWN; // for encoder to change

        lzma_ret err = LZMA_OK;
        while (err == LZMA_OK) {
            err = lzma_code(&stream, LZMA_FINISH);
        }
        if (err == LZMA_BUF_ERROR) {
            debug("encoder: uncompressible %zu", pi->seq);
            encode_uncompressible(ib);
            ib->outsize = header_size + uncompressible_size;
        } else if (err == LZMA_STREAM_END) {
            ib->outsize = stream.next_out - ib->output;
        } else {
            die("Error encoding block");
        }

        block_dealloc(ib, BLOCK_IN);
        if (lzma_block_header_encode(&ib->block, ib->output) != LZMA_OK)
            die("Error encoding block header");

        debug("encoder %zu: sending %zu", thnum, pi->seq);
        queue_push(gPipelineMergeQ, PIPELINE_ITEM, pi);
    }

    lzma_end(&stream);
}

#pragma mark WRITING

static void block_init(lzma_block *block, size_t insize) {
    block->version = 0;
    block->check = CHECK;
    block->filters = gFilters;
    block->uncompressed_size = insize ? insize : LZMA_VLI_UNKNOWN;
    block->compressed_size = insize ? gBlockOutSize : LZMA_VLI_UNKNOWN;

    if (lzma_block_header_size(block) != LZMA_OK)
        die("Error getting block header size");
}

static void stream_edge(lzma_vli backward_size) {
    lzma_stream_flags flags = { .version = 0, .check = CHECK,
        .backward_size = backward_size };
    uint8_t buf[LZMA_STREAM_HEADER_SIZE];

    lzma_ret (*encoder)(const lzma_stream_flags *flags, uint8_t *buf);
    encoder = backward_size == LZMA_VLI_UNKNOWN
        ? &lzma_stream_header_encode
        : &lzma_stream_footer_encode;
    if ((*encoder)(&flags, buf) != LZMA_OK)
        die("Error encoding stream edge");

    if (fwrite(buf, LZMA_STREAM_HEADER_SIZE, 1, gOutFile) != 1)
        die("Error writing stream edge");
}

static void write_block(pipeline_item_t *pi) {
    debug("writer: writing %zu", pi->seq);
    io_block_t *ib = (io_block_t*)(pi->data);

    // Does it make sense to chunk this?
    size_t written = 0;
    while (ib->outsize > written) {
        size_t size = ib->outsize - written;
        if (size > CHUNKSIZE)
            size = CHUNKSIZE;
        if (fwrite(ib->output + written, size, 1, gOutFile) != 1)
            die("Error writing block data");
        written += size;
    }

    if (lzma_index_append(gIndex, NULL,
            lzma_block_unpadded_size(&ib->block),
            ib->block.uncompressed_size) != LZMA_OK)
        die("Error adding to index");

    block_dealloc(ib, BLOCK_ALL);
    debug("writer: writing %zu complete", pi->seq);
}

static void encode_index(void) {
    if (lzma_index_encoder(&gStream, gIndex) != LZMA_OK)
        die("Error creating index encoder");

    uint8_t obuf[CHUNKSIZE];
    lzma_ret err = LZMA_OK;
    while (err != LZMA_STREAM_END) {
        gStream.next_out = obuf;
        gStream.avail_out = CHUNKSIZE;
        err = lzma_code(&gStream, LZMA_RUN);
        if (err != LZMA_OK && err != LZMA_STREAM_END)
            die("Error encoding index");
        if (gStream.avail_out != CHUNKSIZE) {
            if (fwrite(obuf, CHUNKSIZE - gStream.avail_out, 1, gOutFile) != 1)
                die("Error writing index data");
        }
    }
    lzma_end(&gStream);
}
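/*
 * [Editor's sketch, not part of the original pixz sources] stream_edge()
 * records lzma_index_size(gIndex) as backward_size in the stream footer,
 * which is what lets a reader find the index by seeking backwards from the
 * end of the file. The standalone helper below, compiled separately with
 * -llzma, decodes the last 12 bytes of an .xz file and prints where the
 * index must start; it assumes there is no stream padding after the footer,
 * and the function name is the editor's.
 */
#include <lzma.h>
#include <stdio.h>

static int print_index_offset(FILE *f) {
    uint8_t buf[LZMA_STREAM_HEADER_SIZE];
    lzma_stream_flags flags;

    if (fseek(f, -(long)LZMA_STREAM_HEADER_SIZE, SEEK_END) != 0 ||
            fread(buf, sizeof(buf), 1, f) != 1)
        return -1;
    if (lzma_stream_footer_decode(&flags, buf) != LZMA_OK)
        return -1;

    long end = ftell(f);   /* file size: we just read the final 12 bytes */
    printf("index starts at offset %ld (backward_size %llu)\n",
            end - LZMA_STREAM_HEADER_SIZE - (long)flags.backward_size,
            (unsigned long long)flags.backward_size);
    return 0;
}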
f->name : ""; size_t len = strlen(name); write_file_index_bytes(len + 1, (uint8_t*)name); xle64enc(offbuf, f->offset); write_file_index_bytes(sizeof(offbuf), offbuf); } write_file_index_buf(LZMA_FINISH); if (lzma_index_append(gIndex, NULL, lzma_block_unpadded_size(&block), block.uncompressed_size) != LZMA_OK) die("Error adding file-index to index"); lzma_end(&gStream); } static void write_file_index_bytes(size_t size, uint8_t *buf) { size_t bufpos = 0; while (bufpos < size) { size_t len = size - bufpos; size_t space = CHUNKSIZE - gFileIndexBufPos; if (len > space) len = space; memcpy(gFileIndexBuf + gFileIndexBufPos, buf + bufpos, len); gFileIndexBufPos += len; bufpos += len; if (gFileIndexBufPos == CHUNKSIZE) { write_file_index_buf(LZMA_RUN); gFileIndexBufPos = 0; } } } static void write_file_index_buf(lzma_action action) { uint8_t obuf[CHUNKSIZE]; gStream.avail_in = gFileIndexBufPos; gStream.next_in = gFileIndexBuf; lzma_ret err = LZMA_OK; while (err != LZMA_STREAM_END && (action == LZMA_FINISH || gStream.avail_in)) { gStream.avail_out = CHUNKSIZE; gStream.next_out = obuf; err = lzma_code(&gStream, action); if (err != LZMA_OK && err != LZMA_STREAM_END) die("Error encoding file index"); if (gStream.avail_out != CHUNKSIZE) { if (fwrite(obuf, CHUNKSIZE - gStream.avail_out, 1, gOutFile) != 1) die("Error writing file index"); } } gFileIndexBufPos = 0; }