pax_global_header00006660000000000000000000000064125563136240014521gustar00rootroot0000000000000052 comment=723cc1e459b8eea2dea4583200fd60757d40097a golang-github-golang-snappy-0.0+git20150730.723cc1e/000077500000000000000000000000001255631362400213335ustar00rootroot00000000000000golang-github-golang-snappy-0.0+git20150730.723cc1e/AUTHORS000066400000000000000000000006741255631362400224120ustar00rootroot00000000000000# This is the official list of Snappy-Go authors for copyright purposes. # This file is distinct from the CONTRIBUTORS files. # See the latter for an explanation. # Names should be added to this file as # Name or Organization # The email address is not required for organizations. # Please keep the list sorted. Damian Gryski Google Inc. Jan Mercl <0xjnml@gmail.com> Sebastien Binet golang-github-golang-snappy-0.0+git20150730.723cc1e/CONTRIBUTORS000066400000000000000000000025461255631362400232220ustar00rootroot00000000000000# This is the official list of people who can contribute # (and typically have contributed) code to the Snappy-Go repository. # The AUTHORS file lists the copyright holders; this file # lists people. For example, Google employees are listed here # but not in AUTHORS, because Google holds the copyright. # # The submission process automatically checks to make sure # that people submitting code are listed in this file (by email address). # # Names should be added to this file only after verifying that # the individual or the individual's organization has agreed to # the appropriate Contributor License Agreement, found here: # # http://code.google.com/legal/individual-cla-v1.0.html # http://code.google.com/legal/corporate-cla-v1.0.html # # The agreement for individuals can be filled out on the web. # # When adding J Random Contributor's name to this file, # either J's name or J's organization's name should be # added to the AUTHORS file, depending on whether the # individual or corporate CLA was used. # Names should be added to this file like so: # Name # Please keep the list sorted. Damian Gryski Jan Mercl <0xjnml@gmail.com> Kai Backman Marc-Antoine Ruel Nigel Tao Rob Pike Russ Cox Sebastien Binet golang-github-golang-snappy-0.0+git20150730.723cc1e/LICENSE000066400000000000000000000027161255631362400223460ustar00rootroot00000000000000Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. golang-github-golang-snappy-0.0+git20150730.723cc1e/README000066400000000000000000000004011255631362400222060ustar00rootroot00000000000000The Snappy compression format in the Go programming language. To download and install from source: $ go get github.com/golang/snappy Unless otherwise noted, the Snappy-Go source files are distributed under the BSD-style license found in the LICENSE file. golang-github-golang-snappy-0.0+git20150730.723cc1e/decode.go000066400000000000000000000156141255631362400231140ustar00rootroot00000000000000// Copyright 2011 The Snappy-Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package snappy import ( "encoding/binary" "errors" "io" ) var ( // ErrCorrupt reports that the input is invalid. ErrCorrupt = errors.New("snappy: corrupt input") // ErrTooLarge reports that the uncompressed length is too large. ErrTooLarge = errors.New("snappy: decoded block is too large") // ErrUnsupported reports that the input isn't supported. ErrUnsupported = errors.New("snappy: unsupported input") ) // DecodedLen returns the length of the decoded block. func DecodedLen(src []byte) (int, error) { v, _, err := decodedLen(src) return v, err } // decodedLen returns the length of the decoded block and the number of bytes // that the length header occupied. func decodedLen(src []byte) (blockLen, headerLen int, err error) { v, n := binary.Uvarint(src) if n <= 0 || v > 0xffffffff { return 0, 0, ErrCorrupt } const wordSize = 32 << (^uint(0) >> 32 & 1) if wordSize == 32 && v > 0x7fffffff { return 0, 0, ErrTooLarge } return int(v), n, nil } // Decode returns the decoded form of src. The returned slice may be a sub- // slice of dst if dst was large enough to hold the entire decoded block. // Otherwise, a newly allocated slice will be returned. // It is valid to pass a nil dst. func Decode(dst, src []byte) ([]byte, error) { dLen, s, err := decodedLen(src) if err != nil { return nil, err } if len(dst) < dLen { dst = make([]byte, dLen) } var d, offset, length int for s < len(src) { switch src[s] & 0x03 { case tagLiteral: x := uint(src[s] >> 2) switch { case x < 60: s++ case x == 60: s += 2 if s > len(src) { return nil, ErrCorrupt } x = uint(src[s-1]) case x == 61: s += 3 if s > len(src) { return nil, ErrCorrupt } x = uint(src[s-2]) | uint(src[s-1])<<8 case x == 62: s += 4 if s > len(src) { return nil, ErrCorrupt } x = uint(src[s-3]) | uint(src[s-2])<<8 | uint(src[s-1])<<16 case x == 63: s += 5 if s > len(src) { return nil, ErrCorrupt } x = uint(src[s-4]) | uint(src[s-3])<<8 | uint(src[s-2])<<16 | uint(src[s-1])<<24 } length = int(x + 1) if length <= 0 { return nil, errors.New("snappy: unsupported literal length") } if length > len(dst)-d || length > len(src)-s { return nil, ErrCorrupt } copy(dst[d:], src[s:s+length]) d += length s += length continue case tagCopy1: s += 2 if s > len(src) { return nil, ErrCorrupt } length = 4 + int(src[s-2])>>2&0x7 offset = int(src[s-2])&0xe0<<3 | int(src[s-1]) case tagCopy2: s += 3 if s > len(src) { return nil, ErrCorrupt } length = 1 + int(src[s-3])>>2 offset = int(src[s-2]) | int(src[s-1])<<8 case tagCopy4: return nil, errors.New("snappy: unsupported COPY_4 tag") } end := d + length if offset > d || end > len(dst) { return nil, ErrCorrupt } for ; d < end; d++ { dst[d] = dst[d-offset] } } if d != dLen { return nil, ErrCorrupt } return dst[:d], nil } // NewReader returns a new Reader that decompresses from r, using the framing // format described at // https://github.com/google/snappy/blob/master/framing_format.txt func NewReader(r io.Reader) *Reader { return &Reader{ r: r, decoded: make([]byte, maxUncompressedChunkLen), buf: make([]byte, MaxEncodedLen(maxUncompressedChunkLen)+checksumSize), } } // Reader is an io.Reader than can read Snappy-compressed bytes. type Reader struct { r io.Reader err error decoded []byte buf []byte // decoded[i:j] contains decoded bytes that have not yet been passed on. i, j int readHeader bool } // Reset discards any buffered data, resets all state, and switches the Snappy // reader to read from r. This permits reusing a Reader rather than allocating // a new one. func (r *Reader) Reset(reader io.Reader) { r.r = reader r.err = nil r.i = 0 r.j = 0 r.readHeader = false } func (r *Reader) readFull(p []byte) (ok bool) { if _, r.err = io.ReadFull(r.r, p); r.err != nil { if r.err == io.ErrUnexpectedEOF { r.err = ErrCorrupt } return false } return true } // Read satisfies the io.Reader interface. func (r *Reader) Read(p []byte) (int, error) { if r.err != nil { return 0, r.err } for { if r.i < r.j { n := copy(p, r.decoded[r.i:r.j]) r.i += n return n, nil } if !r.readFull(r.buf[:4]) { return 0, r.err } chunkType := r.buf[0] if !r.readHeader { if chunkType != chunkTypeStreamIdentifier { r.err = ErrCorrupt return 0, r.err } r.readHeader = true } chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 if chunkLen > len(r.buf) { r.err = ErrUnsupported return 0, r.err } // The chunk types are specified at // https://github.com/google/snappy/blob/master/framing_format.txt switch chunkType { case chunkTypeCompressedData: // Section 4.2. Compressed data (chunk type 0x00). if chunkLen < checksumSize { r.err = ErrCorrupt return 0, r.err } buf := r.buf[:chunkLen] if !r.readFull(buf) { return 0, r.err } checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 buf = buf[checksumSize:] n, err := DecodedLen(buf) if err != nil { r.err = err return 0, r.err } if n > len(r.decoded) { r.err = ErrCorrupt return 0, r.err } if _, err := Decode(r.decoded, buf); err != nil { r.err = err return 0, r.err } if crc(r.decoded[:n]) != checksum { r.err = ErrCorrupt return 0, r.err } r.i, r.j = 0, n continue case chunkTypeUncompressedData: // Section 4.3. Uncompressed data (chunk type 0x01). if chunkLen < checksumSize { r.err = ErrCorrupt return 0, r.err } buf := r.buf[:checksumSize] if !r.readFull(buf) { return 0, r.err } checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 // Read directly into r.decoded instead of via r.buf. n := chunkLen - checksumSize if !r.readFull(r.decoded[:n]) { return 0, r.err } if crc(r.decoded[:n]) != checksum { r.err = ErrCorrupt return 0, r.err } r.i, r.j = 0, n continue case chunkTypeStreamIdentifier: // Section 4.1. Stream identifier (chunk type 0xff). if chunkLen != len(magicBody) { r.err = ErrCorrupt return 0, r.err } if !r.readFull(r.buf[:len(magicBody)]) { return 0, r.err } for i := 0; i < len(magicBody); i++ { if r.buf[i] != magicBody[i] { r.err = ErrCorrupt return 0, r.err } } continue } if chunkType <= 0x7f { // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). r.err = ErrUnsupported return 0, r.err } // Section 4.4 Padding (chunk type 0xfe). // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). if !r.readFull(r.buf[:chunkLen]) { return 0, r.err } } } golang-github-golang-snappy-0.0+git20150730.723cc1e/encode.go000066400000000000000000000160311255631362400231200ustar00rootroot00000000000000// Copyright 2011 The Snappy-Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package snappy import ( "encoding/binary" "io" ) // We limit how far copy back-references can go, the same as the C++ code. const maxOffset = 1 << 15 // emitLiteral writes a literal chunk and returns the number of bytes written. func emitLiteral(dst, lit []byte) int { i, n := 0, uint(len(lit)-1) switch { case n < 60: dst[0] = uint8(n)<<2 | tagLiteral i = 1 case n < 1<<8: dst[0] = 60<<2 | tagLiteral dst[1] = uint8(n) i = 2 case n < 1<<16: dst[0] = 61<<2 | tagLiteral dst[1] = uint8(n) dst[2] = uint8(n >> 8) i = 3 case n < 1<<24: dst[0] = 62<<2 | tagLiteral dst[1] = uint8(n) dst[2] = uint8(n >> 8) dst[3] = uint8(n >> 16) i = 4 case int64(n) < 1<<32: dst[0] = 63<<2 | tagLiteral dst[1] = uint8(n) dst[2] = uint8(n >> 8) dst[3] = uint8(n >> 16) dst[4] = uint8(n >> 24) i = 5 default: panic("snappy: source buffer is too long") } if copy(dst[i:], lit) != len(lit) { panic("snappy: destination buffer is too short") } return i + len(lit) } // emitCopy writes a copy chunk and returns the number of bytes written. func emitCopy(dst []byte, offset, length int) int { i := 0 for length > 0 { x := length - 4 if 0 <= x && x < 1<<3 && offset < 1<<11 { dst[i+0] = uint8(offset>>8)&0x07<<5 | uint8(x)<<2 | tagCopy1 dst[i+1] = uint8(offset) i += 2 break } x = length if x > 1<<6 { x = 1 << 6 } dst[i+0] = uint8(x-1)<<2 | tagCopy2 dst[i+1] = uint8(offset) dst[i+2] = uint8(offset >> 8) i += 3 length -= x } return i } // Encode returns the encoded form of src. The returned slice may be a sub- // slice of dst if dst was large enough to hold the entire encoded block. // Otherwise, a newly allocated slice will be returned. // It is valid to pass a nil dst. func Encode(dst, src []byte) []byte { if n := MaxEncodedLen(len(src)); len(dst) < n { dst = make([]byte, n) } // The block starts with the varint-encoded length of the decompressed bytes. d := binary.PutUvarint(dst, uint64(len(src))) // Return early if src is short. if len(src) <= 4 { if len(src) != 0 { d += emitLiteral(dst[d:], src) } return dst[:d] } // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. const maxTableSize = 1 << 14 shift, tableSize := uint(32-8), 1<<8 for tableSize < maxTableSize && tableSize < len(src) { shift-- tableSize *= 2 } var table [maxTableSize]int // Iterate over the source bytes. var ( s int // The iterator position. t int // The last position with the same hash as s. lit int // The start position of any pending literal bytes. ) for s+3 < len(src) { // Update the hash table. b0, b1, b2, b3 := src[s], src[s+1], src[s+2], src[s+3] h := uint32(b0) | uint32(b1)<<8 | uint32(b2)<<16 | uint32(b3)<<24 p := &table[(h*0x1e35a7bd)>>shift] // We need to to store values in [-1, inf) in table. To save // some initialization time, (re)use the table's zero value // and shift the values against this zero: add 1 on writes, // subtract 1 on reads. t, *p = *p-1, s+1 // If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte. if t < 0 || s-t >= maxOffset || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] { s++ continue } // Otherwise, we have a match. First, emit any pending literal bytes. if lit != s { d += emitLiteral(dst[d:], src[lit:s]) } // Extend the match to be as long as possible. s0 := s s, t = s+4, t+4 for s < len(src) && src[s] == src[t] { s++ t++ } // Emit the copied bytes. d += emitCopy(dst[d:], s-t, s-s0) lit = s } // Emit any final pending literal bytes and return. if lit != len(src) { d += emitLiteral(dst[d:], src[lit:]) } return dst[:d] } // MaxEncodedLen returns the maximum length of a snappy block, given its // uncompressed length. func MaxEncodedLen(srcLen int) int { // Compressed data can be defined as: // compressed := item* literal* // item := literal* copy // // The trailing literal sequence has a space blowup of at most 62/60 // since a literal of length 60 needs one tag byte + one extra byte // for length information. // // Item blowup is trickier to measure. Suppose the "copy" op copies // 4 bytes of data. Because of a special check in the encoding code, // we produce a 4-byte copy only if the offset is < 65536. Therefore // the copy op takes 3 bytes to encode, and this type of item leads // to at most the 62/60 blowup for representing literals. // // Suppose the "copy" op copies 5 bytes of data. If the offset is big // enough, it will take 5 bytes to encode the copy op. Therefore the // worst case here is a one-byte literal followed by a five-byte copy. // That is, 6 bytes of input turn into 7 bytes of "compressed" data. // // This last factor dominates the blowup, so the final estimate is: return 32 + srcLen + srcLen/6 } // NewWriter returns a new Writer that compresses to w, using the framing // format described at // https://github.com/google/snappy/blob/master/framing_format.txt func NewWriter(w io.Writer) *Writer { return &Writer{ w: w, enc: make([]byte, MaxEncodedLen(maxUncompressedChunkLen)), } } // Writer is an io.Writer than can write Snappy-compressed bytes. type Writer struct { w io.Writer err error enc []byte buf [checksumSize + chunkHeaderSize]byte wroteHeader bool } // Reset discards the writer's state and switches the Snappy writer to write to // w. This permits reusing a Writer rather than allocating a new one. func (w *Writer) Reset(writer io.Writer) { w.w = writer w.err = nil w.wroteHeader = false } // Write satisfies the io.Writer interface. func (w *Writer) Write(p []byte) (n int, errRet error) { if w.err != nil { return 0, w.err } if !w.wroteHeader { copy(w.enc, magicChunk) if _, err := w.w.Write(w.enc[:len(magicChunk)]); err != nil { w.err = err return n, err } w.wroteHeader = true } for len(p) > 0 { var uncompressed []byte if len(p) > maxUncompressedChunkLen { uncompressed, p = p[:maxUncompressedChunkLen], p[maxUncompressedChunkLen:] } else { uncompressed, p = p, nil } checksum := crc(uncompressed) // Compress the buffer, discarding the result if the improvement // isn't at least 12.5%. chunkType := uint8(chunkTypeCompressedData) chunkBody := Encode(w.enc, uncompressed) if len(chunkBody) >= len(uncompressed)-len(uncompressed)/8 { chunkType, chunkBody = chunkTypeUncompressedData, uncompressed } chunkLen := 4 + len(chunkBody) w.buf[0] = chunkType w.buf[1] = uint8(chunkLen >> 0) w.buf[2] = uint8(chunkLen >> 8) w.buf[3] = uint8(chunkLen >> 16) w.buf[4] = uint8(checksum >> 0) w.buf[5] = uint8(checksum >> 8) w.buf[6] = uint8(checksum >> 16) w.buf[7] = uint8(checksum >> 24) if _, err := w.w.Write(w.buf[:]); err != nil { w.err = err return n, err } if _, err := w.w.Write(chunkBody); err != nil { w.err = err return n, err } n += len(uncompressed) } return n, nil } golang-github-golang-snappy-0.0+git20150730.723cc1e/snappy.go000066400000000000000000000046671255631362400232110ustar00rootroot00000000000000// Copyright 2011 The Snappy-Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package snappy implements the snappy block-based compression format. // It aims for very high speeds and reasonable compression. // // The C++ snappy implementation is at https://github.com/google/snappy package snappy // import "github.com/golang/snappy" import ( "hash/crc32" ) /* Each encoded block begins with the varint-encoded length of the decoded data, followed by a sequence of chunks. Chunks begin and end on byte boundaries. The first byte of each chunk is broken into its 2 least and 6 most significant bits called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. Zero means a literal tag. All other values mean a copy tag. For literal tags: - If m < 60, the next 1 + m bytes are literal bytes. - Otherwise, let n be the little-endian unsigned integer denoted by the next m - 59 bytes. The next 1 + n bytes after that are literal bytes. For copy tags, length bytes are copied from offset bytes ago, in the style of Lempel-Ziv compression algorithms. In particular: - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 of the offset. The next byte is bits 0-7 of the offset. - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). The length is 1 + m. The offset is the little-endian unsigned integer denoted by the next 2 bytes. - For l == 3, this tag is a legacy format that is no longer supported. */ const ( tagLiteral = 0x00 tagCopy1 = 0x01 tagCopy2 = 0x02 tagCopy4 = 0x03 ) const ( checksumSize = 4 chunkHeaderSize = 4 magicChunk = "\xff\x06\x00\x00" + magicBody magicBody = "sNaPpY" // https://github.com/google/snappy/blob/master/framing_format.txt says // that "the uncompressed data in a chunk must be no longer than 65536 bytes". maxUncompressedChunkLen = 65536 ) const ( chunkTypeCompressedData = 0x00 chunkTypeUncompressedData = 0x01 chunkTypePadding = 0xfe chunkTypeStreamIdentifier = 0xff ) var crcTable = crc32.MakeTable(crc32.Castagnoli) // crc implements the checksum specified in section 3 of // https://github.com/google/snappy/blob/master/framing_format.txt func crc(b []byte) uint32 { c := crc32.Update(0, crcTable, b) return uint32(c>>15|c<<17) + 0xa282ead8 } golang-github-golang-snappy-0.0+git20150730.723cc1e/snappy_test.go000066400000000000000000000250331255631362400242360ustar00rootroot00000000000000// Copyright 2011 The Snappy-Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package snappy import ( "bytes" "flag" "fmt" "io" "io/ioutil" "math/rand" "net/http" "os" "path/filepath" "strings" "testing" ) var ( download = flag.Bool("download", false, "If true, download any missing files before running benchmarks") testdata = flag.String("testdata", "testdata", "Directory containing the test data") ) func roundtrip(b, ebuf, dbuf []byte) error { d, err := Decode(dbuf, Encode(ebuf, b)) if err != nil { return fmt.Errorf("decoding error: %v", err) } if !bytes.Equal(b, d) { return fmt.Errorf("roundtrip mismatch:\n\twant %v\n\tgot %v", b, d) } return nil } func TestEmpty(t *testing.T) { if err := roundtrip(nil, nil, nil); err != nil { t.Fatal(err) } } func TestSmallCopy(t *testing.T) { for _, ebuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} { for _, dbuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} { for i := 0; i < 32; i++ { s := "aaaa" + strings.Repeat("b", i) + "aaaabbbb" if err := roundtrip([]byte(s), ebuf, dbuf); err != nil { t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err) } } } } } func TestSmallRand(t *testing.T) { rng := rand.New(rand.NewSource(27354294)) for n := 1; n < 20000; n += 23 { b := make([]byte, n) for i := range b { b[i] = uint8(rng.Uint32()) } if err := roundtrip(b, nil, nil); err != nil { t.Fatal(err) } } } func TestSmallRegular(t *testing.T) { for n := 1; n < 20000; n += 23 { b := make([]byte, n) for i := range b { b[i] = uint8(i%10 + 'a') } if err := roundtrip(b, nil, nil); err != nil { t.Fatal(err) } } } func TestInvalidVarint(t *testing.T) { data := []byte("\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00") if _, err := DecodedLen(data); err != ErrCorrupt { t.Errorf("DecodedLen: got %v, want ErrCorrupt", err) } if _, err := Decode(nil, data); err != ErrCorrupt { t.Errorf("Decode: got %v, want ErrCorrupt", err) } // The encoded varint overflows 32 bits data = []byte("\xff\xff\xff\xff\xff\x00") if _, err := DecodedLen(data); err != ErrCorrupt { t.Errorf("DecodedLen: got %v, want ErrCorrupt", err) } if _, err := Decode(nil, data); err != ErrCorrupt { t.Errorf("Decode: got %v, want ErrCorrupt", err) } } func cmp(a, b []byte) error { if len(a) != len(b) { return fmt.Errorf("got %d bytes, want %d", len(a), len(b)) } for i := range a { if a[i] != b[i] { return fmt.Errorf("byte #%d: got 0x%02x, want 0x%02x", i, a[i], b[i]) } } return nil } func TestFramingFormat(t *testing.T) { // src is comprised of alternating 1e5-sized sequences of random // (incompressible) bytes and repeated (compressible) bytes. 1e5 was chosen // because it is larger than maxUncompressedChunkLen (64k). src := make([]byte, 1e6) rng := rand.New(rand.NewSource(1)) for i := 0; i < 10; i++ { if i%2 == 0 { for j := 0; j < 1e5; j++ { src[1e5*i+j] = uint8(rng.Intn(256)) } } else { for j := 0; j < 1e5; j++ { src[1e5*i+j] = uint8(i) } } } buf := new(bytes.Buffer) if _, err := NewWriter(buf).Write(src); err != nil { t.Fatalf("Write: encoding: %v", err) } dst, err := ioutil.ReadAll(NewReader(buf)) if err != nil { t.Fatalf("ReadAll: decoding: %v", err) } if err := cmp(dst, src); err != nil { t.Fatal(err) } } func TestReaderReset(t *testing.T) { gold := bytes.Repeat([]byte("All that is gold does not glitter,\n"), 10000) buf := new(bytes.Buffer) if _, err := NewWriter(buf).Write(gold); err != nil { t.Fatalf("Write: %v", err) } encoded, invalid, partial := buf.String(), "invalid", "partial" r := NewReader(nil) for i, s := range []string{encoded, invalid, partial, encoded, partial, invalid, encoded, encoded} { if s == partial { r.Reset(strings.NewReader(encoded)) if _, err := r.Read(make([]byte, 101)); err != nil { t.Errorf("#%d: %v", i, err) continue } continue } r.Reset(strings.NewReader(s)) got, err := ioutil.ReadAll(r) switch s { case encoded: if err != nil { t.Errorf("#%d: %v", i, err) continue } if err := cmp(got, gold); err != nil { t.Errorf("#%d: %v", i, err) continue } case invalid: if err == nil { t.Errorf("#%d: got nil error, want non-nil", i) continue } } } } func TestWriterReset(t *testing.T) { gold := bytes.Repeat([]byte("Not all those who wander are lost;\n"), 10000) var gots, wants [][]byte const n = 20 w, failed := NewWriter(nil), false for i := 0; i <= n; i++ { buf := new(bytes.Buffer) w.Reset(buf) want := gold[:len(gold)*i/n] if _, err := w.Write(want); err != nil { t.Errorf("#%d: Write: %v", i, err) failed = true continue } got, err := ioutil.ReadAll(NewReader(buf)) if err != nil { t.Errorf("#%d: ReadAll: %v", i, err) failed = true continue } gots = append(gots, got) wants = append(wants, want) } if failed { return } for i := range gots { if err := cmp(gots[i], wants[i]); err != nil { t.Errorf("#%d: %v", i, err) } } } func benchDecode(b *testing.B, src []byte) { encoded := Encode(nil, src) // Bandwidth is in amount of uncompressed data. b.SetBytes(int64(len(src))) b.ResetTimer() for i := 0; i < b.N; i++ { Decode(src, encoded) } } func benchEncode(b *testing.B, src []byte) { // Bandwidth is in amount of uncompressed data. b.SetBytes(int64(len(src))) dst := make([]byte, MaxEncodedLen(len(src))) b.ResetTimer() for i := 0; i < b.N; i++ { Encode(dst, src) } } func readFile(b testing.TB, filename string) []byte { src, err := ioutil.ReadFile(filename) if err != nil { b.Skipf("skipping benchmark: %v", err) } if len(src) == 0 { b.Fatalf("%s has zero length", filename) } return src } // expand returns a slice of length n containing repeated copies of src. func expand(src []byte, n int) []byte { dst := make([]byte, n) for x := dst; len(x) > 0; { i := copy(x, src) x = x[i:] } return dst } func benchWords(b *testing.B, n int, decode bool) { // Note: the file is OS-language dependent so the resulting values are not // directly comparable for non-US-English OS installations. data := expand(readFile(b, "/usr/share/dict/words"), n) if decode { benchDecode(b, data) } else { benchEncode(b, data) } } func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) } func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) } func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) } func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) } func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) } func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) } func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) } func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) } // testFiles' values are copied directly from // https://raw.githubusercontent.com/google/snappy/master/snappy_unittest.cc // The label field is unused in snappy-go. var testFiles = []struct { label string filename string }{ {"html", "html"}, {"urls", "urls.10K"}, {"jpg", "fireworks.jpeg"}, {"jpg_200", "fireworks.jpeg"}, {"pdf", "paper-100k.pdf"}, {"html4", "html_x_4"}, {"txt1", "alice29.txt"}, {"txt2", "asyoulik.txt"}, {"txt3", "lcet10.txt"}, {"txt4", "plrabn12.txt"}, {"pb", "geo.protodata"}, {"gaviota", "kppkn.gtb"}, } // The test data files are present at this canonical URL. const baseURL = "https://raw.githubusercontent.com/google/snappy/master/testdata/" func downloadTestdata(b *testing.B, basename string) (errRet error) { filename := filepath.Join(*testdata, basename) if stat, err := os.Stat(filename); err == nil && stat.Size() != 0 { return nil } if !*download { b.Skipf("test data not found; skipping benchmark without the -download flag") } // Download the official snappy C++ implementation reference test data // files for benchmarking. if err := os.Mkdir(*testdata, 0777); err != nil && !os.IsExist(err) { return fmt.Errorf("failed to create testdata: %s", err) } f, err := os.Create(filename) if err != nil { return fmt.Errorf("failed to create %s: %s", filename, err) } defer f.Close() defer func() { if errRet != nil { os.Remove(filename) } }() url := baseURL + basename resp, err := http.Get(url) if err != nil { return fmt.Errorf("failed to download %s: %s", url, err) } defer resp.Body.Close() if s := resp.StatusCode; s != http.StatusOK { return fmt.Errorf("downloading %s: HTTP status code %d (%s)", url, s, http.StatusText(s)) } _, err = io.Copy(f, resp.Body) if err != nil { return fmt.Errorf("failed to download %s to %s: %s", url, filename, err) } return nil } func benchFile(b *testing.B, n int, decode bool) { if err := downloadTestdata(b, testFiles[n].filename); err != nil { b.Fatalf("failed to download testdata: %s", err) } data := readFile(b, filepath.Join(*testdata, testFiles[n].filename)) if decode { benchDecode(b, data) } else { benchEncode(b, data) } } // Naming convention is kept similar to what snappy's C++ implementation uses. func Benchmark_UFlat0(b *testing.B) { benchFile(b, 0, true) } func Benchmark_UFlat1(b *testing.B) { benchFile(b, 1, true) } func Benchmark_UFlat2(b *testing.B) { benchFile(b, 2, true) } func Benchmark_UFlat3(b *testing.B) { benchFile(b, 3, true) } func Benchmark_UFlat4(b *testing.B) { benchFile(b, 4, true) } func Benchmark_UFlat5(b *testing.B) { benchFile(b, 5, true) } func Benchmark_UFlat6(b *testing.B) { benchFile(b, 6, true) } func Benchmark_UFlat7(b *testing.B) { benchFile(b, 7, true) } func Benchmark_UFlat8(b *testing.B) { benchFile(b, 8, true) } func Benchmark_UFlat9(b *testing.B) { benchFile(b, 9, true) } func Benchmark_UFlat10(b *testing.B) { benchFile(b, 10, true) } func Benchmark_UFlat11(b *testing.B) { benchFile(b, 11, true) } func Benchmark_ZFlat0(b *testing.B) { benchFile(b, 0, false) } func Benchmark_ZFlat1(b *testing.B) { benchFile(b, 1, false) } func Benchmark_ZFlat2(b *testing.B) { benchFile(b, 2, false) } func Benchmark_ZFlat3(b *testing.B) { benchFile(b, 3, false) } func Benchmark_ZFlat4(b *testing.B) { benchFile(b, 4, false) } func Benchmark_ZFlat5(b *testing.B) { benchFile(b, 5, false) } func Benchmark_ZFlat6(b *testing.B) { benchFile(b, 6, false) } func Benchmark_ZFlat7(b *testing.B) { benchFile(b, 7, false) } func Benchmark_ZFlat8(b *testing.B) { benchFile(b, 8, false) } func Benchmark_ZFlat9(b *testing.B) { benchFile(b, 9, false) } func Benchmark_ZFlat10(b *testing.B) { benchFile(b, 10, false) } func Benchmark_ZFlat11(b *testing.B) { benchFile(b, 11, false) }