pax_global_header00006660000000000000000000000064131422452230014510gustar00rootroot0000000000000052 comment=5c37fe3735342a2e0d01c87a907579987c8936cc xxhash-1.0.0/000077500000000000000000000000001314224522300130115ustar00rootroot00000000000000xxhash-1.0.0/LICENSE.txt000066400000000000000000000020541314224522300146350ustar00rootroot00000000000000Copyright (c) 2016 Caleb Spare MIT License Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. xxhash-1.0.0/README.md000066400000000000000000000032711314224522300142730ustar00rootroot00000000000000# xxhash [![GoDoc](https://godoc.org/github.com/cespare/xxhash?status.svg)](https://godoc.org/github.com/cespare/xxhash) xxhash is a Go implementation of the 64-bit [xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a high-quality hashing algorithm that is much faster than anything in the Go standard library. The API is very small, taking its cue from the other hashing packages in the standard library: $ go doc github.com/cespare/xxhash ! package xxhash // import "github.com/cespare/xxhash" Package xxhash implements the 64-bit variant of xxHash (XXH64) as described at http://cyan4973.github.io/xxHash/. func New() hash.Hash64 func Sum64(b []byte) uint64 func Sum64String(s string) uint64 This implementation provides a fast pure-Go implementation and an even faster assembly implementation for amd64. ## Benchmarks Here are some quick benchmarks comparing the pure-Go and assembly implementations of Sum64 against another popular Go XXH64 implementation, [github.com/OneOfOne/xxhash](https://github.com/OneOfOne/xxhash): | input size | OneOfOne | cespare (noasm) | cespare | | --- | --- | --- | --- | | 5 B | 438.34 MB/s | 596.40 MB/s | 711.11 MB/s | | 100 B | 3676.54 MB/s | 4301.40 MB/s | 4598.95 MB/s | | 4 KB | 8128.64 MB/s | 8840.83 MB/s | 10549.72 MB/s | | 10 MB | 7335.19 MB/s | 7736.64 MB/s | 9024.04 MB/s | ## Projects using this package - [InfluxDB](https://github.com/influxdata/influxdb) - [Prometheus](https://github.com/prometheus/prometheus) xxhash-1.0.0/xxhash.go000066400000000000000000000103411314224522300146420ustar00rootroot00000000000000// Package xxhash implements the 64-bit variant of xxHash (XXH64) as described // at http://cyan4973.github.io/xxHash/. package xxhash import ( "encoding/binary" "hash" ) const ( prime1 uint64 = 11400714785074694791 prime2 uint64 = 14029467366897019727 prime3 uint64 = 1609587929392839161 prime4 uint64 = 9650029242287828579 prime5 uint64 = 2870177450012600261 ) // NOTE(caleb): I'm using both consts and vars of the primes. Using consts where // possible in the Go code is worth a small (but measurable) performance boost // by avoiding some MOVQs. Vars are needed for the asm and also are useful for // convenience in the Go code in a few places where we need to intentionally // avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the // result overflows a uint64). var ( prime1v = prime1 prime2v = prime2 prime3v = prime3 prime4v = prime4 prime5v = prime5 ) type xxh struct { v1 uint64 v2 uint64 v3 uint64 v4 uint64 total int mem [32]byte n int // how much of mem is used } // New creates a new hash.Hash64 that implements the 64-bit xxHash algorithm. func New() hash.Hash64 { var x xxh x.Reset() return &x } func (x *xxh) Reset() { x.n = 0 x.total = 0 x.v1 = prime1v + prime2 x.v2 = prime2 x.v3 = 0 x.v4 = -prime1v } func (x *xxh) Size() int { return 8 } func (x *xxh) BlockSize() int { return 32 } // Write adds more data to x. It always returns len(b), nil. func (x *xxh) Write(b []byte) (n int, err error) { n = len(b) x.total += len(b) if x.n+len(b) < 32 { // This new data doesn't even fill the current block. copy(x.mem[x.n:], b) x.n += len(b) return } if x.n > 0 { // Finish off the partial block. copy(x.mem[x.n:], b) x.v1 = round(x.v1, u64(x.mem[0:8])) x.v2 = round(x.v2, u64(x.mem[8:16])) x.v3 = round(x.v3, u64(x.mem[16:24])) x.v4 = round(x.v4, u64(x.mem[24:32])) b = b[32-x.n:] x.n = 0 } if len(b) >= 32 { // One or more full blocks left. b = writeBlocks(x, b) } // Store any remaining partial block. copy(x.mem[:], b) x.n = len(b) return } func (x *xxh) Sum(b []byte) []byte { s := x.Sum64() return append( b, byte(s>>56), byte(s>>48), byte(s>>40), byte(s>>32), byte(s>>24), byte(s>>16), byte(s>>8), byte(s), ) } func (x *xxh) Sum64() uint64 { var h uint64 if x.total >= 32 { v1, v2, v3, v4 := x.v1, x.v2, x.v3, x.v4 h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) h = mergeRound(h, v1) h = mergeRound(h, v2) h = mergeRound(h, v3) h = mergeRound(h, v4) } else { h = x.v3 + prime5 } h += uint64(x.total) i, end := 0, x.n for ; i+8 <= end; i += 8 { k1 := round(0, u64(x.mem[i:i+8])) h ^= k1 h = rol27(h)*prime1 + prime4 } if i+4 <= end { h ^= uint64(u32(x.mem[i:i+4])) * prime1 h = rol23(h)*prime2 + prime3 i += 4 } for i < end { h ^= uint64(x.mem[i]) * prime5 h = rol11(h) * prime1 i++ } h ^= h >> 33 h *= prime2 h ^= h >> 29 h *= prime3 h ^= h >> 32 return h } func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) } func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) } func round(acc, input uint64) uint64 { acc += input * prime2 acc = rol31(acc) acc *= prime1 return acc } func mergeRound(acc, val uint64) uint64 { val = round(0, val) acc ^= val acc = acc*prime1 + prime4 return acc } // It's important for performance to get the rotates to actually compile to // ROLQs. gc will do this for us but only if rotate amount is a constant. // // TODO(caleb): In Go 1.9 a single function // rol(x uint64, k uint) uint64 // should do instead. See https://golang.org/issue/18254. // // TODO(caleb): In Go 1.x (1.9?) consider using the new math/bits package to be more // explicit about things. See https://golang.org/issue/18616. func rol1(x uint64) uint64 { return (x << 1) | (x >> (64 - 1)) } func rol7(x uint64) uint64 { return (x << 7) | (x >> (64 - 7)) } func rol11(x uint64) uint64 { return (x << 11) | (x >> (64 - 11)) } func rol12(x uint64) uint64 { return (x << 12) | (x >> (64 - 12)) } func rol18(x uint64) uint64 { return (x << 18) | (x >> (64 - 18)) } func rol23(x uint64) uint64 { return (x << 23) | (x >> (64 - 23)) } func rol27(x uint64) uint64 { return (x << 27) | (x >> (64 - 27)) } func rol31(x uint64) uint64 { return (x << 31) | (x >> (64 - 31)) } xxhash-1.0.0/xxhash_amd64.go000066400000000000000000000003151314224522300156350ustar00rootroot00000000000000// +build !appengine // +build gc // +build !noasm package xxhash // Sum64 computes the 64-bit xxHash digest of b. // //go:noescape func Sum64(b []byte) uint64 func writeBlocks(x *xxh, b []byte) []byte xxhash-1.0.0/xxhash_amd64.s000066400000000000000000000101541314224522300154740ustar00rootroot00000000000000// +build !appengine // +build gc // +build !noasm #include "textflag.h" // Register allocation: // AX h // CX pointer to advance through b // DX n // BX loop end // R8 v1, k1 // R9 v2 // R10 v3 // R11 v4 // R12 tmp // R13 prime1v // R14 prime2v // R15 prime4v // round reads from and advances the buffer pointer in CX. // It assumes that R13 has prime1v and R14 has prime2v. #define round(r) \ MOVQ (CX), R12 \ ADDQ $8, CX \ IMULQ R14, R12 \ ADDQ R12, r \ ROLQ $31, r \ IMULQ R13, r // mergeRound applies a merge round on the two registers acc and val. // It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v. #define mergeRound(acc, val) \ IMULQ R14, val \ ROLQ $31, val \ IMULQ R13, val \ XORQ val, acc \ IMULQ R13, acc \ ADDQ R15, acc // func Sum64(b []byte) uint64 TEXT ·Sum64(SB), NOSPLIT, $0-32 // Load fixed primes. MOVQ ·prime1v(SB), R13 MOVQ ·prime2v(SB), R14 MOVQ ·prime4v(SB), R15 // Load slice. MOVQ b_base+0(FP), CX MOVQ b_len+8(FP), DX LEAQ (CX)(DX*1), BX // The first loop limit will be len(b)-32. SUBQ $32, BX // Check whether we have at least one block. CMPQ DX, $32 JLT noBlocks // Set up initial state (v1, v2, v3, v4). MOVQ R13, R8 ADDQ R14, R8 MOVQ R14, R9 XORQ R10, R10 XORQ R11, R11 SUBQ R13, R11 // Loop until CX > BX. blockLoop: round(R8) round(R9) round(R10) round(R11) CMPQ CX, BX JLE blockLoop MOVQ R8, AX ROLQ $1, AX MOVQ R9, R12 ROLQ $7, R12 ADDQ R12, AX MOVQ R10, R12 ROLQ $12, R12 ADDQ R12, AX MOVQ R11, R12 ROLQ $18, R12 ADDQ R12, AX mergeRound(AX, R8) mergeRound(AX, R9) mergeRound(AX, R10) mergeRound(AX, R11) JMP afterBlocks noBlocks: MOVQ ·prime5v(SB), AX afterBlocks: ADDQ DX, AX // Right now BX has len(b)-32, and we want to loop until CX > len(b)-8. ADDQ $24, BX CMPQ CX, BX JG fourByte wordLoop: // Calculate k1. MOVQ (CX), R8 ADDQ $8, CX IMULQ R14, R8 ROLQ $31, R8 IMULQ R13, R8 XORQ R8, AX ROLQ $27, AX IMULQ R13, AX ADDQ R15, AX CMPQ CX, BX JLE wordLoop fourByte: ADDQ $4, BX CMPQ CX, BX JG singles MOVL (CX), R8 ADDQ $4, CX IMULQ R13, R8 XORQ R8, AX ROLQ $23, AX IMULQ R14, AX ADDQ ·prime3v(SB), AX singles: ADDQ $4, BX CMPQ CX, BX JGE finalize singlesLoop: MOVBQZX (CX), R12 ADDQ $1, CX IMULQ ·prime5v(SB), R12 XORQ R12, AX ROLQ $11, AX IMULQ R13, AX CMPQ CX, BX JL singlesLoop finalize: MOVQ AX, R12 SHRQ $33, R12 XORQ R12, AX IMULQ R14, AX MOVQ AX, R12 SHRQ $29, R12 XORQ R12, AX IMULQ ·prime3v(SB), AX MOVQ AX, R12 SHRQ $32, R12 XORQ R12, AX MOVQ AX, ret+24(FP) RET // writeBlocks uses the same registers as above except that it uses AX to store // the x pointer. // func writeBlocks(x *xxh, b []byte) []byte TEXT ·writeBlocks(SB), NOSPLIT, $0-56 // Load fixed primes needed for round. MOVQ ·prime1v(SB), R13 MOVQ ·prime2v(SB), R14 // Load slice. MOVQ b_base+8(FP), CX MOVQ CX, ret_base+32(FP) // initialize return base pointer; see NOTE below MOVQ b_len+16(FP), DX LEAQ (CX)(DX*1), BX SUBQ $32, BX // Load vN from x. MOVQ x+0(FP), AX MOVQ 0(AX), R8 // v1 MOVQ 8(AX), R9 // v2 MOVQ 16(AX), R10 // v3 MOVQ 24(AX), R11 // v4 // We don't need to check the loop condition here; this function is // always called with at least one block of data to process. blockLoop: round(R8) round(R9) round(R10) round(R11) CMPQ CX, BX JLE blockLoop // Copy vN back to x. MOVQ R8, 0(AX) MOVQ R9, 8(AX) MOVQ R10, 16(AX) MOVQ R11, 24(AX) // Construct return slice. // NOTE: It's important that we don't construct a slice that has a base // pointer off the end of the original slice, as in Go 1.7+ this will // cause runtime crashes. (See discussion in, for example, // https://github.com/golang/go/issues/16772.) // Therefore, we calculate the length/cap first, and if they're zero, we // keep the old base. This is what the compiler does as well if you // write code like // b = b[len(b):] // New length is 32 - (CX - BX) -> BX+32 - CX. ADDQ $32, BX SUBQ CX, BX JZ afterSetBase MOVQ CX, ret_base+32(FP) afterSetBase: MOVQ BX, ret_len+40(FP) MOVQ BX, ret_cap+48(FP) // set cap == len RET xxhash-1.0.0/xxhash_amd64_test.go000066400000000000000000000025261314224522300167020ustar00rootroot00000000000000// +build !appengine // +build gc // +build !noasm package xxhash // TODO(caleb): Fix and re-enable with any ideas I get from // https://groups.google.com/d/msg/golang-nuts/wb5I2tjrwoc/xCzk6uchBgAJ //func TestSum64ASM(t *testing.T) { // for i := 0; i < 500; i++ { // b := make([]byte, i) // for j := range b { // b[j] = byte(j) // } // pureGo := sum64Go(b) // asm := Sum64(b) // if pureGo != asm { // t.Fatalf("[i=%d] pure go gave 0x%x; asm gave 0x%x", i, pureGo, asm) // } // } //} //func TestWriteBlocksASM(t *testing.T) { // x0 := New().(*xxh) // x1 := New().(*xxh) // for i := 32; i < 500; i++ { // b := make([]byte, i) // for j := range b { // b[j] = byte(j) // } // pureGo := writeBlocksGo(x0, b) // asm := writeBlocks(x1, b) // if !reflect.DeepEqual(pureGo, asm) { // t.Fatalf("[i=%d] pure go gave %v; asm gave %v", i, pureGo, asm) // } // if !reflect.DeepEqual(x0, x1) { // t.Fatalf("[i=%d] pure go had state %v; asm had state %v", i, x0, x1) // } // } //} xxhash-1.0.0/xxhash_other.go000066400000000000000000000027741314224522300160560ustar00rootroot00000000000000// +build !amd64 appengine !gc noasm package xxhash // Sum64 computes the 64-bit xxHash digest of b. func Sum64(b []byte) uint64 { // A simpler version would be // x := New() // x.Write(b) // return x.Sum64() // but this is faster, particularly for small inputs. n := len(b) var h uint64 if n >= 32 { v1 := prime1v + prime2 v2 := prime2 v3 := uint64(0) v4 := -prime1v for len(b) >= 32 { v1 = round(v1, u64(b[0:8:len(b)])) v2 = round(v2, u64(b[8:16:len(b)])) v3 = round(v3, u64(b[16:24:len(b)])) v4 = round(v4, u64(b[24:32:len(b)])) b = b[32:len(b):len(b)] } h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) h = mergeRound(h, v1) h = mergeRound(h, v2) h = mergeRound(h, v3) h = mergeRound(h, v4) } else { h = prime5 } h += uint64(n) i, end := 0, len(b) for ; i+8 <= end; i += 8 { k1 := round(0, u64(b[i:i+8:len(b)])) h ^= k1 h = rol27(h)*prime1 + prime4 } if i+4 <= end { h ^= uint64(u32(b[i:i+4:len(b)])) * prime1 h = rol23(h)*prime2 + prime3 i += 4 } for ; i < end; i++ { h ^= uint64(b[i]) * prime5 h = rol11(h) * prime1 } h ^= h >> 33 h *= prime2 h ^= h >> 29 h *= prime3 h ^= h >> 32 return h } func writeBlocks(x *xxh, b []byte) []byte { v1, v2, v3, v4 := x.v1, x.v2, x.v3, x.v4 for len(b) >= 32 { v1 = round(v1, u64(b[0:8:len(b)])) v2 = round(v2, u64(b[8:16:len(b)])) v3 = round(v3, u64(b[16:24:len(b)])) v4 = round(v4, u64(b[24:32:len(b)])) b = b[32:len(b):len(b)] } x.v1, x.v2, x.v3, x.v4 = v1, v2, v3, v4 return b } xxhash-1.0.0/xxhash_safe.go000066400000000000000000000003531314224522300156420ustar00rootroot00000000000000// +build appengine // This file contains the safe implementations of otherwise unsafe-using code. package xxhash // Sum64String computes the 64-bit xxHash digest of s. func Sum64String(s string) uint64 { return Sum64([]byte(s)) } xxhash-1.0.0/xxhash_test.go000066400000000000000000000064471314224522300157150ustar00rootroot00000000000000package xxhash import ( "bytes" "encoding/binary" "fmt" "hash" "hash/crc32" "strings" "testing" OneOfOne "github.com/OneOfOne/xxhash" "github.com/spaolacci/murmur3" ) var result uint64 func BenchmarkStringHash(b *testing.B) { const s = "abcdefghijklmnop" var r uint64 b.ReportAllocs() for n := 0; n < b.N; n++ { r = Sum64([]byte(s)) } result = r } func TestSum(t *testing.T) { for i, tt := range []struct { input string want uint64 }{ {"", 0xef46db3751d8e999}, {"a", 0xd24ec4f1a98c6e5b}, {"as", 0x1c330fb2d66be179}, {"asd", 0x631c37ce72a97393}, {"asdf", 0x415872f599cea71e}, { // Exactly 63 characters, which exercises all code paths. "Call me Ishmael. Some years ago--never mind how long precisely-", 0x02a2e85470d6fd96, }, } { for chunkSize := 1; chunkSize <= len(tt.input); chunkSize++ { x := New() for j := 0; j < len(tt.input); j += chunkSize { end := j + chunkSize if end > len(tt.input) { end = len(tt.input) } chunk := []byte(tt.input[j:end]) n, err := x.Write(chunk) if err != nil || n != len(chunk) { t.Fatalf("[i=%d,chunkSize=%d] Write: got (%d, %v); want (%d, nil)", i, chunkSize, n, err, len(chunk)) } } if got := x.Sum64(); got != tt.want { t.Fatalf("[i=%d,chunkSize=%d] got 0x%x; want 0x%x", i, chunkSize, got, tt.want) } var b [8]byte binary.BigEndian.PutUint64(b[:], tt.want) if got := x.Sum(nil); !bytes.Equal(got, b[:]) { t.Fatalf("[i=%d,chunkSize=%d] Sum: got %v; want %v", i, chunkSize, got, b[:]) } } if got := Sum64([]byte(tt.input)); got != tt.want { t.Fatalf("[i=%d] Sum64: got 0x%x; want 0x%x", i, got, tt.want) } if got := Sum64String(tt.input); got != tt.want { t.Fatalf("[i=%d] Sum64String: got 0x%x; want 0x%x", i, got, tt.want) } } } func TestReset(t *testing.T) { parts := []string{"The quic", "k br", "o", "wn fox jumps", " ov", "er the lazy ", "dog."} x := New() for _, part := range parts { x.Write([]byte(part)) } h0 := x.Sum64() x.Reset() x.Write([]byte(strings.Join(parts, ""))) h1 := x.Sum64() if h0 != h1 { t.Errorf("0x%x != 0x%x", h0, h1) } } var ( sink uint64 sinkb []byte ) func sumFunc(h hash.Hash) func(b []byte) uint64 { return func(b []byte) uint64 { h.Reset() h.Write(b) sinkb = h.Sum(nil) return 0 // value doesn't matter } } func BenchmarkHashes(b *testing.B) { for _, ht := range []struct { name string f interface{} }{ {"xxhash", Sum64}, {"xxhash-string", Sum64String}, {"OneOfOne", OneOfOne.Checksum64}, {"murmur3", murmur3.Sum64}, {"CRC-32", sumFunc(crc32.NewIEEE())}, } { for _, nt := range []struct { name string n int }{ {"5 B", 5}, {"100 B", 100}, {"4 KB", 4e3}, {"10 MB", 10e6}, } { input := make([]byte, nt.n) for i := range input { input[i] = byte(i) } benchName := fmt.Sprintf("%s,n=%s", ht.name, nt.name) if ht.name == "xxhash-string" { f := ht.f.(func(string) uint64) s := string(input) b.Run(benchName, func(b *testing.B) { b.SetBytes(int64(len(input))) for i := 0; i < b.N; i++ { sink = f(s) } }) } else { f := ht.f.(func([]byte) uint64) b.Run(benchName, func(b *testing.B) { b.SetBytes(int64(len(input))) for i := 0; i < b.N; i++ { sink = f(input) } }) } } } } xxhash-1.0.0/xxhash_unsafe.go000066400000000000000000000016121314224522300162040ustar00rootroot00000000000000// +build !appengine // This file encapsulates usage of unsafe. // xxhash_safe.go contains the safe implementations. package xxhash import ( "reflect" "unsafe" ) // Sum64String computes the 64-bit xxHash digest of s. // It may be faster than Sum64([]byte(s)) by avoiding a copy. // // TODO(caleb): Consider removing this if an optimization is ever added to make // it unnecessary: https://golang.org/issue/2205. // // TODO(caleb): We still have a function call; we could instead write Go/asm // copies of Sum64 for strings to squeeze out a bit more speed. func Sum64String(s string) uint64 { // See https://groups.google.com/d/msg/golang-nuts/dcjzJy-bSpw/tcZYBzQqAQAJ // for some discussion about this unsafe conversion. var b []byte bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data bh.Len = len(s) bh.Cap = len(s) return Sum64(b) } xxhash-1.0.0/xxhsum/000077500000000000000000000000001314224522300143455ustar00rootroot00000000000000xxhash-1.0.0/xxhsum/.gitignore000066400000000000000000000000101314224522300163240ustar00rootroot00000000000000/xxhsum xxhash-1.0.0/xxhsum/xxhsum.go000066400000000000000000000015521314224522300162330ustar00rootroot00000000000000package main import ( "fmt" "io" "os" "github.com/cespare/xxhash" ) func main() { if contains(os.Args[1:], "-h") { fmt.Fprintf(os.Stderr, `Usage: %s [filenames] If no filenames are provided or only - is given, input is read from stdin. `, os.Args[0]) os.Exit(1) } if len(os.Args) < 2 || len(os.Args) == 2 && string(os.Args[1]) == "-" { printHash(os.Stdin, "-") return } for _, path := range os.Args[1:] { f, err := os.Open(path) if err != nil { fmt.Fprintln(os.Stderr, err) continue } printHash(f, path) f.Close() } } func contains(ss []string, s string) bool { for _, s1 := range ss { if s1 == s { return true } } return false } func printHash(r io.Reader, name string) { h := xxhash.New() if _, err := io.Copy(h, r); err != nil { fmt.Fprintln(os.Stderr, err) return } fmt.Printf("%016x %s\n", h.Sum64(), name) }