pax_global_header00006660000000000000000000000064146036033120014510gustar00rootroot0000000000000052 comment=998dce232f17418a7a5721ecf87ca714025a3243 xxhash-2.3.0/000077500000000000000000000000001460360331200130155ustar00rootroot00000000000000xxhash-2.3.0/.github/000077500000000000000000000000001460360331200143555ustar00rootroot00000000000000xxhash-2.3.0/.github/workflows/000077500000000000000000000000001460360331200164125ustar00rootroot00000000000000xxhash-2.3.0/.github/workflows/test.yml000066400000000000000000000024311460360331200201140ustar00rootroot00000000000000name: Test on: push: branches: [main] pull_request: jobs: test: strategy: matrix: go-version: [1.18.x, 1.19.x] os: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.os }} steps: - name: Install go uses: WillAbides/setup-go-faster@v1.5.0 with: go-version: ${{ matrix.go-version }} - name: Check out code uses: actions/checkout@v2 - name: Test run: go test -count 1 -bench . -benchtime 1x ./... - name: Test with -tags purego run: go test -count 1 -bench . -benchtime 1x -tags purego ./... test-qemu: needs: test strategy: matrix: go-version: [1.18.x, 1.19.x] arch: [386, arm, arm64] runs-on: ubuntu-latest steps: - name: Install go uses: WillAbides/setup-go-faster@v1.5.0 with: go-version: ${{ matrix.go-version }} - name: Install QEMU uses: docker/setup-qemu-action@v1 - name: Check out code uses: actions/checkout@v2 - name: Run test via qemu/binfmt # TODO: Run the dynamic linking tests as well. That is a little more # involved. run: go test -v -count 1 -bench . -benchtime 1x env: GOARCH: ${{ matrix.arch }} xxhash-2.3.0/LICENSE.txt000066400000000000000000000020541460360331200146410ustar00rootroot00000000000000Copyright (c) 2016 Caleb Spare MIT License Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. xxhash-2.3.0/README.md000066400000000000000000000046551460360331200143060ustar00rootroot00000000000000# xxhash [![Go Reference](https://pkg.go.dev/badge/github.com/cespare/xxhash/v2.svg)](https://pkg.go.dev/github.com/cespare/xxhash/v2) [![Test](https://github.com/cespare/xxhash/actions/workflows/test.yml/badge.svg)](https://github.com/cespare/xxhash/actions/workflows/test.yml) xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a high-quality hashing algorithm that is much faster than anything in the Go standard library. This package provides a straightforward API: ``` func Sum64(b []byte) uint64 func Sum64String(s string) uint64 type Digest struct{ ... } func New() *Digest ``` The `Digest` type implements hash.Hash64. Its key methods are: ``` func (*Digest) Write([]byte) (int, error) func (*Digest) WriteString(string) (int, error) func (*Digest) Sum64() uint64 ``` The package is written with optimized pure Go and also contains even faster assembly implementations for amd64 and arm64. If desired, the `purego` build tag opts into using the Go code even on those architectures. [xxHash]: http://cyan4973.github.io/xxHash/ ## Compatibility This package is in a module and the latest code is in version 2 of the module. You need a version of Go with at least "minimal module compatibility" to use github.com/cespare/xxhash/v2: * 1.9.7+ for Go 1.9 * 1.10.3+ for Go 1.10 * Go 1.11 or later I recommend using the latest release of Go. ## Benchmarks Here are some quick benchmarks comparing the pure-Go and assembly implementations of Sum64. | input size | purego | asm | | ---------- | --------- | --------- | | 4 B | 1.3 GB/s | 1.2 GB/s | | 16 B | 2.9 GB/s | 3.5 GB/s | | 100 B | 6.9 GB/s | 8.1 GB/s | | 4 KB | 11.7 GB/s | 16.7 GB/s | | 10 MB | 12.0 GB/s | 17.3 GB/s | These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C CPU using the following commands under Go 1.19.2: ``` benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$') benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$') ``` ## Projects using this package - [InfluxDB](https://github.com/influxdata/influxdb) - [Prometheus](https://github.com/prometheus/prometheus) - [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) - [FreeCache](https://github.com/coocood/freecache) - [FastCache](https://github.com/VictoriaMetrics/fastcache) - [Ristretto](https://github.com/dgraph-io/ristretto) - [Badger](https://github.com/dgraph-io/badger) xxhash-2.3.0/bench_test.go000066400000000000000000000023511460360331200154630ustar00rootroot00000000000000package xxhash import ( "strings" "testing" ) var benchmarks = []struct { name string n int64 }{ {"4B", 4}, {"16B", 16}, {"100B", 100}, {"4KB", 4e3}, {"10MB", 10e6}, } func BenchmarkSum64(b *testing.B) { for _, bb := range benchmarks { in := make([]byte, bb.n) for i := range in { in[i] = byte(i) } b.Run(bb.name, func(b *testing.B) { b.SetBytes(bb.n) for i := 0; i < b.N; i++ { _ = Sum64(in) } }) } } func BenchmarkSum64String(b *testing.B) { for _, bb := range benchmarks { s := strings.Repeat("a", int(bb.n)) b.Run(bb.name, func(b *testing.B) { b.SetBytes(bb.n) for i := 0; i < b.N; i++ { _ = Sum64String(s) } }) } } func BenchmarkDigestBytes(b *testing.B) { for _, bb := range benchmarks { in := make([]byte, bb.n) for i := range in { in[i] = byte(i) } b.Run(bb.name, func(b *testing.B) { b.SetBytes(bb.n) for i := 0; i < b.N; i++ { h := New() h.Write(in) _ = h.Sum64() } }) } } func BenchmarkDigestString(b *testing.B) { for _, bb := range benchmarks { s := strings.Repeat("a", int(bb.n)) b.Run(bb.name, func(b *testing.B) { b.SetBytes(bb.n) for i := 0; i < b.N; i++ { h := New() h.WriteString(s) _ = h.Sum64() } }) } } xxhash-2.3.0/dynamic/000077500000000000000000000000001460360331200144415ustar00rootroot00000000000000xxhash-2.3.0/dynamic/.gitignore000066400000000000000000000000131460360331200164230ustar00rootroot00000000000000/plugin.so xxhash-2.3.0/dynamic/dynamic_test.go000066400000000000000000000015161460360331200174560ustar00rootroot00000000000000//go:build linux || darwin // +build linux darwin package main import ( "bytes" "log" "os" "os/exec" "plugin" "testing" ) // This is a cursory test that checks whether things work under dynamic linking. func TestMain(m *testing.M) { cmd := exec.Command( "go", "build", "-buildmode", "plugin", "-o", "plugin.so", "plugin.go", ) var out bytes.Buffer cmd.Stdout = &out cmd.Stderr = &out if err := cmd.Run(); err != nil { log.Fatalf("Error building plugin: %s\nOutput:\n%s", err, out.String()) } os.Exit(m.Run()) } func TestDynamic(t *testing.T) { plug, err := plugin.Open("plugin.so") if err != nil { t.Fatal(err) } for _, test := range []string{ "TestSum", "TestDigest", } { f, err := plug.Lookup(test) if err != nil { t.Fatalf("cannot find func %s: %s", test, err) } f.(func(*testing.T))(t) } } xxhash-2.3.0/dynamic/plugin.go000066400000000000000000000017551460360331200162760ustar00rootroot00000000000000//go:build ignore // +build ignore package main import ( "fmt" "log" "testing" "github.com/cespare/xxhash/v2" ) const ( in = "Call me Ishmael. Some years ago--never mind how long precisely-" want = uint64(0x02a2e85470d6fd96) ) func TestSum(t *testing.T) { got := xxhash.Sum64String(in) if got != want { t.Fatalf("Sum64String: got 0x%x; want 0x%x", got, want) } } func TestDigest(t *testing.T) { for chunkSize := 1; chunkSize <= len(in); chunkSize++ { name := fmt.Sprintf("[chunkSize=%d]", chunkSize) t.Run(name, func(t *testing.T) { d := xxhash.New() for i := 0; i < len(in); i += chunkSize { chunk := in[i:] if len(chunk) > chunkSize { chunk = chunk[:chunkSize] } n, err := d.WriteString(chunk) if err != nil || n != len(chunk) { t.Fatalf("Digest.WriteString: got (%d, %v); want (%d, nil)", n, err, len(chunk)) } } if got := d.Sum64(); got != want { log.Fatalf("Digest.Sum64: got 0x%x; want 0x%x", got, want) } }) } } xxhash-2.3.0/go.mod000066400000000000000000000000551460360331200141230ustar00rootroot00000000000000module github.com/cespare/xxhash/v2 go 1.11 xxhash-2.3.0/go.sum000066400000000000000000000000001460360331200141360ustar00rootroot00000000000000xxhash-2.3.0/testall.sh000077500000000000000000000004321460360331200150230ustar00rootroot00000000000000#!/bin/bash set -eu -o pipefail # Small convenience script for running the tests with various combinations of # arch/tags. This assumes we're running on amd64 and have qemu available. go test ./... go test -tags purego ./... GOARCH=arm64 go test GOARCH=arm64 go test -tags purego xxhash-2.3.0/xxhash.go000066400000000000000000000130341460360331200146500ustar00rootroot00000000000000// Package xxhash implements the 64-bit variant of xxHash (XXH64) as described // at http://cyan4973.github.io/xxHash/. package xxhash import ( "encoding/binary" "errors" "math/bits" ) const ( prime1 uint64 = 11400714785074694791 prime2 uint64 = 14029467366897019727 prime3 uint64 = 1609587929392839161 prime4 uint64 = 9650029242287828579 prime5 uint64 = 2870177450012600261 ) // Store the primes in an array as well. // // The consts are used when possible in Go code to avoid MOVs but we need a // contiguous array for the assembly code. var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5} // Digest implements hash.Hash64. // // Note that a zero-valued Digest is not ready to receive writes. // Call Reset or create a Digest using New before calling other methods. type Digest struct { v1 uint64 v2 uint64 v3 uint64 v4 uint64 total uint64 mem [32]byte n int // how much of mem is used } // New creates a new Digest with a zero seed. func New() *Digest { return NewWithSeed(0) } // NewWithSeed creates a new Digest with the given seed. func NewWithSeed(seed uint64) *Digest { var d Digest d.ResetWithSeed(seed) return &d } // Reset clears the Digest's state so that it can be reused. // It uses a seed value of zero. func (d *Digest) Reset() { d.ResetWithSeed(0) } // ResetWithSeed clears the Digest's state so that it can be reused. // It uses the given seed to initialize the state. func (d *Digest) ResetWithSeed(seed uint64) { d.v1 = seed + prime1 + prime2 d.v2 = seed + prime2 d.v3 = seed d.v4 = seed - prime1 d.total = 0 d.n = 0 } // Size always returns 8 bytes. func (d *Digest) Size() int { return 8 } // BlockSize always returns 32 bytes. func (d *Digest) BlockSize() int { return 32 } // Write adds more data to d. It always returns len(b), nil. func (d *Digest) Write(b []byte) (n int, err error) { n = len(b) d.total += uint64(n) memleft := d.mem[d.n&(len(d.mem)-1):] if d.n+n < 32 { // This new data doesn't even fill the current block. copy(memleft, b) d.n += n return } if d.n > 0 { // Finish off the partial block. c := copy(memleft, b) d.v1 = round(d.v1, u64(d.mem[0:8])) d.v2 = round(d.v2, u64(d.mem[8:16])) d.v3 = round(d.v3, u64(d.mem[16:24])) d.v4 = round(d.v4, u64(d.mem[24:32])) b = b[c:] d.n = 0 } if len(b) >= 32 { // One or more full blocks left. nw := writeBlocks(d, b) b = b[nw:] } // Store any remaining partial block. copy(d.mem[:], b) d.n = len(b) return } // Sum appends the current hash to b and returns the resulting slice. func (d *Digest) Sum(b []byte) []byte { s := d.Sum64() return append( b, byte(s>>56), byte(s>>48), byte(s>>40), byte(s>>32), byte(s>>24), byte(s>>16), byte(s>>8), byte(s), ) } // Sum64 returns the current hash. func (d *Digest) Sum64() uint64 { var h uint64 if d.total >= 32 { v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4 h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) h = mergeRound(h, v1) h = mergeRound(h, v2) h = mergeRound(h, v3) h = mergeRound(h, v4) } else { h = d.v3 + prime5 } h += d.total b := d.mem[:d.n&(len(d.mem)-1)] for ; len(b) >= 8; b = b[8:] { k1 := round(0, u64(b[:8])) h ^= k1 h = rol27(h)*prime1 + prime4 } if len(b) >= 4 { h ^= uint64(u32(b[:4])) * prime1 h = rol23(h)*prime2 + prime3 b = b[4:] } for ; len(b) > 0; b = b[1:] { h ^= uint64(b[0]) * prime5 h = rol11(h) * prime1 } h ^= h >> 33 h *= prime2 h ^= h >> 29 h *= prime3 h ^= h >> 32 return h } const ( magic = "xxh\x06" marshaledSize = len(magic) + 8*5 + 32 ) // MarshalBinary implements the encoding.BinaryMarshaler interface. func (d *Digest) MarshalBinary() ([]byte, error) { b := make([]byte, 0, marshaledSize) b = append(b, magic...) b = appendUint64(b, d.v1) b = appendUint64(b, d.v2) b = appendUint64(b, d.v3) b = appendUint64(b, d.v4) b = appendUint64(b, d.total) b = append(b, d.mem[:d.n]...) b = b[:len(b)+len(d.mem)-d.n] return b, nil } // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface. func (d *Digest) UnmarshalBinary(b []byte) error { if len(b) < len(magic) || string(b[:len(magic)]) != magic { return errors.New("xxhash: invalid hash state identifier") } if len(b) != marshaledSize { return errors.New("xxhash: invalid hash state size") } b = b[len(magic):] b, d.v1 = consumeUint64(b) b, d.v2 = consumeUint64(b) b, d.v3 = consumeUint64(b) b, d.v4 = consumeUint64(b) b, d.total = consumeUint64(b) copy(d.mem[:], b) d.n = int(d.total % uint64(len(d.mem))) return nil } func appendUint64(b []byte, x uint64) []byte { var a [8]byte binary.LittleEndian.PutUint64(a[:], x) return append(b, a[:]...) } func consumeUint64(b []byte) ([]byte, uint64) { x := u64(b) return b[8:], x } func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) } func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) } func round(acc, input uint64) uint64 { acc += input * prime2 acc = rol31(acc) acc *= prime1 return acc } func mergeRound(acc, val uint64) uint64 { val = round(0, val) acc ^= val acc = acc*prime1 + prime4 return acc } func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) } func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) } func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) } func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) } func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) } func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) } func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) } func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) } xxhash-2.3.0/xxhash_amd64.s000066400000000000000000000067361460360331200155130ustar00rootroot00000000000000//go:build !appengine && gc && !purego // +build !appengine // +build gc // +build !purego #include "textflag.h" // Registers: #define h AX #define d AX #define p SI // pointer to advance through b #define n DX #define end BX // loop end #define v1 R8 #define v2 R9 #define v3 R10 #define v4 R11 #define x R12 #define prime1 R13 #define prime2 R14 #define prime4 DI #define round(acc, x) \ IMULQ prime2, x \ ADDQ x, acc \ ROLQ $31, acc \ IMULQ prime1, acc // round0 performs the operation x = round(0, x). #define round0(x) \ IMULQ prime2, x \ ROLQ $31, x \ IMULQ prime1, x // mergeRound applies a merge round on the two registers acc and x. // It assumes that prime1, prime2, and prime4 have been loaded. #define mergeRound(acc, x) \ round0(x) \ XORQ x, acc \ IMULQ prime1, acc \ ADDQ prime4, acc // blockLoop processes as many 32-byte blocks as possible, // updating v1, v2, v3, and v4. It assumes that there is at least one block // to process. #define blockLoop() \ loop: \ MOVQ +0(p), x \ round(v1, x) \ MOVQ +8(p), x \ round(v2, x) \ MOVQ +16(p), x \ round(v3, x) \ MOVQ +24(p), x \ round(v4, x) \ ADDQ $32, p \ CMPQ p, end \ JLE loop // func Sum64(b []byte) uint64 TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 // Load fixed primes. MOVQ ·primes+0(SB), prime1 MOVQ ·primes+8(SB), prime2 MOVQ ·primes+24(SB), prime4 // Load slice. MOVQ b_base+0(FP), p MOVQ b_len+8(FP), n LEAQ (p)(n*1), end // The first loop limit will be len(b)-32. SUBQ $32, end // Check whether we have at least one block. CMPQ n, $32 JLT noBlocks // Set up initial state (v1, v2, v3, v4). MOVQ prime1, v1 ADDQ prime2, v1 MOVQ prime2, v2 XORQ v3, v3 XORQ v4, v4 SUBQ prime1, v4 blockLoop() MOVQ v1, h ROLQ $1, h MOVQ v2, x ROLQ $7, x ADDQ x, h MOVQ v3, x ROLQ $12, x ADDQ x, h MOVQ v4, x ROLQ $18, x ADDQ x, h mergeRound(h, v1) mergeRound(h, v2) mergeRound(h, v3) mergeRound(h, v4) JMP afterBlocks noBlocks: MOVQ ·primes+32(SB), h afterBlocks: ADDQ n, h ADDQ $24, end CMPQ p, end JG try4 loop8: MOVQ (p), x ADDQ $8, p round0(x) XORQ x, h ROLQ $27, h IMULQ prime1, h ADDQ prime4, h CMPQ p, end JLE loop8 try4: ADDQ $4, end CMPQ p, end JG try1 MOVL (p), x ADDQ $4, p IMULQ prime1, x XORQ x, h ROLQ $23, h IMULQ prime2, h ADDQ ·primes+16(SB), h try1: ADDQ $4, end CMPQ p, end JGE finalize loop1: MOVBQZX (p), x ADDQ $1, p IMULQ ·primes+32(SB), x XORQ x, h ROLQ $11, h IMULQ prime1, h CMPQ p, end JL loop1 finalize: MOVQ h, x SHRQ $33, x XORQ x, h IMULQ prime2, h MOVQ h, x SHRQ $29, x XORQ x, h IMULQ ·primes+16(SB), h MOVQ h, x SHRQ $32, x XORQ x, h MOVQ h, ret+24(FP) RET // func writeBlocks(d *Digest, b []byte) int TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 // Load fixed primes needed for round. MOVQ ·primes+0(SB), prime1 MOVQ ·primes+8(SB), prime2 // Load slice. MOVQ b_base+8(FP), p MOVQ b_len+16(FP), n LEAQ (p)(n*1), end SUBQ $32, end // Load vN from d. MOVQ s+0(FP), d MOVQ 0(d), v1 MOVQ 8(d), v2 MOVQ 16(d), v3 MOVQ 24(d), v4 // We don't need to check the loop condition here; this function is // always called with at least one block of data to process. blockLoop() // Copy vN back to d. MOVQ v1, 0(d) MOVQ v2, 8(d) MOVQ v3, 16(d) MOVQ v4, 24(d) // The number of bytes written is p minus the old base pointer. SUBQ b_base+8(FP), p MOVQ p, ret+32(FP) RET xxhash-2.3.0/xxhash_arm64.s000066400000000000000000000064301460360331200155200ustar00rootroot00000000000000//go:build !appengine && gc && !purego // +build !appengine // +build gc // +build !purego #include "textflag.h" // Registers: #define digest R1 #define h R2 // return value #define p R3 // input pointer #define n R4 // input length #define nblocks R5 // n / 32 #define prime1 R7 #define prime2 R8 #define prime3 R9 #define prime4 R10 #define prime5 R11 #define v1 R12 #define v2 R13 #define v3 R14 #define v4 R15 #define x1 R20 #define x2 R21 #define x3 R22 #define x4 R23 #define round(acc, x) \ MADD prime2, acc, x, acc \ ROR $64-31, acc \ MUL prime1, acc // round0 performs the operation x = round(0, x). #define round0(x) \ MUL prime2, x \ ROR $64-31, x \ MUL prime1, x #define mergeRound(acc, x) \ round0(x) \ EOR x, acc \ MADD acc, prime4, prime1, acc // blockLoop processes as many 32-byte blocks as possible, // updating v1, v2, v3, and v4. It assumes that n >= 32. #define blockLoop() \ LSR $5, n, nblocks \ PCALIGN $16 \ loop: \ LDP.P 16(p), (x1, x2) \ LDP.P 16(p), (x3, x4) \ round(v1, x1) \ round(v2, x2) \ round(v3, x3) \ round(v4, x4) \ SUB $1, nblocks \ CBNZ nblocks, loop // func Sum64(b []byte) uint64 TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 LDP b_base+0(FP), (p, n) LDP ·primes+0(SB), (prime1, prime2) LDP ·primes+16(SB), (prime3, prime4) MOVD ·primes+32(SB), prime5 CMP $32, n CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 } BLT afterLoop ADD prime1, prime2, v1 MOVD prime2, v2 MOVD $0, v3 NEG prime1, v4 blockLoop() ROR $64-1, v1, x1 ROR $64-7, v2, x2 ADD x1, x2 ROR $64-12, v3, x3 ROR $64-18, v4, x4 ADD x3, x4 ADD x2, x4, h mergeRound(h, v1) mergeRound(h, v2) mergeRound(h, v3) mergeRound(h, v4) afterLoop: ADD n, h TBZ $4, n, try8 LDP.P 16(p), (x1, x2) round0(x1) // NOTE: here and below, sequencing the EOR after the ROR (using a // rotated register) is worth a small but measurable speedup for small // inputs. ROR $64-27, h EOR x1 @> 64-27, h, h MADD h, prime4, prime1, h round0(x2) ROR $64-27, h EOR x2 @> 64-27, h, h MADD h, prime4, prime1, h try8: TBZ $3, n, try4 MOVD.P 8(p), x1 round0(x1) ROR $64-27, h EOR x1 @> 64-27, h, h MADD h, prime4, prime1, h try4: TBZ $2, n, try2 MOVWU.P 4(p), x2 MUL prime1, x2 ROR $64-23, h EOR x2 @> 64-23, h, h MADD h, prime3, prime2, h try2: TBZ $1, n, try1 MOVHU.P 2(p), x3 AND $255, x3, x1 LSR $8, x3, x2 MUL prime5, x1 ROR $64-11, h EOR x1 @> 64-11, h, h MUL prime1, h MUL prime5, x2 ROR $64-11, h EOR x2 @> 64-11, h, h MUL prime1, h try1: TBZ $0, n, finalize MOVBU (p), x4 MUL prime5, x4 ROR $64-11, h EOR x4 @> 64-11, h, h MUL prime1, h finalize: EOR h >> 33, h MUL prime2, h EOR h >> 29, h MUL prime3, h EOR h >> 32, h MOVD h, ret+24(FP) RET // func writeBlocks(d *Digest, b []byte) int TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 LDP ·primes+0(SB), (prime1, prime2) // Load state. Assume v[1-4] are stored contiguously. MOVD d+0(FP), digest LDP 0(digest), (v1, v2) LDP 16(digest), (v3, v4) LDP b_base+8(FP), (p, n) blockLoop() // Store updated state. STP (v1, v2), 0(digest) STP (v3, v4), 16(digest) BIC $31, n MOVD n, ret+32(FP) RET xxhash-2.3.0/xxhash_asm.go000066400000000000000000000004761460360331200155160ustar00rootroot00000000000000//go:build (amd64 || arm64) && !appengine && gc && !purego // +build amd64 arm64 // +build !appengine // +build gc // +build !purego package xxhash // Sum64 computes the 64-bit xxHash digest of b with a zero seed. // //go:noescape func Sum64(b []byte) uint64 //go:noescape func writeBlocks(d *Digest, b []byte) int xxhash-2.3.0/xxhash_other.go000066400000000000000000000031231460360331200160470ustar00rootroot00000000000000//go:build (!amd64 && !arm64) || appengine || !gc || purego // +build !amd64,!arm64 appengine !gc purego package xxhash // Sum64 computes the 64-bit xxHash digest of b with a zero seed. func Sum64(b []byte) uint64 { // A simpler version would be // d := New() // d.Write(b) // return d.Sum64() // but this is faster, particularly for small inputs. n := len(b) var h uint64 if n >= 32 { v1 := primes[0] + prime2 v2 := prime2 v3 := uint64(0) v4 := -primes[0] for len(b) >= 32 { v1 = round(v1, u64(b[0:8:len(b)])) v2 = round(v2, u64(b[8:16:len(b)])) v3 = round(v3, u64(b[16:24:len(b)])) v4 = round(v4, u64(b[24:32:len(b)])) b = b[32:len(b):len(b)] } h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) h = mergeRound(h, v1) h = mergeRound(h, v2) h = mergeRound(h, v3) h = mergeRound(h, v4) } else { h = prime5 } h += uint64(n) for ; len(b) >= 8; b = b[8:] { k1 := round(0, u64(b[:8])) h ^= k1 h = rol27(h)*prime1 + prime4 } if len(b) >= 4 { h ^= uint64(u32(b[:4])) * prime1 h = rol23(h)*prime2 + prime3 b = b[4:] } for ; len(b) > 0; b = b[1:] { h ^= uint64(b[0]) * prime5 h = rol11(h) * prime1 } h ^= h >> 33 h *= prime2 h ^= h >> 29 h *= prime3 h ^= h >> 32 return h } func writeBlocks(d *Digest, b []byte) int { v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4 n := len(b) for len(b) >= 32 { v1 = round(v1, u64(b[0:8:len(b)])) v2 = round(v2, u64(b[8:16:len(b)])) v3 = round(v3, u64(b[16:24:len(b)])) v4 = round(v4, u64(b[24:32:len(b)])) b = b[32:len(b):len(b)] } d.v1, d.v2, d.v3, d.v4 = v1, v2, v3, v4 return n - len(b) } xxhash-2.3.0/xxhash_safe.go000066400000000000000000000006561460360331200156540ustar00rootroot00000000000000//go:build appengine // +build appengine // This file contains the safe implementations of otherwise unsafe-using code. package xxhash // Sum64String computes the 64-bit xxHash digest of s with a zero seed. func Sum64String(s string) uint64 { return Sum64([]byte(s)) } // WriteString adds more data to d. It always returns len(s), nil. func (d *Digest) WriteString(s string) (n int, err error) { return d.Write([]byte(s)) } xxhash-2.3.0/xxhash_test.go000066400000000000000000000115441460360331200157130ustar00rootroot00000000000000package xxhash import ( "bytes" "encoding/binary" "fmt" "math" "strings" "testing" ) func TestAll(t *testing.T) { // Exactly 63 characters, which exercises all code paths. const s63 = "Call me Ishmael. Some years ago--never mind how long precisely-" for _, tt := range []struct { input string seed uint64 want uint64 }{ {"", 0, 0xef46db3751d8e999}, {"a", 0, 0xd24ec4f1a98c6e5b}, {"as", 0, 0x1c330fb2d66be179}, {"asd", 0, 0x631c37ce72a97393}, {"asdf", 0, 0x415872f599cea71e}, {s63, 0, 0x02a2e85470d6fd96}, {"", 123, 0xe0db84de91f3e198}, {"asdf", math.MaxUint64, 0x9a2fd8473be539b6}, {s63, 54321, 0x1736d186daf5d1cd}, } { lastChunkSize := len(tt.input) if lastChunkSize == 0 { lastChunkSize = 1 } var name string if tt.input == "" { name = "input=empty" } else if len(tt.input) > 10 { name = fmt.Sprintf("input=len-%d", len(tt.input)) } else { name = fmt.Sprintf("input=%q", tt.input) } if tt.seed != 0 { name += fmt.Sprintf(",seed=%d", tt.seed) } for chunkSize := 1; chunkSize <= lastChunkSize; chunkSize++ { name := fmt.Sprintf("%s,chunkSize=%d", name, chunkSize) t.Run(name, func(t *testing.T) { testDigest(t, tt.input, tt.seed, chunkSize, tt.want) }) } if tt.seed == 0 { t.Run(name, func(t *testing.T) { testSum(t, tt.input, tt.want) }) } } } func testDigest(t *testing.T, input string, seed uint64, chunkSize int, want uint64) { d := NewWithSeed(seed) ds := NewWithSeed(seed) // uses WriteString for i := 0; i < len(input); i += chunkSize { chunk := input[i:] if len(chunk) > chunkSize { chunk = chunk[:chunkSize] } n, err := d.Write([]byte(chunk)) if err != nil || n != len(chunk) { t.Fatalf("Digest.Write: got (%d, %v); want (%d, nil)", n, err, len(chunk)) } n, err = ds.WriteString(chunk) if err != nil || n != len(chunk) { t.Fatalf("Digest.WriteString: got (%d, %v); want (%d, nil)", n, err, len(chunk)) } } if got := d.Sum64(); got != want { t.Fatalf("Digest.Sum64: got 0x%x; want 0x%x", got, want) } if got := ds.Sum64(); got != want { t.Fatalf("Digest.Sum64 (WriteString): got 0x%x; want 0x%x", got, want) } var b [8]byte binary.BigEndian.PutUint64(b[:], want) if got := d.Sum(nil); !bytes.Equal(got, b[:]) { t.Fatalf("Sum: got %v; want %v", got, b[:]) } } func testSum(t *testing.T, input string, want uint64) { if got := Sum64([]byte(input)); got != want { t.Fatalf("Sum64: got 0x%x; want 0x%x", got, want) } if got := Sum64String(input); got != want { t.Fatalf("Sum64String: got 0x%x; want 0x%x", got, want) } } func TestReset(t *testing.T) { parts := []string{"The quic", "k br", "o", "wn fox jumps", " ov", "er the lazy ", "dog."} d := New() for _, part := range parts { d.Write([]byte(part)) } h0 := d.Sum64() d.Reset() d.Write([]byte(strings.Join(parts, ""))) h1 := d.Sum64() if h0 != h1 { t.Errorf("0x%x != 0x%x", h0, h1) } } func TestResetWithSeed(t *testing.T) { parts := []string{"The quic", "k br", "o", "wn fox jumps", " ov", "er the lazy ", "dog."} d := NewWithSeed(123) for _, part := range parts { d.Write([]byte(part)) } h0 := d.Sum64() d.ResetWithSeed(123) d.Write([]byte(strings.Join(parts, ""))) h1 := d.Sum64() if h0 != h1 { t.Errorf("0x%x != 0x%x", h0, h1) } } func TestBinaryMarshaling(t *testing.T) { d := New() d.WriteString("abc") b, err := d.MarshalBinary() if err != nil { t.Fatal(err) } d = New() d.WriteString("junk") if err := d.UnmarshalBinary(b); err != nil { t.Fatal(err) } d.WriteString("def") if got, want := d.Sum64(), Sum64String("abcdef"); got != want { t.Fatalf("after MarshalBinary+UnmarshalBinary, got 0x%x; want 0x%x", got, want) } d0 := New() d1 := New() for i := 0; i < 64; i++ { b, err := d0.MarshalBinary() if err != nil { t.Fatal(err) } d0 = new(Digest) if err := d0.UnmarshalBinary(b); err != nil { t.Fatal(err) } if got, want := d0.Sum64(), d1.Sum64(); got != want { t.Fatalf("after %d Writes, unmarshaled Digest gave sum 0x%x; want 0x%x", i, got, want) } d0.Write([]byte{'a'}) d1.Write([]byte{'a'}) } } var sink uint64 func TestAllocs(t *testing.T) { const shortStr = "abcdefghijklmnop" // Sum64([]byte(shortString)) shouldn't allocate because the // intermediate []byte ought not to escape. // (See https://github.com/cespare/xxhash/pull/2.) t.Run("Sum64", func(t *testing.T) { testAllocs(t, func() { sink = Sum64([]byte(shortStr)) }) }) // Creating and using a Digest shouldn't allocate because its methods // shouldn't make it escape. (A previous version of New returned a // hash.Hash64 which forces an allocation.) t.Run("Digest", func(t *testing.T) { b := []byte("asdf") testAllocs(t, func() { d := New() d.Write(b) sink = d.Sum64() }) }) } func testAllocs(t *testing.T, fn func()) { t.Helper() if allocs := int(testing.AllocsPerRun(10, fn)); allocs > 0 { t.Fatalf("got %d allocation(s) (want zero)", allocs) } } xxhash-2.3.0/xxhash_unsafe.go000066400000000000000000000040571460360331200162160ustar00rootroot00000000000000//go:build !appengine // +build !appengine // This file encapsulates usage of unsafe. // xxhash_safe.go contains the safe implementations. package xxhash import ( "unsafe" ) // In the future it's possible that compiler optimizations will make these // XxxString functions unnecessary by realizing that calls such as // Sum64([]byte(s)) don't need to copy s. See https://go.dev/issue/2205. // If that happens, even if we keep these functions they can be replaced with // the trivial safe code. // NOTE: The usual way of doing an unsafe string-to-[]byte conversion is: // // var b []byte // bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) // bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data // bh.Len = len(s) // bh.Cap = len(s) // // Unfortunately, as of Go 1.15.3 the inliner's cost model assigns a high enough // weight to this sequence of expressions that any function that uses it will // not be inlined. Instead, the functions below use a different unsafe // conversion designed to minimize the inliner weight and allow both to be // inlined. There is also a test (TestInlining) which verifies that these are // inlined. // // See https://github.com/golang/go/issues/42739 for discussion. // Sum64String computes the 64-bit xxHash digest of s with a zero seed. // It may be faster than Sum64([]byte(s)) by avoiding a copy. func Sum64String(s string) uint64 { b := *(*[]byte)(unsafe.Pointer(&sliceHeader{s, len(s)})) return Sum64(b) } // WriteString adds more data to d. It always returns len(s), nil. // It may be faster than Write([]byte(s)) by avoiding a copy. func (d *Digest) WriteString(s string) (n int, err error) { d.Write(*(*[]byte)(unsafe.Pointer(&sliceHeader{s, len(s)}))) // d.Write always returns len(s), nil. // Ignoring the return output and returning these fixed values buys a // savings of 6 in the inliner's cost model. return len(s), nil } // sliceHeader is similar to reflect.SliceHeader, but it assumes that the layout // of the first two words is the same as the layout of a string. type sliceHeader struct { s string cap int } xxhash-2.3.0/xxhash_unsafe_test.go000066400000000000000000000023301460360331200172450ustar00rootroot00000000000000//go:build !appengine // +build !appengine package xxhash import ( "os/exec" "sort" "strings" "testing" ) func TestStringAllocs(t *testing.T) { longStr := strings.Repeat("a", 1000) t.Run("Sum64String", func(t *testing.T) { testAllocs(t, func() { sink = Sum64String(longStr) }) }) t.Run("Digest.WriteString", func(t *testing.T) { testAllocs(t, func() { d := New() d.WriteString(longStr) sink = d.Sum64() }) }) } // This test is inspired by the Go runtime tests in https://go.dev/cl/57410. // It asserts that certain important functions may be inlined. func TestInlining(t *testing.T) { funcs := map[string]struct{}{ "Sum64String": {}, "(*Digest).WriteString": {}, } cmd := exec.Command("go", "test", "-gcflags=-m", "-run", "xxxx") out, err := cmd.CombinedOutput() if err != nil { t.Log(string(out)) t.Fatal(err) } for _, line := range strings.Split(string(out), "\n") { parts := strings.Split(line, ": can inline") if len(parts) < 2 { continue } delete(funcs, strings.TrimSpace(parts[1])) } var failed []string for fn := range funcs { failed = append(failed, fn) } sort.Strings(failed) for _, fn := range failed { t.Errorf("function %s not inlined", fn) } } xxhash-2.3.0/xxhashbench/000077500000000000000000000000001460360331200153205ustar00rootroot00000000000000xxhash-2.3.0/xxhashbench/go.mod000066400000000000000000000003711460360331200164270ustar00rootroot00000000000000module github.com/cespare/xxhash/xxhashbench go 1.13 require ( github.com/OneOfOne/xxhash v1.2.5 github.com/cespare/xxhash/v2 v2.0.0-00010101000000-000000000000 github.com/spaolacci/murmur3 v1.1.0 ) replace github.com/cespare/xxhash/v2 => ../ xxhash-2.3.0/xxhashbench/go.sum000066400000000000000000000010521460360331200164510ustar00rootroot00000000000000github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/OneOfOne/xxhash v1.2.5 h1:zl/OfRA6nftbBK9qTohYBJ5xvw6C/oNKizR7cZGl3cI= github.com/OneOfOne/xxhash v1.2.5/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= xxhash-2.3.0/xxhashbench/xxhashbench_test.go000066400000000000000000000072521460360331200212170ustar00rootroot00000000000000package xxhashbench import ( "fmt" "hash/crc32" "hash/fnv" "testing" OneOfOne "github.com/OneOfOne/xxhash" "github.com/cespare/xxhash/v2" "github.com/spaolacci/murmur3" ) // TODO: The main benchmarks live in the xxhash package now, so the only purpose // of this is to compare different hash functions. Consider deleting xxhashbench // or replacing it with a more minimal comparison. var sink uint64 var benchmarks = []struct { name string directBytes func([]byte) uint64 directString func(string) uint64 digestBytes func([]byte) uint64 digestString func(string) uint64 }{ { name: "xxhash", directBytes: xxhash.Sum64, directString: xxhash.Sum64String, digestBytes: func(b []byte) uint64 { h := xxhash.New() h.Write(b) return h.Sum64() }, digestString: func(s string) uint64 { h := xxhash.New() h.WriteString(s) return h.Sum64() }, }, { name: "OneOfOne", directBytes: OneOfOne.Checksum64, directString: OneOfOne.ChecksumString64, digestBytes: func(b []byte) uint64 { h := OneOfOne.New64() h.Write(b) return h.Sum64() }, digestString: func(s string) uint64 { h := OneOfOne.New64() h.WriteString(s) return h.Sum64() }, }, { name: "murmur3", directBytes: murmur3.Sum64, directString: func(s string) uint64 { return murmur3.Sum64([]byte(s)) }, digestBytes: func(b []byte) uint64 { h := murmur3.New64() h.Write(b) return h.Sum64() }, digestString: func(s string) uint64 { h := murmur3.New64() h.Write([]byte(s)) return h.Sum64() }, }, { name: "CRC-32", directBytes: func(b []byte) uint64 { return uint64(crc32.ChecksumIEEE(b)) }, directString: func(s string) uint64 { return uint64(crc32.ChecksumIEEE([]byte(s))) }, digestBytes: func(b []byte) uint64 { h := crc32.NewIEEE() h.Write(b) return uint64(h.Sum32()) }, digestString: func(s string) uint64 { h := crc32.NewIEEE() h.Write([]byte(s)) return uint64(h.Sum32()) }, }, { name: "FNV-1a", digestBytes: func(b []byte) uint64 { h := fnv.New64() h.Write(b) return h.Sum64() }, digestString: func(s string) uint64 { h := fnv.New64a() h.Write([]byte(s)) return h.Sum64() }, }, } func BenchmarkHashes(b *testing.B) { for _, bb := range benchmarks { for _, benchSize := range []struct { name string n int }{ {"5B", 5}, {"100B", 100}, {"4KB", 4e3}, {"10MB", 10e6}, } { input := make([]byte, benchSize.n) for i := range input { input[i] = byte(i) } inputString := string(input) if bb.directBytes != nil { name := fmt.Sprintf("%s,direct,bytes,n=%s", bb.name, benchSize.name) b.Run(name, func(b *testing.B) { benchmarkHashBytes(b, input, bb.directBytes) }) } if bb.directString != nil { name := fmt.Sprintf("%s,direct,string,n=%s", bb.name, benchSize.name) b.Run(name, func(b *testing.B) { benchmarkHashString(b, inputString, bb.directString) }) } if bb.digestBytes != nil { name := fmt.Sprintf("%s,digest,bytes,n=%s", bb.name, benchSize.name) b.Run(name, func(b *testing.B) { benchmarkHashBytes(b, input, bb.digestBytes) }) } if bb.digestString != nil { name := fmt.Sprintf("%s,digest,string,n=%s", bb.name, benchSize.name) b.Run(name, func(b *testing.B) { benchmarkHashString(b, inputString, bb.digestString) }) } } } } func benchmarkHashBytes(b *testing.B, input []byte, fn func([]byte) uint64) { b.SetBytes(int64(len(input))) for i := 0; i < b.N; i++ { sink = fn(input) } } func benchmarkHashString(b *testing.B, input string, fn func(string) uint64) { b.SetBytes(int64(len(input))) for i := 0; i < b.N; i++ { sink = fn(input) } } xxhash-2.3.0/xxhsum/000077500000000000000000000000001460360331200143515ustar00rootroot00000000000000xxhash-2.3.0/xxhsum/.gitignore000066400000000000000000000000101460360331200163300ustar00rootroot00000000000000/xxhsum xxhash-2.3.0/xxhsum/xxhsum.go000066400000000000000000000015451460360331200162410ustar00rootroot00000000000000package main import ( "fmt" "io" "os" "github.com/cespare/xxhash/v2" ) func main() { if contains(os.Args[1:], "-h") { fmt.Fprintf(os.Stderr, `Usage: %s [filenames] If no filenames are provided or only - is given, input is read from stdin. `, os.Args[0]) os.Exit(1) } if len(os.Args) < 2 || len(os.Args) == 2 && os.Args[1] == "-" { printHash(os.Stdin, "-") return } for _, path := range os.Args[1:] { f, err := os.Open(path) if err != nil { fmt.Fprintln(os.Stderr, err) continue } printHash(f, path) f.Close() } } func contains(ss []string, s string) bool { for _, s1 := range ss { if s1 == s { return true } } return false } func printHash(r io.Reader, name string) { h := xxhash.New() if _, err := io.Copy(h, r); err != nil { fmt.Fprintln(os.Stderr, err) return } fmt.Printf("%016x %s\n", h.Sum64(), name) }