pax_global_header00006660000000000000000000000064133453143150014514gustar00rootroot0000000000000052 comment=62ed8f5e2ddfe7db3ff0f2f51d72e66c3cc3da99 rollinghash-4.0.0/000077500000000000000000000000001334531431500140275ustar00rootroot00000000000000rollinghash-4.0.0/.travis.yml000066400000000000000000000003511334531431500161370ustar00rootroot00000000000000language: go go: - "1.10" - "1.9" - "1.8" - "1.7" before_install: - go get github.com/mattn/goveralls script: - go test -v ./... - go test -bench=. ./... - $HOME/gopath/bin/goveralls -package=./... -service=travis-ci rollinghash-4.0.0/LICENSE.txt000066400000000000000000000020511334531431500156500ustar00rootroot00000000000000Copyright 2015 Christophe-Marie Duquesne Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
rollinghash-4.0.0/README.md000066400000000000000000000076451334531431500153220ustar00rootroot00000000000000[![Build Status](https://travis-ci.org/chmduquesne/rollinghash.svg?branch=master)](https://travis-ci.org/chmduquesne/rollinghash) [![Coverage Status](https://coveralls.io/repos/github/chmduquesne/rollinghash/badge.svg?branch=master)](https://coveralls.io/github/chmduquesne/rollinghash?branch=master) [![GoDoc Reference](http://godoc.org/github.com/chmduquesne/rollinghash?status.svg)](https://godoc.org/github.com/chmduquesne/rollinghash) ![Go 1.7+](https://img.shields.io/badge/go-1.7%2B-orange.svg) Rolling Hashes ============== Philosophy ---------- This package contains several various rolling hashes for you to play with crazy ideas. The API design philosophy is to stick as closely as possible to the interface provided by the builtin hash package (the hashes implemented here are effectively drop-in replacements for their builtin counterparts), while providing simultaneously the highest speed and simplicity. Usage ----- A [`rollinghash.Hash`](https://godoc.org/github.com/chmduquesne/rollinghash#Hash) is just a [`hash.Hash`](https://golang.org/pkg/hash/#Hash) which implements the [`Roller`](https://godoc.org/github.com/chmduquesne/rollinghash#Roller) interface. Here is how it is typically used: ```golang data := []byte("here is some data to roll on") h := buzhash64.New() n := 16 // Initialize the rolling window h.Write(data[:n]) for _, c := range(data[n:]) { // Slide the window and update the hash h.Roll(c) // Get the updated hash value fmt.Println(h.Sum64()) } ``` Gotchas ------- The rolling window MUST be initialized by calling `Write` first (which saves a copy). The byte leaving the rolling window is inferred from the internal copy of the rolling window, which is updated with every call to `Roll`. If you want your code to run at the highest speed, do NOT cast the result of a `New()` as a rollinghash.Hash. Instead, use the native type returned by `New()`. 
This is because the go compiler cannot inline calls from an interface. When later you call Roll(), the native type call will be inlined by the compiler, but not the casted type call. ```golang var h1 rollinghash.Hash h1 = buzhash32.New() h2 := buzhash32.New() [...] h1.Roll(b) // Not inlined (slow) h2.Roll(b) // inlined (fast) ``` What's new in v4 ---------------- In v4: * `Write` has become fully consistent with `hash.Hash`. As opposed to previous versions, where writing data would reinitialize the window, it now appends this data to the existing window. In order to reset the window, one should instead use the `Reset` method. * Calling `Roll` on an empty window is considered a bug, and now triggers a panic. Brief reminder of the behaviors in previous versions: * From v0.x.x to v2.x.x: `Roll` returns an error for an empty window. `Write` reinitializes the rolling window. * v3.x.x : `Roll` does not return anything. `Write` still reinitializes the rolling window. The rolling window always has a minimum size of 1, which yields wrong results when using roll before having initialized the window. Go versions ----------- The `RabinKarp64` rollinghash does not yield consistent results before go1.7. This is because it uses `Rand.Read()` from the builtin `math/rand`. This function was [fixed in go 1.7](https://golang.org/doc/go1.7#math_rand) to produce a consistent stream of bytes that is independant of the size of the input buffer. If you depend on this hash, it is strongly recommended to stick to versions of go superior to 1.7. License ------- This code is delivered to you under the terms of the MIT public license, except the `rabinkarp64` subpackage, which has been adapted from [restic](https://github.com/restic/chunker) (BSD 2-clause "Simplified"). 
Notable users ------------- * [syncthing](https://syncthing.net/), a decentralized synchronisation solution * [muscato](https://github.com/kshedden/muscato), a genome analysis tool If you are using this in production or for research, let me know and I will happily put a link here! rollinghash-4.0.0/adler32/000077500000000000000000000000001334531431500152635ustar00rootroot00000000000000rollinghash-4.0.0/adler32/adler32.go000066400000000000000000000055671334531431500170630ustar00rootroot00000000000000// Package rollinghash/adler32 implements a rolling version of hash/adler32 package adler32 import ( "hash" vanilla "hash/adler32" "github.com/chmduquesne/rollinghash" ) const ( Mod = 65521 Size = 4 ) // Adler32 is a digest which satisfies the rollinghash.Hash32 interface. // It implements the adler32 algorithm https://en.wikipedia.org/wiki/Adler-32 type Adler32 struct { a, b uint32 n uint32 // window is treated like a circular buffer, where the oldest element // is indicated by d.oldest window []byte oldest int vanilla hash.Hash32 } // Reset resets the digest to its initial state. func (d *Adler32) Reset() { d.window = d.window[:0] // Reset the size but don't reallocate d.oldest = 0 d.a = 1 d.b = 0 d.n = 0 d.vanilla.Reset() } // New returns a new Adler32 digest func New() *Adler32 { return &Adler32{ a: 1, b: 0, n: 0, window: make([]byte, 0, rollinghash.DefaultWindowCap), oldest: 0, vanilla: vanilla.New(), } } // Size is 4 bytes func (d *Adler32) Size() int { return Size } // BlockSize is 1 byte func (d *Adler32) BlockSize() int { return 1 } // Write appends data to the rolling window and updates the digest. 
func (d *Adler32) Write(data []byte) (int, error) { l := len(data) if l == 0 { return 0, nil } // Re-arrange the window so that the leftmost element is at index 0 n := len(d.window) if d.oldest != 0 { tmp := make([]byte, d.oldest) copy(tmp, d.window[:d.oldest]) copy(d.window, d.window[d.oldest:]) copy(d.window[n-d.oldest:], tmp) d.oldest = 0 } d.window = append(d.window, data...) // Piggy-back on the core implementation d.vanilla.Reset() d.vanilla.Write(d.window) s := d.vanilla.Sum32() d.a, d.b = s&0xffff, s>>16 d.n = uint32(len(d.window)) % Mod return len(data), nil } // Sum32 returns the hash as a uint32 func (d *Adler32) Sum32() uint32 { return d.b<<16 | d.a } // Sum returns the hash as a byte slice func (d *Adler32) Sum(b []byte) []byte { v := d.Sum32() return append(b, byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) } // Roll updates the checksum of the window from the entering byte. You // MUST initialize a window with Write() before calling this method. func (d *Adler32) Roll(b byte) { // This check costs 10-15% performance. If we disable it, we crash // when the window is empty. If we enable it, we are always correct // (an empty window never changes no matter how much you roll it). //if len(d.window) == 0 { // return //} // extract the entering/leaving bytes and update the circular buffer. 
enter := uint32(b) leave := uint32(d.window[d.oldest]) d.window[d.oldest] = b d.oldest += 1 if d.oldest >= len(d.window) { d.oldest = 0 } // See http://stackoverflow.com/questions/40985080/why-does-my-rolling-adler32-checksum-not-work-in-go-modulo-arithmetic d.a = (d.a + Mod + enter - leave) % Mod d.b = (d.b + (d.n*leave/Mod+1)*Mod + d.a - (d.n * leave) - 1) % Mod } rollinghash-4.0.0/adler32/adler32_test.go000066400000000000000000000112651334531431500201120ustar00rootroot00000000000000package adler32_test import ( "bufio" "hash" "hash/adler32" "log" "os" "strings" "testing" "github.com/chmduquesne/rollinghash" rollsum "github.com/chmduquesne/rollinghash/adler32" ) // Stolen from hash/adler32 var golden = []struct { out uint32 in string }{ //{0x00000001, ""}, // panics {0x00620062, "a"}, {0x012600c4, "ab"}, {0x024d0127, "abc"}, {0x03d8018b, "abcd"}, {0x05c801f0, "abcde"}, {0x081e0256, "abcdef"}, {0x0adb02bd, "abcdefg"}, {0x0e000325, "abcdefgh"}, {0x118e038e, "abcdefghi"}, {0x158603f8, "abcdefghij"}, {0x3f090f02, "Discard medicine more than two years old."}, {0x46d81477, "He who has a shady past knows that nice guys finish last."}, {0x40ee0ee1, "I wouldn't marry him with a ten foot pole."}, {0x16661315, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"}, {0x5b2e1480, "The days of the digital watch are numbered. -Tom Stoppard"}, {0x8c3c09ea, "Nepal premier won't resign."}, {0x45ac18fd, "For every action there is an equal and opposite government program."}, {0x53c61462, "His money is twice tainted: 'taint yours and 'taint mine."}, {0x7e511e63, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"}, {0xe4801a6a, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"}, {0x61b507df, "size: a.out: bad magic"}, {0xb8631171, "The major problem is with sendmail. -Mark Horton"}, {0x8b5e1904, "Give me a rock, paper and scissors and I will move the world. 
CCFestoon"}, {0x7cc6102b, "If the enemy is within range, then so are you."}, {0x700318e7, "It's well we cannot hear the screams/That we create in others' dreams."}, {0x1e601747, "You remind me of a TV show, but that's all right: I watch it anyway."}, {0xb55b0b09, "C is as portable as Stonehedge!!"}, {0x39111dd0, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"}, {0x91dd304f, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"}, {0x2e5d1316, "How can you write a big system without C++? -Paul Glick"}, {0xd0201df6, "'Invariant assertions' is the most elegant programming technique! -Tom Szymanski"}, {0x211297c8, strings.Repeat("\xff", 5548) + "8"}, {0xbaa198c8, strings.Repeat("\xff", 5549) + "9"}, {0x553499be, strings.Repeat("\xff", 5550) + "0"}, {0xf0c19abe, strings.Repeat("\xff", 5551) + "1"}, {0x8d5c9bbe, strings.Repeat("\xff", 5552) + "2"}, {0x2af69cbe, strings.Repeat("\xff", 5553) + "3"}, {0xc9809dbe, strings.Repeat("\xff", 5554) + "4"}, {0x69189ebe, strings.Repeat("\xff", 5555) + "5"}, {0x86af0001, strings.Repeat("\x00", 1e5)}, {0x79660b4d, strings.Repeat("a", 1e5)}, {0x110588ee, strings.Repeat("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1e4)}, } // Prove that we implement hash.Hash32 var _ = hash.Hash32(rollsum.New()) // Prove that we implement rollinghash.Hash32 var _ = rollinghash.Hash32(rollsum.New()) // Sum32ByWriteAndRoll computes the sum by prepending the input slice with // a '\0', writing the first bytes of this slice into the sum, then // sliding on the last byte and returning the result of Sum32 func Sum32ByWriteAndRoll(b []byte) uint32 { q := []byte("\x00") q = append(q, b...) 
roll := rollsum.New() roll.Write(q[:len(q)-1]) roll.Roll(q[len(q)-1]) return roll.Sum32() } func TestGolden(t *testing.T) { for _, g := range golden { in := g.in // We test the classic implementation p := []byte(g.in) classic := hash.Hash32(adler32.New()) classic.Write(p) if got := classic.Sum32(); got != g.out { t.Errorf("classic implementation: for %q, expected 0x%x, got 0x%x", in, g.out, got) continue } if got := Sum32ByWriteAndRoll(p); got != g.out { t.Errorf("rolling implementation: for %q, expected 0x%x, got 0x%x", in, g.out, got) continue } } } func BenchmarkRolling64B(b *testing.B) { b.SetBytes(1024) b.ReportAllocs() window := make([]byte, 64) for i := range window { window[i] = byte(i) } h := rollsum.New() in := make([]byte, 0, h.Size()) h.Write(window) b.ResetTimer() for i := 0; i < b.N; i++ { h.Roll(byte(i)) h.Sum(in) } } func BenchmarkReadUrandom(b *testing.B) { b.SetBytes(1024) b.ReportAllocs() f, err := os.Open("/dev/urandom") if err != nil { b.Errorf("Could not open /dev/urandom") } defer func() { if err := f.Close(); err != nil { log.Fatal(err) } }() r := bufio.NewReader(f) ws := 64 window := make([]byte, ws) n, err := r.Read(window) if n != ws || err != nil { b.Errorf("Could not read %d bytes", ws) } h := rollsum.New() in := make([]byte, 0, h.Size()) h.Write(window) b.ResetTimer() for i := 0; i < b.N; i++ { c, err := r.ReadByte() if err != nil { b.Errorf("%s", err) } h.Roll(c) h.Sum(in) } } rollinghash-4.0.0/adler32/doc_test.go000066400000000000000000000037251334531431500174250ustar00rootroot00000000000000package adler32_test import ( "fmt" "hash" "hash/adler32" "log" _adler32 "github.com/chmduquesne/rollinghash/adler32" ) func Example() { s := []byte("The quick brown fox jumps over the lazy dog") classic := hash.Hash32(adler32.New()) rolling := _adler32.New() // Window len n := 16 // You MUST load an initial window into the rolling hash before being // able to roll bytes rolling.Write(s[:n]) // Roll it and compare the result with full re-calculus 
every time for i := n; i < len(s); i++ { // Reset and write the window in classic classic.Reset() classic.Write(s[i-n+1 : i+1]) // Roll the incoming byte in rolling rolling.Roll(s[i]) fmt.Printf("%v: checksum %x\n", string(s[i-n+1:i+1]), rolling.Sum32()) // Compare the hashes if classic.Sum32() != rolling.Sum32() { log.Fatalf("%v: expected %x, got %x", s[i-n+1:i+1], classic.Sum32(), rolling.Sum32()) } } // Output: // he quick brown f: checksum 31e905d9 // e quick brown fo: checksum 314805e0 // quick brown fox: checksum 30ea05f3 // quick brown fox : checksum 34dc05f3 // uick brown fox j: checksum 33b705ec // ick brown fox ju: checksum 325205ec // ck brown fox jum: checksum 31b105f0 // k brown fox jump: checksum 317d05fd // brown fox jumps: checksum 30d10605 // brown fox jumps : checksum 34d50605 // rown fox jumps o: checksum 34c60612 // own fox jumps ov: checksum 33bb0616 // wn fox jumps ove: checksum 32d6060c // n fox jumps over: checksum 316c0607 // fox jumps over : checksum 304405b9 // fox jumps over t: checksum 3450060d // ox jumps over th: checksum 33fe060f // x jumps over the: checksum 33120605 // jumps over the : checksum 313e05ad // jumps over the l: checksum 353605f9 // umps over the la: checksum 348505f0 // mps over the laz: checksum 332905f5 // ps over the lazy: checksum 32590601 // s over the lazy : checksum 310905b1 // over the lazy d: checksum 2f7a05a2 // over the lazy do: checksum 336a05f1 // ver the lazy dog: checksum 326205e9 } rollinghash-4.0.0/bozo32/000077500000000000000000000000001334531431500151455ustar00rootroot00000000000000rollinghash-4.0.0/bozo32/bozo32.go000066400000000000000000000055031334531431500166150ustar00rootroot00000000000000// Package rollinghash/bozo32 is a wrong implementation of the rabinkarp // checksum. 
In practice, it works very well and exhibits all the // properties wanted from a rolling checksum, so after realising that this // code did not implement the rabinkarp checksum as described in the // original paper, it was renamed from rabinkarp32 to bozo32 and kept // in this package. package bozo32 import rollinghash "github.com/chmduquesne/rollinghash" // The size of the checksum. const Size = 4 // Bozo32 is a digest which satisfies the rollinghash.Hash32 interface. type Bozo32 struct { a uint32 aⁿ uint32 value uint32 // window is treated like a circular buffer, where the oldest element // is indicated by d.oldest window []byte oldest int } // Reset resets the Hash to its initial state. func (d *Bozo32) Reset() { d.value = 0 d.aⁿ = 1 d.oldest = 0 d.window = d.window[:0] } func NewFromInt(a uint32) *Bozo32 { return &Bozo32{ a: a, value: 0, aⁿ: 1, window: make([]byte, 0, rollinghash.DefaultWindowCap), oldest: 0, } } func New() *Bozo32 { return NewFromInt(65521) // largest prime fitting in 16 bits } // Size is 4 bytes func (d *Bozo32) Size() int { return Size } // BlockSize is 1 byte func (d *Bozo32) BlockSize() int { return 1 } // Write appends data to the rolling window and updates the digest. It // never returns an error. func (d *Bozo32) Write(data []byte) (int, error) { l := len(data) if l == 0 { return 0, nil } // Re-arrange the window so that the leftmost element is at index 0 n := len(d.window) if d.oldest != 0 { tmp := make([]byte, d.oldest) copy(tmp, d.window[:d.oldest]) copy(d.window, d.window[d.oldest:]) copy(d.window[n-d.oldest:], tmp) d.oldest = 0 } d.window = append(d.window, data...) 
d.value = 0 d.aⁿ = 1 for _, c := range d.window { d.value *= d.a d.value += uint32(c) d.aⁿ *= d.a } return len(d.window), nil } // Sum32 returns the hash as a uint32 func (d *Bozo32) Sum32() uint32 { return d.value } // Sum returns the hash as byte slice func (d *Bozo32) Sum(b []byte) []byte { v := d.Sum32() return append(b, byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) } // Roll updates the checksum of the window from the entering byte. You // MUST initialize a window with Write() before calling this method. func (d *Bozo32) Roll(c byte) { // This check costs 10-15% performance. If we disable it, we crash // when the window is empty. If we enable it, we are always correct // (an empty window never changes no matter how much you roll it). //if len(d.window) == 0 { // return //} // extract the entering/leaving bytes and update the circular buffer. enter := uint32(c) leave := uint32(d.window[d.oldest]) d.window[d.oldest] = c l := len(d.window) d.oldest += 1 if d.oldest >= l { d.oldest = 0 } d.value = d.value*d.a + enter - leave*d.aⁿ } rollinghash-4.0.0/bozo32/bozo32_test.go000066400000000000000000000111471334531431500176550ustar00rootroot00000000000000package bozo32_test import ( "bufio" "hash" "log" "os" "strings" "testing" "github.com/chmduquesne/rollinghash" rollsum "github.com/chmduquesne/rollinghash/bozo32" ) var golden = []struct { out uint32 in string }{ //{0x0, ""}, // panics {0x61, "a"}, {0x60fab3, "ab"}, {0xf5044fe6, "abc"}, {0xf4a551ea, "abcd"}, {0xfc3a33af, "abcde"}, {0x6c45f925, "abcdef"}, {0xa10b673c, "abcdefg"}, {0xf790f3e4, "abcdefgh"}, {0x7265b60d, "abcdefghi"}, {0x21755a7, "abcdefghij"}, {0xc0e67701, "Discard medicine more than two years old."}, {0xb62ddac6, "He who has a shady past knows that nice guys finish last."}, {0x29941b90, "I wouldn't marry him with a ten foot pole."}, {0xbdfa9c64, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"}, {0x3973640f, "The days of the digital watch are numbered. 
-Tom Stoppard"}, {0x2caf4c69, "Nepal premier won't resign."}, {0x4370a2fc, "For every action there is an equal and opposite government program."}, {0x105c181, "His money is twice tainted: 'taint yours and 'taint mine."}, {0xf636f6a2, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"}, {0xd53ee79, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"}, {0xfa8b9ee, "size: a.out: bad magic"}, {0xbf79f440, "The major problem is with sendmail. -Mark Horton"}, {0x9da762a3, "Give me a rock, paper and scissors and I will move the world. CCFestoon"}, {0x658aa63a, "If the enemy is within range, then so are you."}, {0xfa49ca46, "It's well we cannot hear the screams/That we create in others' dreams."}, {0x419a8bb6, "You remind me of a TV show, but that's all right: I watch it anyway."}, {0x1d9b58d8, "C is as portable as Stonehedge!!"}, {0x9234f2df, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"}, {0x7e43d6de, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"}, {0xf2f16f5, "How can you write a big system without C++? -Paul Glick"}, {0xd9e43015, "'Invariant assertions' is the most elegant programming technique! 
-Tom Szymanski"}, {0x28ffc26c, strings.Repeat("\xff", 5548) + "8"}, {0x5c36903c, strings.Repeat("\xff", 5549) + "9"}, {0x29cf8112, strings.Repeat("\xff", 5550) + "0"}, {0xeb86402, strings.Repeat("\xff", 5551) + "1"}, {0x88021802, strings.Repeat("\xff", 5552) + "2"}, {0x20af8c12, strings.Repeat("\xff", 5553) + "3"}, {0xa294bf32, strings.Repeat("\xff", 5554) + "4"}, {0x3945c062, strings.Repeat("\xff", 5555) + "5"}, {0x0, strings.Repeat("\x00", 1e5)}, {0xc18a57a0, strings.Repeat("a", 1e5)}, {0xc7d4c4f0, strings.Repeat("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1e4)}, } // Prove that we implement rollinghash.Hash32 var _ = rollinghash.Hash32(rollsum.New()) // Prove that we implement hash.Hash32 var _ = hash.Hash32(rollsum.New()) // Sum32ByWriteAndRoll computes the sum by prepending the input slice with // a '\0', writing the first bytes of this slice into the sum, then // sliding on the last byte and returning the result of Sum32 func Sum32ByWriteAndRoll(b []byte) uint32 { q := []byte("\x00") q = append(q, b...) 
roll := rollsum.New() roll.Write(q[:len(q)-1]) roll.Roll(q[len(q)-1]) return roll.Sum32() } func TestGolden(t *testing.T) { for _, g := range golden { in := g.in // We test the classic implementation p := []byte(g.in) classic := hash.Hash32(rollsum.New()) classic.Write(p) if got := classic.Sum32(); got != g.out { t.Errorf("classic implentation: for %q, expected 0x%x, got 0x%x", in, g.out, got) continue } if got := Sum32ByWriteAndRoll(p); got != g.out { t.Errorf("rolling implentation: for %q, expected 0x%x, got 0x%x", in, g.out, got) continue } } } func BenchmarkRolling64B(b *testing.B) { b.SetBytes(1024) b.ReportAllocs() window := make([]byte, 64) for i := range window { window[i] = byte(i) } h := rollsum.New() in := make([]byte, 0, h.Size()) h.Write(window) b.ResetTimer() for i := 0; i < b.N; i++ { h.Roll(byte(i)) h.Sum(in) } } func BenchmarkReadUrandom(b *testing.B) { b.SetBytes(1024) b.ReportAllocs() f, err := os.Open("/dev/urandom") if err != nil { b.Errorf("Could not open /dev/urandom") } defer func() { if err := f.Close(); err != nil { log.Fatal(err) } }() r := bufio.NewReader(f) ws := 64 window := make([]byte, ws) n, err := r.Read(window) if n != ws || err != nil { b.Errorf("Could not read %d bytes", ws) } h := rollsum.New() in := make([]byte, 0, h.Size()) h.Write(window) b.ResetTimer() for i := 0; i < b.N; i++ { c, err := r.ReadByte() if err != nil { b.Errorf("%s", err) } h.Roll(c) h.Sum(in) } } rollinghash-4.0.0/bozo32/doc_test.go000066400000000000000000000036641334531431500173110ustar00rootroot00000000000000package bozo32_test import ( "fmt" "hash" "log" "github.com/chmduquesne/rollinghash/bozo32" ) func Example() { s := []byte("The quick brown fox jumps over the lazy dog") classic := hash.Hash32(bozo32.New()) rolling := bozo32.New() // Window len n := 16 // You MUST load an initial window into the rolling hash before being // able to roll bytes rolling.Write(s[:n]) // Roll it and compare the result with full re-calculus every time for i := n; i < len(s); 
i++ { // Reset and write the window in classic classic.Reset() classic.Write(s[i-n+1 : i+1]) // Roll the incoming byte in rolling rolling.Roll(s[i]) fmt.Printf("%v: checksum %x\n", string(s[i-n+1:i+1]), rolling.Sum32()) // Compare the hashes if classic.Sum32() != rolling.Sum32() { log.Fatalf("%v: expected %x, got %x", s[i-n+1:i+1], classic.Sum32(), rolling.Sum32()) } } // Output: // he quick brown f: checksum 43ccedc8 // e quick brown fo: checksum 58edb94f // quick brown fox: checksum 24a53172 // quick brown fox : checksum 2a953a52 // uick brown fox j: checksum 68660e2b // ick brown fox ju: checksum a0dcc87b // ck brown fox jum: checksum a971cf // k brown fox jump: checksum 87384fec // brown fox jumps: checksum 8aaa9434 // brown fox jumps : checksum 930670f4 // rown fox jumps o: checksum b1f3d3c1 // own fox jumps ov: checksum 544099b5 // wn fox jumps ove: checksum d4d1655b // n fox jumps over: checksum 1fafbea6 // fox jumps over : checksum cd48b1f8 // fox jumps over t: checksum c986b2cc // ox jumps over th: checksum c6221c0e // x jumps over the: checksum aaf3c224 // jumps over the : checksum 316bd78c // jumps over the l: checksum 110b7f18 // umps over the la: checksum 6580478f // mps over the laz: checksum 5b76ba4 // ps over the lazy: checksum bedd0670 // s over the lazy : checksum 43588f20 // over the lazy d: checksum cbaf2811 // over the lazy do: checksum 579ec750 // ver the lazy dog: checksum cfe7b948 } rollinghash-4.0.0/buzhash32/000077500000000000000000000000001334531431500156405ustar00rootroot00000000000000rollinghash-4.0.0/buzhash32/buzhash32.go000066400000000000000000000070321334531431500200020ustar00rootroot00000000000000// Package rollinghash/buzhash implements buzhash as described by // https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial package buzhash32 import ( "math/rand" rollinghash "github.com/chmduquesne/rollinghash" ) var defaultHashes [256]uint32 func init() { defaultHashes = GenerateHashes(1) } // The size of the checksum. 
const Size = 4 // Buzhash32 is a digest which satisfies the rollinghash.Hash32 interface. // It implements the cyclic polynomial algorithm // https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial type Buzhash32 struct { sum uint32 nRotate uint nRotateComplement uint // redundant, but pre-computed to spare an operation // window is treated like a circular buffer, where the oldest element // is indicated by d.oldest window []byte oldest int bytehash [256]uint32 } // Reset resets the Hash to its initial state. func (d *Buzhash32) Reset() { d.window = d.window[:0] d.oldest = 0 d.sum = 0 } // GenerateHashes generates a list of hashes to use with buzhash func GenerateHashes(seed int64) (res [256]uint32) { random := rand.New(rand.NewSource(seed)) used := make(map[uint32]bool) for i, _ := range res { x := uint32(random.Int63()) for used[x] { x = uint32(random.Int63()) } used[x] = true res[i] = x } return res } // New returns a buzhash based on a list of hashes provided by a call to // GenerateHashes, seeded with the default value 1. func New() *Buzhash32 { return NewFromUint32Array(defaultHashes) } // NewFromUint32Array returns a buzhash based on the provided table uint32 values. func NewFromUint32Array(b [256]uint32) *Buzhash32 { return &Buzhash32{ sum: 0, window: make([]byte, 0, rollinghash.DefaultWindowCap), oldest: 0, bytehash: b, } } // Size is 4 bytes func (d *Buzhash32) Size() int { return Size } // BlockSize is 1 byte func (d *Buzhash32) BlockSize() int { return 1 } // Write appends data to the rolling window and updates the digest. func (d *Buzhash32) Write(data []byte) (int, error) { l := len(data) if l == 0 { return 0, nil } // Re-arrange the window so that the leftmost element is at index 0 n := len(d.window) if d.oldest != 0 { tmp := make([]byte, d.oldest) copy(tmp, d.window[:d.oldest]) copy(d.window, d.window[d.oldest:]) copy(d.window[n-d.oldest:], tmp) d.oldest = 0 } d.window = append(d.window, data...) 
d.sum = 0 for _, c := range d.window { d.sum = d.sum<<1 | d.sum>>31 d.sum ^= d.bytehash[int(c)] } d.nRotate = uint(len(d.window)) % 32 d.nRotateComplement = 32 - d.nRotate return len(d.window), nil } // Sum32 returns the hash as a uint32 func (d *Buzhash32) Sum32() uint32 { return d.sum } // Sum returns the hash as byte slice func (d *Buzhash32) Sum(b []byte) []byte { v := d.Sum32() return append(b, byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) } // Roll updates the checksum of the window from the entering byte. You // MUST initialize a window with Write() before calling this method. func (d *Buzhash32) Roll(c byte) { // This check costs 10-15% performance. If we disable it, we crash // when the window is empty. If we enable it, we are always correct // (an empty window never changes no matter how much you roll it). //if len(d.window) == 0 { // return //} // extract the entering/leaving bytes and update the circular buffer. hn := d.bytehash[int(c)] h0 := d.bytehash[int(d.window[d.oldest])] d.window[d.oldest] = c l := len(d.window) d.oldest += 1 if d.oldest >= l { d.oldest = 0 } d.sum = (d.sum<<1 | d.sum>>31) ^ (h0<>d.nRotateComplement) ^ hn } rollinghash-4.0.0/buzhash32/buzhash32_test.go000066400000000000000000000111711334531431500210400ustar00rootroot00000000000000package buzhash32_test import ( "bufio" "hash" "log" "os" "strings" "testing" "github.com/chmduquesne/rollinghash" rollsum "github.com/chmduquesne/rollinghash/buzhash32" ) var golden = []struct { out uint32 in string }{ //{0x0, ""}, // panics {0x29ec300c, "a"}, {0xf188df2c, "ab"}, {0x44fce3ce, "abc"}, {0x77c42de5, "abcd"}, {0x573b093d, "abcde"}, {0x67c55693, "abcdef"}, {0x41d5a953, "abcdefg"}, {0x9aeab78c, "abcdefgh"}, {0x13411924, "abcdefghi"}, {0x4dc2d8f4, "abcdefghij"}, {0x99065f04, "Discard medicine more than two years old."}, {0x5a6c6c9a, "He who has a shady past knows that nice guys finish last."}, {0x51ac1bd0, "I wouldn't marry him with a ten foot pole."}, {0x62268af0, "Free! 
Free!/A trip/to Mars/for 900/empty jars/Burma Shave"}, {0x704eb745, "The days of the digital watch are numbered. -Tom Stoppard"}, {0xd4a23048, "Nepal premier won't resign."}, {0x8eca545a, "For every action there is an equal and opposite government program."}, {0x3b87b0da, "His money is twice tainted: 'taint yours and 'taint mine."}, {0x4a1a9265, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"}, {0xd3cc3586, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"}, {0xd6ce8c5a, "size: a.out: bad magic"}, {0x47eaad99, "The major problem is with sendmail. -Mark Horton"}, {0x5ec2ffab, "Give me a rock, paper and scissors and I will move the world. CCFestoon"}, {0x3a34b15, "If the enemy is within range, then so are you."}, {0x532df3e8, "It's well we cannot hear the screams/That we create in others' dreams."}, {0x7cbcf246, "You remind me of a TV show, but that's all right: I watch it anyway."}, {0xa653a57f, "C is as portable as Stonehedge!!"}, {0xaa4a402c, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"}, {0xbc881e9c, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"}, {0xfaffeaf5, "How can you write a big system without C++? -Paul Glick"}, {0x3775c6ce, "'Invariant assertions' is the most elegant programming technique! 
-Tom Szymanski"}, {0x92d904de, strings.Repeat("\xff", 5548) + "8"}, {0x280326bc, strings.Repeat("\xff", 5549) + "9"}, {0xc8c4ec97, strings.Repeat("\xff", 5550) + "0"}, {0x67e7441d, strings.Repeat("\xff", 5551) + "1"}, {0x95601eb, strings.Repeat("\xff", 5552) + "2"}, {0x37ce09c7, strings.Repeat("\xff", 5553) + "3"}, {0x64126a4b, strings.Repeat("\xff", 5554) + "4"}, {0x7a492c8e, strings.Repeat("\xff", 5555) + "5"}, {0xffffffff, strings.Repeat("\x00", 1e5)}, {0x0, strings.Repeat("a", 1e5)}, {0x55555555, strings.Repeat("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1e4)}, } // Prove that we implement rollinghash.Hash32 var _ = rollinghash.Hash32(rollsum.New()) // Prove that we implement hash.Hash32 var _ = hash.Hash32(rollsum.New()) // Sum32ByWriteAndRoll computes the sum by prepending the input slice with // a '\0', writing the first bytes of this slice into the sum, then // sliding on the last byte and returning the result of Sum32 func Sum32ByWriteAndRoll(b []byte) uint32 { q := []byte("\x00") q = append(q, b...) 
roll := rollsum.New() roll.Write(q[:len(q)-1]) roll.Roll(q[len(q)-1]) return roll.Sum32() } func TestGolden(t *testing.T) { for _, g := range golden { in := g.in // We test the classic implementation p := []byte(g.in) classic := hash.Hash32(rollsum.New()) classic.Write(p) if got := classic.Sum32(); got != g.out { t.Errorf("classic implentation: for %q, expected 0x%x, got 0x%x", in, g.out, got) continue } if got := Sum32ByWriteAndRoll(p); got != g.out { t.Errorf("rolling implentation: for %q, expected 0x%x, got 0x%x", in, g.out, got) continue } } } func BenchmarkRolling64B(b *testing.B) { b.SetBytes(1024) b.ReportAllocs() window := make([]byte, 64) for i := range window { window[i] = byte(i) } h := rollsum.New() in := make([]byte, 0, h.Size()) h.Write(window) b.ResetTimer() for i := 0; i < b.N; i++ { h.Roll(byte(i)) h.Sum(in) } } func BenchmarkReadUrandom(b *testing.B) { b.SetBytes(1024) b.ReportAllocs() f, err := os.Open("/dev/urandom") if err != nil { b.Errorf("Could not open /dev/urandom") } defer func() { if err := f.Close(); err != nil { log.Fatal(err) } }() r := bufio.NewReader(f) ws := 64 window := make([]byte, ws) n, err := r.Read(window) if n != ws || err != nil { b.Errorf("Could not read %d bytes", ws) } h := rollsum.New() in := make([]byte, 0, h.Size()) h.Write(window) b.ResetTimer() for i := 0; i < b.N; i++ { c, err := r.ReadByte() if err != nil { b.Errorf("%s", err) } h.Roll(c) h.Sum(in) } } rollinghash-4.0.0/buzhash32/doc_test.go000066400000000000000000000037031334531431500177760ustar00rootroot00000000000000package buzhash32_test import ( "fmt" "hash" "log" "github.com/chmduquesne/rollinghash/buzhash32" ) func Example() { s := []byte("The quick brown fox jumps over the lazy dog") classic := hash.Hash32(buzhash32.New()) rolling := buzhash32.New() // Window len n := 16 // You MUST load an initial window into the rolling hash before being // able to roll bytes rolling.Write(s[:n]) // Roll it and compare the result with full re-calculus every time for i := 
n; i < len(s); i++ { // Reset and write the window in classic classic.Reset() classic.Write(s[i-n+1 : i+1]) // Roll the incoming byte in rolling rolling.Roll(s[i]) fmt.Printf("%v: checksum %x\n", string(s[i-n+1:i+1]), rolling.Sum32()) // Compare the hashes if classic.Sum32() != rolling.Sum32() { log.Fatalf("%v: expected %x, got %x", s[i-n+1:i+1], classic.Sum32(), rolling.Sum32()) } } // Output: // he quick brown f: checksum 53e7e066 // e quick brown fo: checksum ecf5708c // quick brown fox: checksum c12d0faf // quick brown fox : checksum f2e76fe2 // uick brown fox j: checksum a8506342 // ick brown fox ju: checksum 201db638 // ck brown fox jum: checksum 759fe987 // k brown fox jump: checksum ecf78a18 // brown fox jumps: checksum 9062a9c9 // brown fox jumps : checksum 5078232e // rown fox jumps o: checksum b1d44d0d // own fox jumps ov: checksum 8177e796 // wn fox jumps ove: checksum 135d33ca // n fox jumps over: checksum 7a45e290 // fox jumps over : checksum 1655abcb // fox jumps over t: checksum 710c1810 // ox jumps over th: checksum bfb01cb9 // x jumps over the: checksum 6ed2c594 // jumps over the : checksum f2e2c8e7 // jumps over the l: checksum df544447 // umps over the la: checksum 7df8d3c3 // mps over the laz: checksum c8c88cc0 // ps over the lazy: checksum 3e7f980c // s over the lazy : checksum fb4663b8 // over the lazy d: checksum 31ccb20e // over the lazy do: checksum c476b45f // ver the lazy dog: checksum afb3c2da } rollinghash-4.0.0/buzhash64/000077500000000000000000000000001334531431500156455ustar00rootroot00000000000000rollinghash-4.0.0/buzhash64/buzhash64.go000066400000000000000000000071421334531431500200160ustar00rootroot00000000000000// Package rollinghash/buzhash implements buzhash as described by // https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial package buzhash64 import ( "math/rand" "github.com/chmduquesne/rollinghash" ) var defaultHashes [256]uint64 func init() { defaultHashes = GenerateHashes(1) } // The size of the checksum. 
const Size = 8 // Buzhash64 is a digest which satisfies the rollinghash.Hash64 interface. // It implements the cyclic polynomial algorithm // https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial type Buzhash64 struct { sum uint64 nRotate uint nRotateComplement uint // redundant, but pre-computed to spare an operation // window is treated like a circular buffer, where the oldest element // is indicated by d.oldest window []byte oldest int bytehash [256]uint64 } // Reset resets the Hash to its initial state. func (d *Buzhash64) Reset() { d.window = d.window[:0] d.oldest = 0 d.sum = 0 } // GenerateHashes generates a list of hashes to use with buzhash func GenerateHashes(seed int64) (res [256]uint64) { random := rand.New(rand.NewSource(seed)) used := make(map[uint64]bool) for i, _ := range res { x := uint64(random.Int63()) for used[x] { x = uint64(random.Int63()) } used[x] = true res[i] = x } return res } // New returns a buzhash based on a list of hashes provided by a call to // GenerateHashes, seeded with the default value 1. func New() *Buzhash64 { return NewFromUint64Array(defaultHashes) } // NewFromUint64Array returns a buzhash based on the provided table uint64 values. func NewFromUint64Array(b [256]uint64) *Buzhash64 { return &Buzhash64{ sum: 0, window: make([]byte, 0, rollinghash.DefaultWindowCap), oldest: 0, bytehash: b, } } // Size is 8 bytes func (d *Buzhash64) Size() int { return Size } // BlockSize is 1 byte func (d *Buzhash64) BlockSize() int { return 1 } // Write appends data to the rolling window and updates the digest. It // never returns an error. func (d *Buzhash64) Write(data []byte) (int, error) { l := len(data) if l == 0 { return 0, nil } // Re-arrange the window so that the leftmost element is at index 0 n := len(d.window) if d.oldest != 0 { tmp := make([]byte, d.oldest) copy(tmp, d.window[:d.oldest]) copy(d.window, d.window[d.oldest:]) copy(d.window[n-d.oldest:], tmp) d.oldest = 0 } d.window = append(d.window, data...) 
d.sum = 0 for _, c := range d.window { d.sum = d.sum<<1 | d.sum>>63 d.sum ^= d.bytehash[int(c)] } d.nRotate = uint(len(d.window)) % 64 d.nRotateComplement = 64 - d.nRotate return len(d.window), nil } // Sum64 returns the hash as a uint64 func (d *Buzhash64) Sum64() uint64 { return d.sum } // Sum returns the hash as a byte slice func (d *Buzhash64) Sum(b []byte) []byte { v := d.Sum64() return append(b, byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) } // Roll updates the checksum of the window from the entering byte. You // MUST initialize a window with Write() before calling this method. func (d *Buzhash64) Roll(c byte) { // This check costs 10-15% performance. If we disable it, we crash // when the window is empty. If we enable it, we are always correct // (an empty window never changes no matter how much you roll it). //if len(d.window) == 0 { // return //} // extract the entering/leaving bytes and update the circular buffer. hn := d.bytehash[int(c)] h0 := d.bytehash[int(d.window[d.oldest])] d.window[d.oldest] = c l := len(d.window) d.oldest += 1 if d.oldest >= l { d.oldest = 0 } d.sum = (d.sum<<1 | d.sum>>63) ^ (h0<>d.nRotateComplement) ^ hn } rollinghash-4.0.0/buzhash64/buzhash64_test.go000066400000000000000000000117171334531431500210600ustar00rootroot00000000000000package buzhash64_test import ( "bufio" "hash" "log" "os" "strings" "testing" "github.com/chmduquesne/rollinghash" rollsum "github.com/chmduquesne/rollinghash/buzhash64" ) var golden = []struct { out uint64 in string }{ //{0x0, ""}, // panics {0x103970a329ec300c, "a"}, {0x47040cbf188df2c, "ab"}, {0x3c65f4e944fce3cf, "abc"}, {0x4f60df9377c42de7, "abcd"}, {0xcec2a201573b0939, "abcde"}, {0xdbee72d967c5569a, "abcdef"}, {0xf80e8c4f41d5a940, "abcdefg"}, {0xad48d3d99aeab7ab, "abcdefgh"}, {0x30df48c91341196a, "abcdefghi"}, {0x61ae47a04dc2d868, "abcdefghij"}, {0x865ce00093c2d75a, "Discard medicine more than two years old."}, {0xf6451e42f92f4791, "He who has a 
shady past knows that nice guys finish last."}, {0xffda8ce848bb89aa, "I wouldn't marry him with a ten foot pole."}, {0x5c12e27de6d83a30, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"}, {0x49e0ad4ab0ca7ce5, "The days of the digital watch are numbered. -Tom Stoppard"}, {0xd89090cdd4dd4903, "Nepal premier won't resign."}, {0x22e4214eed90dea, "For every action there is an equal and opposite government program."}, {0x314d09d305b82be6, "His money is twice tainted: 'taint yours and 'taint mine."}, {0x2e08403f31426a4f, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"}, {0x2f1b953fa66c1447, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"}, {0xb5f6b055d6aadc23, "size: a.out: bad magic"}, {0x5859ccebfdaddec6, "The major problem is with sendmail. -Mark Horton"}, {0x2576ff98789ac55a, "Give me a rock, paper and scissors and I will move the world. CCFestoon"}, {0xc4022ec5788ce718, "If the enemy is within range, then so are you."}, {0x68c85e74ecf463e4, "It's well we cannot hear the screams/That we create in others' dreams."}, {0x315778aa0cbc1b2, "You remind me of a TV show, but that's all right: I watch it anyway."}, {0x44b49393f2b0a0de, "C is as portable as Stonehedge!!"}, {0xc81fb312ed79c064, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"}, {0xc732298d7f79e413, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"}, {0x708bba6595ed094a, "How can you write a big system without C++? -Paul Glick"}, {0x79f053c321ef8b17, "'Invariant assertions' is the most elegant programming technique! 
-Tom Szymanski"}, {0x1048f99c5be4db21, strings.Repeat("\xff", 5548) + "8"}, {0x1a5a34feba789943, strings.Repeat("\xff", 5549) + "9"}, {0xab1feddbec339368, strings.Repeat("\xff", 5550) + "0"}, {0xe493ddbd2e09bbe2, strings.Repeat("\xff", 5551) + "1"}, {0xb00cf0719a8bfe14, strings.Repeat("\xff", 5552) + "2"}, {0xf274c02c1075f638, strings.Repeat("\xff", 5553) + "3"}, {0x8f9ddd932b6595b4, strings.Repeat("\xff", 5554) + "4"}, {0xa2b79cf7e4a6d371, strings.Repeat("\xff", 5555) + "5"}, {0x3988d52ec6772ad1, strings.Repeat("\x00", 1e5)}, {0x174cc065174cc065, strings.Repeat("a", 1e5)}, {0x598883fc0cddd6a9, strings.Repeat("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1e4)}, } // Prove that we implement rollinghash.Hash64 var _ = rollinghash.Hash64(rollsum.New()) // Prove that we implement hash.Hash64 var _ = hash.Hash64(rollsum.New()) // Sum64ByWriteAndRoll computes the sum by prepending the input slice with // a '\0', writing the first bytes of this slice into the sum, then // sliding on the last byte and returning the result of Sum32 func Sum64ByWriteAndRoll(b []byte) uint64 { q := []byte("\x00") q = append(q, b...) 
roll := rollsum.New() roll.Write(q[:len(q)-1]) roll.Roll(q[len(q)-1]) return roll.Sum64() } func TestGolden(t *testing.T) { for _, g := range golden { in := g.in // We test the classic implementation p := []byte(g.in) classic := hash.Hash64(rollsum.New()) classic.Write(p) if got := classic.Sum64(); got != g.out { t.Errorf("classic implentation: for %q, expected 0x%x, got 0x%x", in, g.out, got) continue } if got := Sum64ByWriteAndRoll(p); got != g.out { t.Errorf("rolling implentation: for %q, expected 0x%x, got 0x%x", in, g.out, got) continue } } } func BenchmarkRolling64B(b *testing.B) { b.SetBytes(1024) b.ReportAllocs() window := make([]byte, 64) for i := range window { window[i] = byte(i) } h := rollsum.New() in := make([]byte, 0, h.Size()) h.Write(window) b.ResetTimer() for i := 0; i < b.N; i++ { h.Roll(byte(i)) h.Sum(in) } } func BenchmarkReadUrandom(b *testing.B) { b.SetBytes(1024) b.ReportAllocs() f, err := os.Open("/dev/urandom") if err != nil { b.Errorf("Could not open /dev/urandom") } defer func() { if err := f.Close(); err != nil { log.Fatal(err) } }() r := bufio.NewReader(f) ws := 64 window := make([]byte, ws) n, err := r.Read(window) if n != ws || err != nil { b.Errorf("Could not read %d bytes", ws) } h := rollsum.New() in := make([]byte, 0, h.Size()) h.Write(window) b.ResetTimer() for i := 0; i < b.N; i++ { c, err := r.ReadByte() if err != nil { b.Errorf("%s", err) } h.Roll(c) h.Sum(in) } } rollinghash-4.0.0/buzhash64/doc_test.go000066400000000000000000000042311334531431500200000ustar00rootroot00000000000000package buzhash64_test import ( "fmt" "hash" "log" "github.com/chmduquesne/rollinghash/buzhash64" ) func Example() { s := []byte("The quick brown fox jumps over the lazy dog") classic := hash.Hash64(buzhash64.New()) rolling := buzhash64.New() // Window len n := 16 // You MUST load an initial window into the rolling hash before being // able to roll bytes rolling.Write(s[:n]) // Roll it and compare the result with full re-calculus every time for i := 
n; i < len(s); i++ { // Reset and write the window in classic classic.Reset() classic.Write(s[i-n+1 : i+1]) // Roll the incoming byte in rolling rolling.Roll(s[i]) fmt.Printf("%v: checksum %x\n", string(s[i-n+1:i+1]), rolling.Sum64()) // Compare the hashes if classic.Sum64() != rolling.Sum64() { log.Fatalf("%v: expected %x, got %x", s[i-n+1:i+1], classic.Sum64(), rolling.Sum64()) } } // Output: // he quick brown f: checksum 27b900ac53e7f3ee // e quick brown fo: checksum b05b4730ecf51388 // quick brown fox: checksum 473f08ecc12d2117 // quick brown fox : checksum 83e17140f2e75f95 // uick brown fox j: checksum 64d46288a85055a9 // ick brown fox ju: checksum a5cbdf3e201dada3 // ck brown fox jum: checksum 9b57bc98759f926a // k brown fox jump: checksum 401d9a62ecf7eeab // brown fox jumps: checksum 8bf712419062ba1c // brown fox jumps : checksum 1a71441a50786982 // rown fox jumps o: checksum a101754db1d45e07 // own fox jumps ov: checksum 375b7cc8177aedf // wn fox jumps ove: checksum 9c1dcae135d3b27 // n fox jumps over: checksum a3a8e55b7a45f607 // fox jumps over : checksum 749fb4791655a8bf // fox jumps over t: checksum bb9bbe73710c73fe // ox jumps over th: checksum 1cb97e12bfb044bc // x jumps over the: checksum 36584f126ed2efe1 // jumps over the : checksum 46cacdcff2e2ec93 // jumps over the l: checksum a77c4823df5461a8 // umps over the la: checksum 88f38ba47df8f34d // mps over the laz: checksum 39428e93c8c8bb91 // ps over the lazy: checksum 1a2767543e7f8a8c // s over the lazy : checksum 64b58d2cfb461f2b // over the lazy d: checksum 5cc1b31e31cca116 // over the lazy do: checksum 94364057c476ff69 // ver the lazy dog: checksum 38974742afb3cec8 } rollinghash-4.0.0/doc_test.go000066400000000000000000000017231334531431500161650ustar00rootroot00000000000000package rollinghash_test import ( "hash" "log" _adler32 "github.com/chmduquesne/rollinghash/adler32" ) func Example() { s := []byte("The quick brown fox jumps over the lazy dog") // This example works with adler32, but the api 
is identical for all // other rolling checksums. Consult the documentation of the checksum // of interest. classic := hash.Hash32(_adler32.New()) rolling := _adler32.New() // Window len n := 16 // You MUST load an initial window into the rolling hash before being // able to roll bytes rolling.Write(s[:n]) // Roll it and compare the result with full re-calculus every time for i := n; i < len(s); i++ { // Reset and write the window in classic classic.Reset() classic.Write(s[i-n+1 : i+1]) // Roll the incoming byte in rolling rolling.Roll(s[i]) // Compare the hashes if classic.Sum32() != rolling.Sum32() { log.Fatalf("%v: expected %x, got %x", s[i-n+1:i+1], classic.Sum32(), rolling.Sum32()) } } } rollinghash-4.0.0/rabinkarp64/000077500000000000000000000000001334531431500161525ustar00rootroot00000000000000rollinghash-4.0.0/rabinkarp64/doc_test.go000066400000000000000000000042511334531431500203070ustar00rootroot00000000000000// Copyright (C) 2017 Christophe-Marie Duquesne package rabinkarp64_test import ( "fmt" "hash" "log" "github.com/chmduquesne/rollinghash/rabinkarp64" ) func Example() { s := []byte("The quick brown fox jumps over the lazy dog") classic := hash.Hash64(rabinkarp64.New()) rolling := rabinkarp64.New() // Window len n := 16 // You MUST load an initial window into the rolling hash before being // able to roll bytes rolling.Write(s[:n]) // Roll it and compare the result with full re-calculus every time for i := n; i < len(s); i++ { // Reset and write the window in classic classic.Reset() classic.Write(s[i-n+1 : i+1]) // Roll the incoming byte in rolling rolling.Roll(s[i]) fmt.Printf("%v: checksum %x\n", string(s[i-n+1:i+1]), rolling.Sum64()) // Compare the hashes if classic.Sum64() != rolling.Sum64() { log.Fatalf("%v: expected %x, got %x", string(s[i-n+1:i+1]), classic.Sum64(), rolling.Sum64()) } } // Output: // he quick brown f: checksum 1ab89e68de7c15 // e quick brown fo: checksum 1d26864e21619f // quick brown fox: checksum 13fdc4aaefcf91 // quick brown 
fox : checksum 1fab0ef7daee4a // uick brown fox j: checksum 6aee0bda40445 // ick brown fox ju: checksum a8cf05560301d // ck brown fox jum: checksum 1945eaabdb6b67 // k brown fox jump: checksum 18964a2ca37033 // brown fox jumps: checksum 7f1778d0e6456 // brown fox jumps : checksum 8d3dd9e2cf5a3 // rown fox jumps o: checksum 5e7672798a4e5 // own fox jumps ov: checksum 41e75561bd7ce // wn fox jumps ove: checksum 1db9e271edcead // n fox jumps over: checksum 7aeec087fe22a // fox jumps over : checksum 1a3acd0bfd0c1f // fox jumps over t: checksum c3620e2c8d91a // ox jumps over th: checksum 8b7049026154d // x jumps over the: checksum 1f639b25356c1d // jumps over the : checksum a7961a2d0f9c4 // jumps over the l: checksum 6e3c3ec495a7d // umps over the la: checksum a3dbdf68d695e // mps over the laz: checksum 1c443a5f275ca7 // ps over the lazy: checksum 57e965da5efe2 // s over the lazy : checksum 1d457b44849f9d // over the lazy d: checksum 1d54040df5f20f // over the lazy do: checksum 1aa7779b59c5fb // ver the lazy dog: checksum 1d72c7f255ba24 } rollinghash-4.0.0/rabinkarp64/polynomials.go000066400000000000000000000154171334531431500210570ustar00rootroot00000000000000// Copyright (c) 2014, Alexander Neumann // Copyright (c) 2017, Christophe-Marie Duquesne // // This file was adapted from restic https://github.com/restic/chunker // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // 1. Redistributions of source code must retain the above copyright notice, this // list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. 
// // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package rabinkarp64 import ( "encoding/binary" "errors" "fmt" "io" "math/rand" "strconv" ) // Pol is a polynomial from F_2[X]. type Pol uint64 // Add returns x+y. func (x Pol) Add(y Pol) Pol { r := Pol(uint64(x) ^ uint64(y)) return r } // mulOverflows returns true if the multiplication would overflow uint64. // Code by Rob Pike, see // https://groups.google.com/d/msg/golang-nuts/h5oSN5t3Au4/KaNQREhZh0QJ func mulOverflows(a, b Pol) bool { if a <= 1 || b <= 1 { return false } c := a.mul(b) d := c.Div(b) if d != a { return true } return false } func (x Pol) mul(y Pol) Pol { if x == 0 || y == 0 { return 0 } var res Pol for i := 0; i <= y.Deg(); i++ { if (y & (1 << uint(i))) > 0 { res = res.Add(x << uint(i)) } } return res } // Mul returns x*y. When an overflow occurs, Mul panics. func (x Pol) Mul(y Pol) Pol { if mulOverflows(x, y) { panic("multiplication would overflow uint64") } return x.mul(y) } // Deg returns the degree of the polynomial x. If x is zero, -1 is returned. 
func (x Pol) Deg() int { // the degree of 0 is -1 if x == 0 { return -1 } // see https://graphics.stanford.edu/~seander/bithacks.html#IntegerLog r := 0 if uint64(x)&0xffffffff00000000 > 0 { x >>= 32 r |= 32 } if uint64(x)&0xffff0000 > 0 { x >>= 16 r |= 16 } if uint64(x)&0xff00 > 0 { x >>= 8 r |= 8 } if uint64(x)&0xf0 > 0 { x >>= 4 r |= 4 } if uint64(x)&0xc > 0 { x >>= 2 r |= 2 } if uint64(x)&0x2 > 0 { x >>= 1 r |= 1 } return r } // String returns the coefficients in hex. func (x Pol) String() string { return "0x" + strconv.FormatUint(uint64(x), 16) } // Expand returns the string representation of the polynomial x. func (x Pol) Expand() string { if x == 0 { return "0" } s := "" for i := x.Deg(); i > 1; i-- { if x&(1< 0 { s += fmt.Sprintf("+x^%d", i) } } if x&2 > 0 { s += "+x" } if x&1 > 0 { s += "+1" } return s[1:] } // DivMod returns x / d = q, and remainder r, // see https://en.wikipedia.org/wiki/Division_algorithm func (x Pol) DivMod(d Pol) (Pol, Pol) { if x == 0 { return 0, 0 } if d == 0 { panic("division by zero") } D := d.Deg() diff := x.Deg() - D if diff < 0 { return 0, x } var q Pol for diff >= 0 { m := d << uint(diff) q |= (1 << uint(diff)) x = x.Add(m) diff = x.Deg() - D } return q, x } // Div returns the integer division result x / d. func (x Pol) Div(d Pol) Pol { q, _ := x.DivMod(d) return q } // Mod returns the remainder of x / d func (x Pol) Mod(d Pol) Pol { _, r := x.DivMod(d) return r } // I really dislike having a function that does not terminate, so specify a // really large upper bound for finding a new irreducible polynomial, and // return an error when no irreducible polynomial has been found within // randPolMaxTries. const randPolMaxTries = 1e6 // RandomPolynomial returns a new random irreducible polynomial // of degree 53 using the input seed as a source. // It is equivalent to calling DerivePolynomial(rand.Reader). 
func RandomPolynomial(seed int64) (Pol, error) {
	// *rand.Rand is an io.Reader, so the seeded generator can be handed
	// straight to DerivePolynomial as its byte source.
	return DerivePolynomial(rand.New(rand.NewSource(seed)))
}

// DerivePolynomial returns an irreducible polynomial of degree 53
// (largest prime number below 64-8) by reading bytes from source.
// There are (2^53-2)/53 irreducible polynomials of degree 53 in
// F_2[X], c.f. Michael O. Rabin (1981): "Fingerprinting by Random
// Polynomials", page 4. If no polynomial could be found in one
// million tries, an error is returned. A read error from source is
// returned unchanged, together with the zero polynomial.
func DerivePolynomial(source io.Reader) (Pol, error) {
	for i := 0; i < randPolMaxTries; i++ {
		var f Pol

		// choose polynomial at (pseudo)random
		err := binary.Read(source, binary.LittleEndian, &f)
		if err != nil {
			return 0, err
		}

		// mask away bits above bit 53
		f &= Pol((1 << 54) - 1)

		// set highest and lowest bit so that the degree is 53 and the
		// polynomial is not trivially reducible
		f |= (1 << 53) | 1

		// test if f is irreducible
		if f.Irreducible() {
			return f, nil
		}
	}

	// If this is reached, we haven't found an irreducible polynomial in
	// randPolMaxTries. This error is very unlikely to occur.
	return 0, errors.New("unable to find new random irreducible polynomial")
}

// GCD computes the Greatest Common Divisor x and f. If either argument is
// zero, the other one is returned.
func (x Pol) GCD(f Pol) Pol {
	// Euclid's algorithm: replace (x, f) by (f, x mod f) until f vanishes.
	for f != 0 {
		if x == 0 {
			return f
		}

		// Keep the polynomial of larger degree on the left.
		if x.Deg() < f.Deg() {
			x, f = f, x
		}

		x, f = f, x.Mod(f)
	}

	return x
}

// Irreducible returns true iff x is irreducible over F_2. This function
// uses Ben Or's reducibility test.
//
// For details see "Tests and Constructions of Irreducible Polynomials over
// Finite Fields".
func (x Pol) Irreducible() bool {
	deg := x.Deg()

	// The constants 0 and 1 are not irreducible. Without this guard the
	// loop below would not run for them and they would be reported as
	// irreducible.
	if deg < 1 {
		return false
	}

	// x is reducible iff it shares a factor with X^(2^i)-X for some
	// 1 <= i <= deg/2.
	for i := 1; i <= deg/2; i++ {
		if x.GCD(qp(uint(i), x)) != 1 {
			return false
		}
	}

	return true
}

// MulMod computes x*f mod g
func (x Pol) MulMod(f, g Pol) Pol {
	if x == 0 || f == 0 {
		return 0
	}

	var res Pol

	// a holds x*X^i mod g for the current i (the unreduced x for i == 0).
	// It is advanced by one doubling per step instead of being rebuilt
	// from x for every set bit of f; the sequence of values, and therefore
	// the result and any overflow or division panic, is unchanged.
	a := x
	for i, top := 0, f.Deg(); i <= top; i++ {
		if i > 0 {
			a = a.Mul(2).Mod(g)
		}

		if f&(1<<uint(i)) > 0 {
			res = res.Add(a).Mod(g)
		}
	}

	return res
}

// qp computes the polynomial (x^(2^p)-x) mod g.
This is needed for the // reducibility test. func qp(p uint, g Pol) Pol { num := (1 << p) i := 1 // start with x res := Pol(2) for i < num { // repeatedly square res res = res.MulMod(res, g) i *= 2 } // add x return res.Add(2).Mod(g) } rollinghash-4.0.0/rabinkarp64/polynomials_test.go000066400000000000000000000212061334531431500221070ustar00rootroot00000000000000// Copyright (c) 2014, Alexander Neumann // Copyright (c) 2017, Christophe-Marie Duquesne // // This file was adapted from restic https://github.com/restic/chunker // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // 1. Redistributions of source code must retain the above copyright notice, this // list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
package rabinkarp64

import (
	"strconv"
	"testing"
)

// polAddTests lists operand pairs together with their expected sum.
var polAddTests = []struct {
	x, y Pol
	sum  Pol
}{
	{23, 16, 23 ^ 16},
	{0x9a7e30d1e855e0a0, 0x670102a1f4bcd414, 0xfd7f32701ce934b4},
	{0x9a7e30d1e855e0a0, 0x9a7e30d1e855e0a0, 0},
}

// TestPolAdd checks Add against polAddTests in both operand orders.
func TestPolAdd(t *testing.T) {
	for i, test := range polAddTests {
		if test.sum != test.x.Add(test.y) {
			t.Errorf("test %d failed: sum != x+y", i)
		}

		if test.sum != test.y.Add(test.x) {
			t.Errorf("test %d failed: sum != y+x", i)
		}
	}
}

// parseBin converts a string of binary digits into a Pol and panics when
// the string cannot be parsed as a 64-bit base-2 number.
func parseBin(s string) Pol {
	i, err := strconv.ParseUint(s, 2, 64)
	if err != nil {
		panic(err)
	}

	return Pol(i)
}

// polMulTests lists operand pairs together with their expected product.
var polMulTests = []struct {
	x, y Pol
	res  Pol
}{
	{1, 2, 2},
	{
		parseBin("1101"),
		parseBin("10"),
		parseBin("11010"),
	},
	{
		parseBin("1101"),
		parseBin("11"),
		parseBin("10111"),
	},
	{
		0x40000000,
		0x40000000,
		0x1000000000000000,
	},
	{
		parseBin("1010"),
		parseBin("100100"),
		parseBin("101101000"),
	},
	{
		parseBin("100"),
		parseBin("11"),
		parseBin("1100"),
	},
	{
		parseBin("11"),
		parseBin("110101"),
		parseBin("1011111"),
	},
	{
		parseBin("10011"),
		parseBin("110101"),
		parseBin("1100001111"),
	},
}

// TestPolMul checks Mul against polMulTests in both operand orders.
func TestPolMul(t *testing.T) {
	for i, test := range polMulTests {
		if m := test.x.Mul(test.y); m != test.res {
			t.Errorf("TestPolMul failed for test %d: %v * %v: want %v, got %v",
				i, test.x, test.y, test.res, m)
		}

		// same product with the operands swapped; report them in the order
		// they were actually multiplied
		if m := test.y.Mul(test.x); m != test.res {
			t.Errorf("TestPolMul failed for test %d: %v * %v: want %v, got %v",
				i, test.y, test.x, test.res, m)
		}
	}
}

// TestPolMulOverflow checks that Mul panics with the overflow message when
// the product does not fit into 64 bits.
func TestPolMulOverflow(t *testing.T) {
	defer func() {
		// try to recover overflow error
		err := recover()

		// Nothing recovered: Mul did not panic and the t.Fatal below is
		// already terminating the test, so there is nothing to re-raise.
		if err == nil {
			return
		}

		if e, ok := err.(string); ok && e == "multiplication would overflow uint64" {
			return
		}

		t.Logf("invalid error raised: %v", err)
		// re-raise error if not overflow
		panic(err)
	}()

	x := Pol(1 << 63)
	x.Mul(2)
	t.Fatal("overflow test did not panic")
}

// polDivTests lists dividend/divisor pairs together with their expected
// quotient.
var polDivTests = []struct {
	x, y Pol
	res  Pol
}{
	{10, 50, 0},
	{0, 1, 0},
	{
		parseBin("101101000"), // 0x168
		parseBin("1010"),      // 0xa
		parseBin("100100"),    // 0x24
	},
	{2, 2, 1},
	{
		0x8000000000000000,
		0x8000000000000000,
		1,
	},
	{
parseBin("1100"),
		parseBin("100"),
		parseBin("11"),
	},
	{
		parseBin("1100001111"),
		parseBin("10011"),
		parseBin("110101"),
	},
}

// TestPolDiv checks Div against polDivTests.
func TestPolDiv(t *testing.T) {
	for i, test := range polDivTests {
		if m := test.x.Div(test.y); m != test.res {
			t.Errorf("TestPolDiv failed for test %d: %v / %v: want %v, got %v",
				i, test.x, test.y, test.res, m)
		}
	}
}

// TestPolDeg checks Deg for the zero polynomial and for every single-bit
// polynomial.
func TestPolDeg(t *testing.T) {
	var x Pol
	if x.Deg() != -1 {
		t.Errorf("deg(0) is not -1: %v", x.Deg())
	}

	x = 1
	if x.Deg() != 0 {
		t.Errorf("deg(1) is not 0: %v", x.Deg())
	}

	for i := 0; i < 64; i++ {
		x = 1 << uint(i)
		if x.Deg() != i {
			t.Errorf("deg(1<<%d) is not %d: %v", i, i, x.Deg())
		}
	}
}

// polModTests lists dividend/divisor pairs together with their expected
// remainder.
var polModTests = []struct {
	x, y Pol
	res  Pol
}{
	{10, 50, 10},
	{0, 1, 0},
	{
		parseBin("101101001"),
		parseBin("1010"),
		parseBin("1"),
	},
	{2, 2, 0},
	{
		0x8000000000000000,
		0x8000000000000000,
		0,
	},
	{
		parseBin("1100"),
		parseBin("100"),
		parseBin("0"),
	},
	{
		parseBin("1100001111"),
		parseBin("10011"),
		parseBin("0"),
	},
}

// TestPolModt checks Mod against polModTests.
func TestPolModt(t *testing.T) {
	for i, test := range polModTests {
		if res := test.x.Mod(test.y); res != test.res {
			t.Errorf("test %d failed: want %v, got %v", i, test.res, res)
		}
	}
}

// BenchmarkPolDivMod measures DivMod.
//
// NOTE(review): g has a smaller degree than f, so g is divided by a larger
// polynomial here; confirm that this operand order is intended.
func BenchmarkPolDivMod(b *testing.B) {
	f := Pol(0x2482734cacca49)
	g := Pol(0x3af4b284899)

	for i := 0; i < b.N; i++ {
		g.DivMod(f)
	}
}

// BenchmarkPolDiv measures Div on the same operands as BenchmarkPolDivMod.
func BenchmarkPolDiv(b *testing.B) {
	f := Pol(0x2482734cacca49)
	g := Pol(0x3af4b284899)

	for i := 0; i < b.N; i++ {
		g.Div(f)
	}
}

// BenchmarkPolMod measures Mod on the same operands as BenchmarkPolDivMod.
func BenchmarkPolMod(b *testing.B) {
	f := Pol(0x2482734cacca49)
	g := Pol(0x3af4b284899)

	for i := 0; i < b.N; i++ {
		g.Mod(f)
	}
}

// BenchmarkPolDeg measures Deg after checking that it returns the expected
// degree for the benchmark input.
func BenchmarkPolDeg(b *testing.B) {
	f := Pol(0x3af4b284899)
	if d := f.Deg(); d != 41 {
		b.Fatalf("BenchmarkPolDeg: wrong degree %d returned, expected %d", d, 41)
	}

	for i := 0; i < b.N; i++ {
		f.Deg()
	}
}

// TestRandomPolynomial checks that a polynomial can be derived from seed 1.
func TestRandomPolynomial(t *testing.T) {
	if _, err := RandomPolynomial(1); err != nil {
		t.Fatal(err)
	}
}

// BenchmarkRandomPolynomial measures deriving a polynomial from seed 1.
func BenchmarkRandomPolynomial(b *testing.B) {
	for i := 0; i < b.N; i++ {
		if _, err := RandomPolynomial(1); err != nil {
			b.Fatal(err)
		}
	}
}

// TestExpandPolynomial checks the textual expansion of a known polynomial.
func TestExpandPolynomial(t *testing.T) {
	const want = "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1"

	pol := Pol(0x3DA3358B4DC173)
	if got := pol.Expand(); got != want {
		t.Fatalf("wrong result: want %q, got %q", want, got)
	}
}

// polIrredTests lists polynomials together with whether they are expected
// to be irreducible.
var polIrredTests = []struct {
	f     Pol
	irred bool
}{
	{0x38f1e565e288df, false},
	{0x3DA3358B4DC173, true},
	{0x30a8295b9d5c91, false},
	{0x255f4350b962cb, false},
	{0x267f776110a235, false},
	{0x2f4dae10d41227, false},
	{0x2482734cacca49, true},
	{0x312daf4b284899, false},
	{0x29dfb6553d01d1, false},
	{0x3548245eb26257, false},
	{0x3199e7ef4211b3, false},
	{0x362f39017dae8b, false},
	{0x200d57aa6fdacb, false},
	{0x35e0a4efa1d275, false},
	{0x2ced55b026577f, false},
	{0x260b012010893d, false},
	{0x2df29cbcd59e9d, false},
	{0x3f2ac7488bd429, false},
	{0x3e5cb1711669fb, false},
	{0x226d8de57a9959, false},
	{0x3c8de80aaf5835, false},
	{0x2026a59efb219b, false},
	{0x39dfa4d13fb231, false},
	{0x3143d0464b3299, false},
}

// TestPolIrreducible checks Irreducible against polIrredTests.
func TestPolIrreducible(t *testing.T) {
	for _, test := range polIrredTests {
		// run the (comparatively expensive) test once and reuse the result
		// in the failure message
		if got := test.f.Irreducible(); got != test.irred {
			t.Errorf("Irreducibility test for Polynomial %v failed: got %v, wanted %v",
				test.f, got, test.irred)
		}
	}
}

// BenchmarkPolIrreducible measures Irreducible on the first polynomial of
// polIrredTests that is marked irreducible.
func BenchmarkPolIrreducible(b *testing.B) {
	// find first irreducible polynomial
	var pol Pol
	for _, test := range polIrredTests {
		if test.irred {
			pol = test.f
			break
		}
	}

	for i := 0; i < b.N; i++ {
		if !pol.Irreducible() {
			b.Errorf("Irreducibility test for Polynomial %v failed", pol)
		}
	}
}

// polGCDTests lists polynomial pairs together with their expected greatest
// common divisor.
var polGCDTests = []struct {
	f1  Pol
	f2  Pol
	gcd Pol
}{
	{10, 50, 2},
	{0, 1, 1},
	{
		parseBin("101101001"),
		parseBin("1010"),
		parseBin("1"),
	},
	{2, 2, 2},
	{
		parseBin("1010"),
		parseBin("11"),
		parseBin("11"),
	},
	{
		0x8000000000000000,
		0x8000000000000000,
		0x8000000000000000,
	},
	{
		parseBin("1100"),
		parseBin("101"),
		parseBin("11"),
	},
	{
		parseBin("1100001111"),
		parseBin("10011"),
		parseBin("10011"),
	},
	{
		0x3DA3358B4DC173,
		0x3DA3358B4DC173,
		0x3DA3358B4DC173,
	},
	{
		0x3DA3358B4DC173,
		0x230d2259defd,
		1,
	},
	{
		0x230d2259defd,
		0x51b492b3eff2,
parseBin("10011"),
	},
}

// TestPolGCD checks GCD against polGCDTests in both operand orders.
func TestPolGCD(t *testing.T) {
	for i, test := range polGCDTests {
		gcd := test.f1.GCD(test.f2)
		if test.gcd != gcd {
			t.Errorf("GCD test %d (%+v) failed: got %v, wanted %v",
				i, test, gcd, test.gcd)
		}

		// same pair with the operands swapped
		gcd = test.f2.GCD(test.f1)
		if test.gcd != gcd {
			t.Errorf("GCD test %d (%+v) failed: got %v, wanted %v",
				i, test, gcd, test.gcd)
		}
	}
}

// polMulModTests lists factors f1 and f2, a modulus g and the expected
// value of f1*f2 mod g.
var polMulModTests = []struct {
	f1  Pol
	f2  Pol
	g   Pol
	mod Pol
}{
	{
		0x1230,
		0x230,
		0x55,
		0x22,
	},
	{
		0x0eae8c07dbbb3026,
		0xd5d6db9de04771de,
		0xdd2bda3b77c9,
		0x425ae8595b7a,
	},
}

// TestPolMulMod checks MulMod against polMulModTests.
func TestPolMulMod(t *testing.T) {
	for i, test := range polMulModTests {
		mod := test.f1.MulMod(test.f2, test.g)
		if mod != test.mod {
			t.Errorf("MulMod test %d (%+v) failed: got %v, wanted %v",
				i, test, mod, test.mod)
		}
	}
}

// NOTE(review): the next line is a tar member header from the archive
// dump, not Go source; it is kept verbatim.
rollinghash-4.0.0/rabinkarp64/rabinkarp64.go000066400000000000000000000151471334531431500206340ustar00rootroot00000000000000
// Copyright (c) 2014, Alexander Neumann
// Copyright (c) 2017, Christophe-Marie Duquesne
//
// This file was adapted from restic https://github.com/restic/chunker
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

package rabinkarp64

import (
	"sync"

	"github.com/chmduquesne/rollinghash"
)

// Size is presumably the size of the checksum in bytes (8 bytes = 64 bits)
// — TODO confirm against its users, which are not visible in this part of
// the file.
const Size = 8

// tables holds the two 256-entry lookup tables filled in by buildTables:
// out is indexed by the byte sliding out of the window, mod by the 8 bits
// above the degree of the polynomial.
type tables struct {
	out [256]Pol
	mod [256]Pol
}

// tables are cacheable for a given pol and windowsize
type index struct {
	pol        Pol
	windowsize int
}

// RabinKarp64 carries the state of one rolling checksum: the polynomial,
// the lookup tables derived from it, the current value and a copy of the
// rolling window.
type RabinKarp64 struct {
	pol      Pol
	tables   *tables
	polShift uint
	value    Pol

	// window is treated like a circular buffer, where the oldest element
	// is indicated by d.oldest
	window []byte
	oldest int
}

// cache precomputed tables, these are read-only anyway
var cache struct {
	// For a given polynom and a given window size, we get a table
	entries map[index]*tables
	sync.Mutex
}

// init allocates the map backing the table cache.
func init() {
	cache.entries = make(map[index]*tables)
}

// updateTables points d.tables at the tables matching d.pol and the current
// window length, taking them from the cache when present and building and
// caching them otherwise.
//
// The cache lock is released while the tables are built, so two callers
// that miss the cache for the same key each build their own copy and the
// later store replaces the earlier one.
func (d *RabinKarp64) updateTables() {
	windowsize := len(d.window)
	pol := d.pol

	idx := index{d.pol, windowsize}

	cache.Lock()
	t, ok := cache.entries[idx]
	cache.Unlock()

	if ok {
		d.tables = t
		return
	}

	d.tables = buildTables(pol, windowsize)

	cache.Lock()
	cache.entries[idx] = d.tables
	cache.Unlock()

	return
}

// buildTables computes the lookup tables for the given polynomial and
// window size.
//
// NOTE(review): the end of this function is missing from this copy of the
// file (see the note further down); only the part that is visible is
// documented here.
func buildTables(pol Pol, windowsize int) (t *tables) {
	t = &tables{}

	// calculate table for sliding out bytes. The byte to slide out is used as
	// the index for the table, the value contains the following:
	// out_table[b] = Hash(b || 0 || ... || 0)
	//                       \ windowsize-1 zero bytes /
	// To slide out byte b_0 for window size w with known hash
	// H := H(b_0 || ... || b_w), it is sufficient to add out_table[b_0]:
	//    H(b_0 || ... || b_w) + H(b_0 || 0 || ... || 0)
	//  = H(b_0 + b_0 || b_1 + 0 || ... || b_w + 0)
	//  = H( 0 || b_1 || ... || b_w)
	//
	// Afterwards a new byte can be shifted in.
	for b := 0; b < 256; b++ {
		var h Pol

		// hash of the single byte b ...
		h <<= 8
		h |= Pol(b)
		h = h.Mod(pol)

		// ... followed by windowsize-1 zero bytes
		for i := 0; i < windowsize-1; i++ {
			h <<= 8
			h |= Pol(0)
			h = h.Mod(pol)
		}
		t.out[b] = h
	}

	// calculate table for reduction mod Polynomial
	k := pol.Deg()
	for b := 0; b < 256; b++ {
		// mod_table[b] = A | B, where A = (b(x) * x^k mod pol) and B = b(x) * x^k
		//
		// The 8 bits above deg(Polynomial) determine what happens next and so
		// these bits are used as a lookup to this table. The value is split in
		// two parts: Part A contains the result of the modulus operation, part
		// B is used to cancel out the 8 top bits so that one XOR operation is
		// enough to reduce modulo Polynomial

		// NOTE(review): source text is missing on the next line between
		// `uint64(b)<` and `>56)`. The rest of buildTables and whatever
		// was defined between it and the tail of the function below did
		// not survive in this copy. The fragment is kept exactly as found;
		// restore the lost span from the upstream file instead of
		// guessing.
		t.mod[b] = Pol(uint64(b)<>56), byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}

// Roll updates the checksum of the window from the entering byte. You
// MUST initialize a window with Write() before calling this method.
func (d *RabinKarp64) Roll(c byte) {
	// This check costs 10-15% performance. If we disable it, we crash
	// when the window is empty. If we enable it, we are always correct
	// (an empty window never changes no matter how much you roll it).
	//if len(d.window) == 0 {
	//	return
	//}

	// extract the entering/leaving bytes and update the circular buffer.
enter := c
	leave := uint64(d.window[d.oldest])
	d.window[d.oldest] = c
	d.oldest += 1
	// wrap around: the window is used as a circular buffer
	if d.oldest >= len(d.window) {
		d.oldest = 0
	}

	// cancel the contribution of the leaving byte
	d.value ^= d.tables.out[leave]
	// the bits selected by polShift pick the mod table entry, which is
	// XORed in after the entering byte has been shifted in
	index := byte(d.value >> d.polShift)
	d.value <<= 8
	d.value |= Pol(enter)
	d.value ^= d.tables.mod[index]
}

// NOTE(review): the next line is a tar member header from the archive
// dump, not Go source; it is kept verbatim.
rollinghash-4.0.0/rabinkarp64/rabinkarp64_test.go000066400000000000000000000115731334531431500216720ustar00rootroot00000000000000
// Copyright (c) 2017 Christophe-Marie Duquesne

package rabinkarp64_test

import (
	"bufio"
	"hash"
	"log"
	"os"
	"strings"
	"testing"

	"github.com/chmduquesne/rollinghash"
	rollsum "github.com/chmduquesne/rollinghash/rabinkarp64"
)

// golden pairs inputs with the checksum expected for them.
//
// NOTE(review): runs of whitespace appear to have been collapsed in this
// copy of the file, and one string literal below was split across two
// lines; it is joined with a single space here. The literals may therefore
// differ from the inputs the expected values were computed for — confirm
// against the upstream file.
var golden = []struct {
	out uint64
	in  string
}{
	//{0x0, ""}, // panics
	{0x61, "a"},
	{0x6162, "ab"},
	{0x616263, "abc"},
	{0x61626364, "abcd"},
	{0x6162636465, "abcde"},
	{0x616263646566, "abcdef"},
	{0x132021ba359c68, "abcdefg"},
	{0x1fce6358ce1471, "abcdefgh"},
	{0x65b425e3c80ca, "abcdefghi"},
	{0xe9781880ddab2, "abcdefghij"},
	{0x1bcc435d5a6760, "Discard medicine more than two years old."},
	{0x1c56084394dbf5, "He who has a shady past knows that nice guys finish last."},
	{0x7973e4550080f, "I wouldn't marry him with a ten foot pole."},
	{0x1e2a9f14d4a366, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"},
	{0x177a1e4d652838, "The days of the digital watch are numbered. -Tom Stoppard"},
	{0x153bb8322d8614, "Nepal premier won't resign."},
	{0x12309044aaafcd, "For every action there is an equal and opposite government program."},
	{0x59187d7f34b99, "His money is twice tainted: 'taint yours and 'taint mine."},
	{0x5e4f5ec20dbb, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"},
	{0x5b605dca0167a, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"},
	{0x1da35eec936b1c, "size: a.out: bad magic"},
	{0x1b4a521659269a, "The major problem is with sendmail. -Mark Horton"},
	{0x11b3791cfaf6ef, "Give me a rock, paper and scissors and I will move the world. CCFestoon"},
	{0x1ff6dcce41d7d9, "If the enemy is within range, then so are you."},
	{0x1820adc68f03ec, "It's well we cannot hear the screams/That we create in others' dreams."},
	{0x3f8660475a7fb, "You remind me of a TV show, but that's all right: I watch it anyway."},
	{0x149d09de60bc54, "C is as portable as Stonehedge!!"},
	{0x11686c8f59d7c7, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"},
	{0xfb8afb28c9bcf, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"},
	{0x1fa10a2313f3e6, "How can you write a big system without C++? -Paul Glick"},
	{0x178cc568c9a3c, "'Invariant assertions' is the most elegant programming technique! -Tom Szymanski"},
	{0x65bd2936a4628, strings.Repeat("\xff", 5548) + "8"},
	{0xe074cdecbffe1, strings.Repeat("\xff", 5549) + "9"},
	{0x1378f99580cada, strings.Repeat("\xff", 5550) + "0"},
	{0x1b6a3079f8c522, strings.Repeat("\xff", 5551) + "1"},
	{0x143e587f656d19, strings.Repeat("\xff", 5552) + "2"},
	{0xbadb5a7005edf, strings.Repeat("\xff", 5553) + "3"},
	{0xc040bc67bc471, strings.Repeat("\xff", 5554) + "4"},
	{0x1758803a1fc391, strings.Repeat("\xff", 5555) + "5"},
	{0x0, strings.Repeat("\x00", 1e5)},
	{0x4ded86a56a148, strings.Repeat("a", 1e5)},
	{0x2b16296a7e5a6, strings.Repeat("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1e4)},
}

// Prove that we implement rollinghash.Hash64
var _ = rollinghash.Hash64(rollsum.New())

// Prove that we implement hash.Hash64
var _ = hash.Hash64(rollsum.New())

// Sum64ByWriteAndRoll computes the sum by prepending the input slice with
// a '\0', writing the first bytes of this slice into the sum, then
// sliding on the last byte and returning the result of Sum64
func Sum64ByWriteAndRoll(b []byte) uint64 {
	q := []byte("\x00")
	q = append(q, b...)
roll := rollsum.New() roll.Write(q[:len(q)-1]) roll.Roll(q[len(q)-1]) return roll.Sum64() } func TestGolden(t *testing.T) { for _, g := range golden { in := g.in // We test the classic implementation p := []byte(g.in) classic := hash.Hash64(rollsum.New()) classic.Write(p) if got := classic.Sum64(); got != g.out { t.Errorf("classic implentation: for %q, expected 0x%x, got 0x%x", in, g.out, got) continue } if got := Sum64ByWriteAndRoll(p); got != g.out { t.Errorf("rolling implentation: for %q, expected 0x%x, got 0x%x", in, g.out, got) continue } } } func BenchmarkRolling64B(b *testing.B) { b.SetBytes(1024) b.ReportAllocs() window := make([]byte, 64) for i := range window { window[i] = byte(i) } h := rollsum.New() in := make([]byte, 0, h.Size()) h.Write(window) b.ResetTimer() for i := 0; i < b.N; i++ { h.Roll(byte(i)) h.Sum(in) } } func BenchmarkReadUrandom(b *testing.B) { b.SetBytes(1024) b.ReportAllocs() f, err := os.Open("/dev/urandom") if err != nil { b.Errorf("Could not open /dev/urandom") } defer func() { if err := f.Close(); err != nil { log.Fatal(err) } }() r := bufio.NewReader(f) ws := 64 window := make([]byte, ws) n, err := r.Read(window) if n != ws || err != nil { b.Errorf("Could not read %d bytes", ws) } h := rollsum.New() in := make([]byte, 0, h.Size()) h.Write(window) b.ResetTimer() for i := 0; i < b.N; i++ { c, err := r.ReadByte() if err != nil { b.Errorf("%s", err) } h.Roll(c) h.Sum(in) } } rollinghash-4.0.0/roll/000077500000000000000000000000001334531431500147775ustar00rootroot00000000000000rollinghash-4.0.0/roll/main.go000066400000000000000000000050201334531431500162470ustar00rootroot00000000000000package main import ( "flag" "fmt" "io" "log" "os" "runtime/pprof" "time" "code.cloudfoundry.org/bytefmt" //rollsum "github.com/chmduquesne/rollinghash/adler32" //rollsum "github.com/chmduquesne/rollinghash/buzhash32" rollsum "github.com/chmduquesne/rollinghash/buzhash64" //rollsum "github.com/chmduquesne/rollinghash/bozo32" ) const ( KiB = 1024 MiB = 1024 
* KiB GiB = 1024 * MiB clearscreen = "\033[2J\033[1;1H" clearline = "\x1b[2K" ) func genMasks() (res []uint64) { res = make([]uint64, 64) ones := ^uint64(0) // 0xffffffffffffffff for i := 0; i < 64; i++ { res[i] = ones >> uint(63-i) } return } func hash2uint64(s []byte) (res uint64) { for _, b := range s { res <<= 8 res |= uint64(b) } return } func main() { cpuprofile := flag.String("cpuprofile", "", "write cpu profile to file") dostats := flag.Bool("stats", false, "Do some stats about the rolling sum") size := flag.String("size", "256M", "How much data to read") flag.Parse() if *cpuprofile != "" { f, err := os.Create(*cpuprofile) if err != nil { log.Fatal(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } fileSize, err := bytefmt.ToBytes(*size) if err != nil { log.Fatal(err) } bufsize := 16 * MiB rbuf := make([]byte, bufsize) hbuf := make([]byte, 0, 8) t := time.Now() f, err := os.Open("/dev/urandom") if err != nil { log.Fatal(err) } defer func() { if err := f.Close(); err != nil { log.Fatal(err) } }() io.ReadFull(f, rbuf) roll := rollsum.New() roll.Write(rbuf[:64]) masks := genMasks() hits := make(map[uint64]uint64) for _, m := range masks { hits[m] = 0 } n := uint64(0) k := 0 for n < fileSize { if k >= bufsize { status := fmt.Sprintf("Byte count: %s", bytefmt.ByteSize(n)) if *dostats { fmt.Printf(clearscreen) fmt.Println(status) for i, m := range masks { frequency := "NaN" if hits[m] != 0 { frequency = bytefmt.ByteSize(n / hits[m]) } fmt.Printf("0x%016x (%02d bits): every %s\n", m, i+1, frequency) } } else { fmt.Printf(clearline) fmt.Printf(status) fmt.Printf("\r") } _, err := io.ReadFull(f, rbuf) if err != nil { panic(err) } k = 0 } roll.Roll(rbuf[k]) if *dostats { s := hash2uint64(roll.Sum(hbuf)) for _, m := range masks { if s&m == m { hits[m] += 1 } else { break } } } k++ n++ } duration := time.Since(t) fmt.Printf("Rolled %s of data in %v (%s/s).\n", bytefmt.ByteSize(n), duration, bytefmt.ByteSize(n*1e9/uint64(duration)), ) } 
rollinghash-4.0.0/rollinghash.go000066400000000000000000000026251334531431500166750ustar00rootroot00000000000000
/*

Package rollinghash implements rolling versions of some hashes

*/
package rollinghash

import "hash"

// DefaultWindowCap is the default capacity of the internal window of a
// new Hash.
const DefaultWindowCap = 64

// A Roller is a type that has the method Roll. Roll updates the hash of a
// rolling window from just the entering byte. You MUST call Write()
// BEFORE using this method and provide it with an initial window of size
// at least 1 byte. You can then call this method for every new byte
// entering the window. The byte leaving the window is automatically
// computed from a copy of the window internally kept in the checksum.
// This window is updated along with the internal state of the checksum
// every time Roll() is called.
type Roller interface {
	Roll(b byte)
}

// Hash extends hash.Hash by adding the method Roll. A Hash can be updated
// byte by byte, by specifying which byte enters the window.
type Hash interface {
	hash.Hash
	Roller
}

// Hash32 extends hash.Hash32 by adding the method Roll. A Hash32 can be
// updated byte by byte, by specifying which byte enters the window.
type Hash32 interface {
	hash.Hash32
	Roller
}

// Hash64 extends hash.Hash64 by adding the method Roll. A Hash64 can be
// updated byte by byte, by specifying which byte enters the window.
type Hash64 interface { hash.Hash64 Roller } rollinghash-4.0.0/rollinghash_test.go000066400000000000000000000124311334531431500177300ustar00rootroot00000000000000package rollinghash_test import ( "hash" "math/rand" "testing" "github.com/chmduquesne/rollinghash" _adler32 "github.com/chmduquesne/rollinghash/adler32" "github.com/chmduquesne/rollinghash/bozo32" "github.com/chmduquesne/rollinghash/buzhash32" "github.com/chmduquesne/rollinghash/buzhash64" "github.com/chmduquesne/rollinghash/rabinkarp64" ) var allHashes = []struct { name string classic hash.Hash rolling rollinghash.Hash }{ {"adler32", _adler32.New(), _adler32.New()}, {"buzhash32", buzhash32.New(), buzhash32.New()}, {"buzhash64", buzhash64.New(), buzhash64.New()}, {"bozo32", bozo32.New(), bozo32.New()}, {"rabinkarp64", rabinkarp64.New(), rabinkarp64.New()}, } // Gets the hash sum as a uint64 func sum64(h hash.Hash) (res uint64) { buf := make([]byte, 0, 8) s := h.Sum(buf) for _, b := range s { res <<= 8 res |= uint64(b) } return } // Compute the hash by creating a byte slice with an additionnal '\0' at // the beginning, writing the slice without the last byte, and then // rolling the last byte. func SumByWriteAndRoll(h rollinghash.Hash, b []byte) uint64 { q := []byte("\x00") q = append(q, b...) 
h.Reset() h.Write(q[:len(q)-1]) h.Roll(q[len(q)-1]) return sum64(h) } // Compute the hash the classic way func SumByWriteOnly(h hash.Hash, b []byte) uint64 { h.Reset() h.Write(b) return sum64(h) } // Create some random slice (length betwen 0 and 8KB, random content) func RandomBytes() (res []byte) { n := rand.Intn(8192) res = make([]byte, n) rand.Read(res) return res } // Verify that, on random inputs, the classic hash and the rollinghash // return the same values func blackBox(t *testing.T, hashname string, classic hash.Hash, rolling rollinghash.Hash) { for i := 0; i < 1000; i++ { in := RandomBytes() if len(in) > 0 { sum := SumByWriteAndRoll(rolling, in) ref := SumByWriteOnly(classic, in) if ref != sum { t.Errorf("[%s] Expected 0x%x, got 0x%x", hashname, ref, sum) } } } } // Roll a window of 16 bytes with a classic hash and a rolling hash and // compare the results func foxDog(t *testing.T, hashname string, classic hash.Hash, rolling rollinghash.Hash) { s := []byte("The quick brown fox jumps over the lazy dog") // Window len n := 16 // Load the window into the rolling hash rolling.Write(s[:n]) // Roll it and compare the result with full re-calculus every time for i := n; i < len(s); i++ { // Reset and write the window in classic classic.Reset() classic.Write(s[i-n+1 : i+1]) // Roll the incoming byte in rolling rolling.Roll(s[i]) // Compare the hashes sumc := sum64(classic) sumr := sum64(rolling) if sumc != sumr { t.Errorf("[%s] %v: expected %x, got %x", hashname, s[i-n+1:i+1], sumc, sumr) } } } func rollEmptyWindow(t *testing.T, hashname string, classic hash.Hash, rolling rollinghash.Hash) { defer func() { if r := recover(); r == nil { t.Errorf("[%s] Rolling an empty window should cause a panic", hashname) } }() // This should panic rolling.Roll(byte('x')) } func writeTwice(t *testing.T, hashname string, classic hash.Hash, rolling rollinghash.Hash) { rolling.Write([]byte("hello ")) rolling.Write([]byte("world")) classic.Write([]byte("hello world")) if 
sum64(rolling) != sum64(classic) { t.Errorf("[%s] Expected same results on rolling and classic", hashname) } } func writeRollWrite(t *testing.T, hashname string, classic hash.Hash, rolling rollinghash.Hash) { rolling.Write([]byte(" hello")) rolling.Roll(byte(' ')) rolling.Write([]byte("world")) classic.Write([]byte("hello world")) if sum64(rolling) != sum64(classic) { t.Errorf("[%s] Expected same results on rolling and classic", hashname) } } func writeThenWriteNothing(t *testing.T, hashname string, classic hash.Hash, rolling rollinghash.Hash) { rolling.Write([]byte("hello")) rolling.Write([]byte("")) classic.Write([]byte("hello")) if sum64(rolling) != sum64(classic) { t.Errorf("[%s] Expected same results on rolling and classic", hashname) } } func writeNothing(t *testing.T, hashname string, classic hash.Hash, rolling rollinghash.Hash) { rolling.Write([]byte("")) if sum64(rolling) != sum64(classic) { t.Errorf("[%s] Expected same results on rolling and classic", hashname) } } func TestFoxDog(t *testing.T) { for _, h := range allHashes { h.classic.Reset() h.rolling.Reset() foxDog(t, h.name, h.classic, h.rolling) } } func TestBlackBox(t *testing.T) { for _, h := range allHashes { h.classic.Reset() h.rolling.Reset() blackBox(t, h.name, h.classic, h.rolling) } } func TestRollEmptyWindow(t *testing.T) { for _, h := range allHashes { h.classic.Reset() h.rolling.Reset() rollEmptyWindow(t, h.name, h.classic, h.rolling) } } func TestwriteTwice(t *testing.T) { for _, h := range allHashes { h.classic.Reset() h.rolling.Reset() writeTwice(t, h.name, h.classic, h.rolling) } } func TestwriteRollWrite(t *testing.T) { for _, h := range allHashes { h.classic.Reset() h.rolling.Reset() writeRollWrite(t, h.name, h.classic, h.rolling) } } func TestWriteThenWriteNothing(t *testing.T) { for _, h := range allHashes { h.classic.Reset() h.rolling.Reset() writeThenWriteNothing(t, h.name, h.classic, h.rolling) } } func TestWriteNothing(t *testing.T) { for _, h := range allHashes { 
h.classic.Reset() h.rolling.Reset() writeNothing(t, h.name, h.classic, h.rolling) } }