pax_global_header00006660000000000000000000000064145522731600014517gustar00rootroot0000000000000052 comment=12bec1d9b9c8d1736984bbd6f4165e15cafb2e17 go-sieve-0.2.1/000077500000000000000000000000001455227316000132355ustar00rootroot00000000000000go-sieve-0.2.1/.gitignore000066400000000000000000000012341455227316000152250ustar00rootroot00000000000000# Compiled Object files, Static and Dynamic libs (Shared Objects) *.o *.a *.so # Folders _obj _test # Architecture specific extensions/prefixes *.[568vq] [568vq].out *.cgo1.go *.cgo2.c _cgo_defun.c _cgo_gotypes.go _cgo_export.* _testmain.go *.exe *.test *.prof bin/* .*.sw? .idea logs/* # gg ignores vendor/src/* vendor/pkg/* servers.iml *.DS_Store # vagrant ignores tools/vagrant/.vagrant tools/vagrant/adsrv-conf/.frontend tools/vagrant/adsrv-conf/.bidder tools/vagrant/adsrv-conf/.transcoder tools/vagrant/redis-cluster-conf/7777/nodes.conf tools/vagrant/redis-cluster-conf/7778/nodes.conf tools/vagrant/redis-cluster-conf/7779/nodes.conf *.aof *.rdb *.deb go-sieve-0.2.1/LICENSE000066400000000000000000000024241455227316000142440ustar00rootroot00000000000000BSD 2-Clause License Copyright (c) 2024, Sudhi Herle Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. go-sieve-0.2.1/README.md000066400000000000000000000004651455227316000145210ustar00rootroot00000000000000# go-sieve - SIEVE is simpler than LRU ## What is it? `go-sieve` is golang implementation of the [SIEVE](https://yazhuozhang.com/assets/pdf/nsdi24-sieve.pdf) cache eviction algorithm. This implementation closely follows the paper's pseudo-code - but uses golang generics to provide an ergonomic interface. go-sieve-0.2.1/assert_test.go000066400000000000000000000014071455227316000161260ustar00rootroot00000000000000// assert_test.go - utility function for tests // // (c) 2024 Sudhi Herle // // Licensing Terms: GPLv2 // // If you need a commercial license for this work, please contact // the author. // // This software does not come with any express or implied // warranty; it is provided "as is". No claim is made to its // suitability for any purpose. package sieve_test import ( "fmt" "runtime" "testing" ) func newAsserter(t *testing.T) func(cond bool, msg string, args ...interface{}) { return func(cond bool, msg string, args ...interface{}) { if cond { return } _, file, line, ok := runtime.Caller(1) if !ok { file = "???" line = 0 } s := fmt.Sprintf(msg, args...) t.Fatalf("%s: %d: Assertion failed: %s\n", file, line, s) } } go-sieve-0.2.1/go.mod000066400000000000000000000000601455227316000143370ustar00rootroot00000000000000module github.com/opencoff/go-sieve go 1.21.1 go-sieve-0.2.1/go.sum000066400000000000000000000000001455227316000143560ustar00rootroot00000000000000go-sieve-0.2.1/sieve.go000066400000000000000000000155171455227316000147100ustar00rootroot00000000000000// sieve.go - SIEVE - a simple and efficient cache // // (c) 2024 Sudhi Herle // // Copyright 2024- Sudhi Herle // License: BSD-2-Clause // // If you need a commercial license for this work, please contact // the author. // // This software does not come with any express or implied // warranty; it is provided "as is". No claim is made to its // suitability for any purpose. // This is golang implementation of the SIEVE cache eviction algorithm // The original paper is: // https://yazhuozhang.com/assets/pdf/nsdi24-sieve.pdf // // This implementation closely follows the paper - but uses golang generics // for an ergonomic interface. // Package sieve implements the SIEVE cache eviction algorithm. // SIEVE stands in contrast to other eviction algorithms like LRU, 2Q, ARC // with its simplicity. The original paper is in: // https://yazhuozhang.com/assets/pdf/nsdi24-sieve.pdf // // SIEVE is built on a FIFO queue - with an extra pointer (called "hand") in // the paper. This "hand" plays a crucial role in determining who to evict // next. package sieve import ( "fmt" "strings" "sync" "sync/atomic" ) // node contains the tuple as a node in a linked list. type node[K comparable, V any] struct { sync.Mutex key K val V visited atomic.Bool next *node[K, V] prev *node[K, V] } // Sieve represents a cache mapping the key of type 'K' with // a value of type 'V'. The type 'K' must implement the // comparable trait. An instance of Sieve has a fixed max capacity; // new additions to the cache beyond the capacity will cause cache // eviction of other entries - as determined by the SIEVE algorithm. type Sieve[K comparable, V any] struct { mu sync.Mutex cache *syncMap[K, *node[K, V]] head *node[K, V] tail *node[K, V] hand *node[K, V] size int capacity int pool *syncPool[node[K, V]] } // New creates a new cache of size 'capacity' mapping key 'K' to value 'V' func New[K comparable, V any](capacity int) *Sieve[K, V] { s := &Sieve[K, V]{ cache: newSyncMap[K, *node[K, V]](), capacity: capacity, pool: newSyncPool[node[K, V]](), } return s } // Get fetches the value for a given key in the cache. // It returns true if the key is in the cache, false otherwise. // The zero value for 'V' is returned when key is not in the cache. func (s *Sieve[K, V]) Get(key K) (V, bool) { if v, ok := s.cache.Get(key); ok { v.visited.Store(true) return v.val, true } var x V return x, false } // Add adds a new element to the cache or overwrite one if it exists // Return true if we replaced, false otherwise func (s *Sieve[K, V]) Add(key K, val V) bool { if v, ok := s.cache.Get(key); ok { v.visited.Store(true) v.Lock() v.val = val v.Unlock() return true } s.mu.Lock() s.add(key, val) s.mu.Unlock() return false } // Probe adds if not present in the cache. // Returns: // // when key is present in the cache // when key is not present in the cache func (s *Sieve[K, V]) Probe(key K, val V) (V, bool) { if v, ok := s.cache.Get(key); ok { v.visited.Store(true) return v.val, true } s.mu.Lock() s.add(key, val) s.mu.Unlock() return val, false } // Delete deletes the named key from the cache // It returns true if the item was in the cache and false otherwise func (s *Sieve[K, V]) Delete(key K) bool { if v, ok := s.cache.Del(key); ok { s.mu.Lock() s.remove(v) s.mu.Unlock() return true } return false } // Purge resets the cache func (s *Sieve[K, V]) Purge() { s.mu.Lock() s.cache = newSyncMap[K, *node[K, V]]() s.head = nil s.tail = nil s.mu.Unlock() } // Len returns the current cache utilization func (s *Sieve[K, V]) Len() int { return s.size } // Cap returns the max cache capacity func (s *Sieve[K, V]) Cap() int { return s.capacity } // String returns a string description of the sieve cache func (s *Sieve[K, V]) String() string { s.mu.Lock() m := s.desc() s.mu.Unlock() return m } // Dump dumps all the cache contents as a newline delimited // string. func (s *Sieve[K, V]) Dump() string { var b strings.Builder s.mu.Lock() b.WriteString(s.desc()) b.WriteRune('\n') for n := s.head; n != nil; n = n.next { h := " " if n == s.hand { h = ">>" } b.WriteString(fmt.Sprintf("%svisited=%v, key=%v, val=%v\n", h, n.visited.Load(), n.key, n.val)) } s.mu.Unlock() return b.String() } // -- internal methods -- // add a new tuple to the cache and evict as necessary // caller must hold lock. func (s *Sieve[K, V]) add(key K, val V) { // cache miss; we evict and fnd a new node if s.size == s.capacity { s.evict() } n := s.newNode(key, val) // Eviction is guaranteed to remove one node; so this should never happen. if n == nil { msg := fmt.Sprintf("%T: add <%v>: objpool empty after eviction", s, key) panic(msg) } s.cache.Put(key, n) // insert at the head of the list n.next = s.head n.prev = nil if s.head != nil { s.head.prev = n } s.head = n if s.tail == nil { s.tail = n } s.size += 1 } // evict an item from the cache. // NB: Caller must hold the lock func (s *Sieve[K, V]) evict() { hand := s.hand if hand == nil { hand = s.tail } for hand != nil { if !hand.visited.Load() { s.cache.Del(hand.key) s.remove(hand) s.hand = hand.prev return } hand.visited.Store(false) hand = hand.prev // wrap around and start again if hand == nil { hand = s.tail } } s.hand = hand } func (s *Sieve[K, V]) remove(n *node[K, V]) { s.size -= 1 // remove node from list if n.prev != nil { n.prev.next = n.next } else { s.head = n.next } if n.next != nil { n.next.prev = n.prev } else { s.tail = n.prev } s.pool.Put(n) } func (s *Sieve[K, V]) newNode(key K, val V) *node[K, V] { n := s.pool.Get() n.key, n.val = key, val n.next, n.prev = nil, nil n.visited.Store(false) return n } // desc describes the properties of the sieve func (s *Sieve[K, V]) desc() string { m := fmt.Sprintf("cache<%T>: size %d, cap %d, head=%p, tail=%p, hand=%p", s, s.size, s.capacity, s.head, s.tail, s.hand) return m } // Generic sync.Pool type syncPool[T any] struct { pool sync.Pool } func newSyncPool[T any]() *syncPool[T] { p := &syncPool[T]{ pool: sync.Pool{ New: func() any { return new(T) }, }, } return p } func (s *syncPool[T]) Get() *T { p := s.pool.Get() return p.(*T) } func (s *syncPool[T]) Put(n *T) { s.pool.Put(n) } // generic sync.Map type syncMap[K comparable, V any] struct { m sync.Map } func newSyncMap[K comparable, V any]() *syncMap[K, V] { m := syncMap[K, V]{} return &m } func (m *syncMap[K, V]) Get(key K) (V, bool) { v, ok := m.m.Load(key) if ok { return v.(V), true } var z V return z, false } func (m *syncMap[K, V]) Put(key K, val V) { m.m.Store(key, val) } func (m *syncMap[K, V]) Del(key K) (V, bool) { x, ok := m.m.LoadAndDelete(key) if ok { return x.(V), true } var z V return z, false } go-sieve-0.2.1/sieve_bench_test.go000066400000000000000000000025441455227316000171020ustar00rootroot00000000000000// sieve_bench_test.go -- benchmark testing // // (c) 2024 Sudhi Herle // // Copyright 2024- Sudhi Herle // License: BSD-2-Clause // // If you need a commercial license for this work, please contact // the author. // // This software does not come with any express or implied // warranty; it is provided "as is". No claim is made to its // suitability for any purpose. package sieve_test import ( "math/rand" "sync/atomic" "testing" "github.com/opencoff/go-sieve" ) func BenchmarkSieve_Add(b *testing.B) { c := sieve.New[int, int](8192) ent := make([]int, b.N) for i := 0; i < b.N; i++ { var k int if i%2 == 0 { k = int(rand.Int63() % 16384) } else { k = int(rand.Int63() % 32768) } ent[i] = k } b.ResetTimer() for i := 0; i < b.N; i++ { k := ent[i] c.Add(k, k) } } func BenchmarkSieve_Get(b *testing.B) { c := sieve.New[int, int](8192) ent := make([]int, b.N) for i := 0; i < b.N; i++ { var k int if i%2 == 0 { k = int(rand.Int63() % 16384) } else { k = int(rand.Int63() % 32768) } c.Add(k, k) ent[i] = k } b.ResetTimer() var hit, miss int64 for i := 0; i < b.N; i++ { if _, ok := c.Get(ent[i]); ok { atomic.AddInt64(&hit, 1) } else { atomic.AddInt64(&miss, 1) } } b.Logf("%d: hit %d, miss %d, ratio %4.2f", b.N, hit, miss, float64(hit)/float64(hit+miss)) } go-sieve-0.2.1/sieve_test.go000066400000000000000000000122011455227316000157320ustar00rootroot00000000000000// sieve_test.go - test harness for sieve cache // // (c) 2024 Sudhi Herle // // Copyright 2024- Sudhi Herle // License: BSD-2-Clause // // If you need a commercial license for this work, please contact // the author. // // This software does not come with any express or implied // warranty; it is provided "as is". No claim is made to its // suitability for any purpose. package sieve_test import ( "encoding/binary" "fmt" "math/rand" "runtime" "strings" "sync" "sync/atomic" "testing" "time" "github.com/opencoff/go-sieve" ) func TestBasic(t *testing.T) { assert := newAsserter(t) s := sieve.New[int, string](4) ok := s.Add(1, "hello") assert(!ok, "empty cache: expected clean add of 1") ok = s.Add(2, "foo") assert(!ok, "empty cache: expected clean add of 2") ok = s.Add(3, "bar") assert(!ok, "empty cache: expected clean add of 3") ok = s.Add(4, "gah") assert(!ok, "empty cache: expected clean add of 4") ok = s.Add(1, "world") assert(ok, "key 1: expected to replace") ok = s.Add(5, "boo") assert(!ok, "adding 5: expected to be new add") _, ok = s.Get(2) assert(!ok, "evict: expected 2 to be evicted") } func TestEvictAll(t *testing.T) { assert := newAsserter(t) size := 128 s := sieve.New[int, string](size) for i := 0; i < size*2; i++ { val := fmt.Sprintf("val %d", i) _, ok := s.Probe(i, val) assert(!ok, "%d: exp new add", i) } // the first half should've been all evicted for i := 0; i < size; i++ { _, ok := s.Get(i) assert(!ok, "%d: exp to be evicted", i) } // leaving the second half intact for i := size; i < size*2; i++ { ok := s.Delete(i) assert(ok, "%d: exp del on existing cache elem") } } func TestAllOps(t *testing.T) { size := 8192 vals := randints(size * 3) s := sieve.New[uint64, uint64](size) for i := range vals { k := vals[i] s.Add(k, k) } vals = shuffle(vals) var hit, miss int for i := range vals { k := vals[i] _, ok := s.Get(k) if ok { hit++ } else { miss++ } } t.Logf("%d items: hit %d, miss %d, ratio %4.2f\n", len(vals), hit, miss, float64(hit)/float64(hit+miss)) } type timing struct { typ string d time.Duration hit, miss uint64 } type barrier atomic.Uint64 func (b *barrier) Wait() { v := (*atomic.Uint64)(b) for { if v.Load() == 1 { return } runtime.Gosched() } } func (b *barrier) Signal() { v := (*atomic.Uint64)(b) v.Store(1) } func TestSpeed(t *testing.T) { size := 32768 vals := randints(size * 3) //valr := shuffle(vals) // we will start 4 types of workers: add, get, del, probe // each worker will be working on a shuffled version of // the uint64 array. for ncpu := 2; ncpu <= 32; ncpu *= 2 { var wg sync.WaitGroup wg.Add(ncpu) s := sieve.New[uint64, uint64](size) var bar barrier // number of workers of each type m := ncpu / 2 ch := make(chan timing, m) for i := 0; i < m; i++ { go func(ch chan timing, wg *sync.WaitGroup) { var hit, miss uint64 bar.Wait() st := time.Now() // shuffled array for _, x := range vals { v := x % 16384 if _, ok := s.Get(v); ok { hit++ } else { miss++ } } d := time.Now().Sub(st) ch <- timing{ typ: "get", d: d, hit: hit, miss: miss, } wg.Done() }(ch, &wg) go func(ch chan timing, wg *sync.WaitGroup) { var hit, miss uint64 bar.Wait() st := time.Now() for _, x := range vals { v := x % 16384 if _, ok := s.Probe(v, v); ok { hit++ } else { miss++ } } d := time.Now().Sub(st) ch <- timing{ typ: "probe", d: d, hit: hit, miss: miss, } wg.Done() }(ch, &wg) } bar.Signal() // wait for goroutines to end and close the chan go func() { wg.Wait() close(ch) }() // now harvest timing times := map[string]timing{} for tm := range ch { if v, ok := times[tm.typ]; ok { z := (int64(v.d) + int64(tm.d)) / 2 v.d = time.Duration(z) v.hit = (v.hit + tm.hit) / 2 v.miss = (v.miss + tm.miss) / 2 times[tm.typ] = v } else { times[tm.typ] = tm } } var out strings.Builder fmt.Fprintf(&out, "Tot CPU %d, workers/type %d %d elems\n", ncpu, m, len(vals)) for _, v := range times { var ratio string ns := toNs(int64(v.d), len(vals), m) ratio = hitRatio(v.hit, v.miss) fmt.Fprintf(&out, "%6s %4.2f ns/op%s\n", v.typ, ns, ratio) } t.Logf(out.String()) } } func dup[T ~[]E, E any](v T) []E { n := len(v) g := make([]E, n) copy(g, v) return g } func shuffle[T ~[]E, E any](v T) []E { i := len(v) for i--; i >= 0; i-- { j := rand.Intn(i + 1) v[i], v[j] = v[j], v[i] } return v } func toNs(tot int64, nvals, ncpu int) float64 { return (float64(tot) / float64(nvals)) / float64(ncpu) } func hitRatio(hit, miss uint64) string { r := float64(hit) / float64(hit+miss) return fmt.Sprintf(" hit-ratio %4.2f (hit %d, miss %d)", r, hit, miss) } func randints(sz int) []uint64 { var b [8]byte v := make([]uint64, sz) for i := 0; i < sz; i++ { n, err := rand.Read(b[:]) if n != 8 || err != nil { panic("can't generate rand") } v[i] = binary.BigEndian.Uint64(b[:]) } return v }