pax_global_header00006660000000000000000000000064135633050470014520gustar00rootroot0000000000000052 comment=99d1bbbf28e64530eb246be0568fc7709a35ebdd ristretto-0.0.1/000077500000000000000000000000001356330504700135555ustar00rootroot00000000000000ristretto-0.0.1/.deepsource.toml000066400000000000000000000002751356330504700166720ustar00rootroot00000000000000version = 1 test_patterns = [ '**/*_test.go' ] exclude_patterns = [ ] [[analyzers]] name = 'go' enabled = true [analyzers.meta] import_path = 'github.com/dgraph-io/ristretto' ristretto-0.0.1/.github/000077500000000000000000000000001356330504700151155ustar00rootroot00000000000000ristretto-0.0.1/.github/CODEOWNERS000066400000000000000000000003161356330504700165100ustar00rootroot00000000000000# CODEOWNERS info: https://help.github.com/en/articles/about-code-owners # Owners are automatically requested for review for PRs that changes code # that they own. * @manishrjain @jarifibrahim @karlmcguire ristretto-0.0.1/.github/workflows/000077500000000000000000000000001356330504700171525ustar00rootroot00000000000000ristretto-0.0.1/.github/workflows/ci.yml000066400000000000000000000006561356330504700202770ustar00rootroot00000000000000name: tests on: [push, pull_request] jobs: ci: strategy: matrix: go-version: [1.12.x, 1.13.x] platform: [ubuntu-latest] name: CI runs-on: ${{ matrix.platform }} steps: - uses: actions/checkout@v1 - uses: actions/setup-go@v1 with: go-version: ${{ matrix.go-version }} - run: go fmt ./... - run: go test -race ./... - run: go test -v ./... ristretto-0.0.1/LICENSE000066400000000000000000000236751356330504700145770ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. 
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
END OF TERMS AND CONDITIONS ristretto-0.0.1/README.md000066400000000000000000000205341356330504700150400ustar00rootroot00000000000000# Ristretto [![Go Doc](https://img.shields.io/badge/godoc-reference-blue.svg)](http://godoc.org/github.com/dgraph-io/ristretto) [![Go Report Card](https://img.shields.io/badge/go%20report-A%2B-brightgreen)](https://goreportcard.com/report/github.com/dgraph-io/ristretto) [![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen)](https://gocover.io/github.com/dgraph-io/ristretto) ![Tests](https://github.com/dgraph-io/ristretto/workflows/tests/badge.svg) Ristretto is a fast, concurrent cache library built with a focus on performance and correctness. The motivation to build Ristretto comes from the need for a contention-free cache in [Dgraph][]. [Dgraph]: https://github.com/dgraph-io/dgraph ## Features * **High Hit Ratios** - with our unique admission/eviction policy pairing, Ristretto's performance is best in class. * **Eviction: SampledLFU** - on par with exact LRU and better performance on Search and Database traces. * **Admission: TinyLFU** - extra performance with little memory overhead (12 bits per counter). * **Fast Throughput** - we use a variety of techniques for managing contention and the result is excellent throughput. * **Cost-Based Eviction** - any large new item deemed valuable can evict multiple smaller items (cost could be anything). * **Fully Concurrent** - you can use as many goroutines as you want with little throughput degradation. * **Metrics** - optional performance metrics for throughput, hit ratios, and other stats. * **Simple API** - just figure out your ideal `Config` values and you're off and running. ## Status Ristretto is usable but still under active development. We expect it to be production ready in the near future. 
## Table of Contents * [Usage](#Usage) * [Example](#Example) * [Config](#Config) * [NumCounters](#Config) * [MaxCost](#Config) * [BufferItems](#Config) * [Metrics](#Config) * [OnEvict](#Config) * [KeyToHash](#Config) * [Cost](#Config) * [Benchmarks](#Benchmarks) * [Hit Ratios](#Hit-Ratios) * [Search](#Search) * [Database](#Database) * [Looping](#Looping) * [CODASYL](#CODASYL) * [Throughput](#Throughput) * [Mixed](#Mixed) * [Read](#Read) * [Write](#Write) * [FAQ](#FAQ) ## Usage ### Example ```go func main() { cache, err := ristretto.NewCache(&ristretto.Config{ NumCounters: 1e7, // number of keys to track frequency of (10M). MaxCost: 1 << 30, // maximum cost of cache (1GB). BufferItems: 64, // number of keys per Get buffer. }) if err != nil { panic(err) } // set a value with a cost of 1 cache.Set("key", "value", 1) // wait for value to pass through buffers time.Sleep(10 * time.Millisecond) value, found := cache.Get("key") if !found { panic("missing value") } fmt.Println(value) cache.Del("key") } ``` ### Config The `Config` struct is passed to `NewCache` when creating Ristretto instances (see the example above). **NumCounters** `int64` NumCounters is the number of 4-bit access counters to keep for admission and eviction. We've seen good performance in setting this to 10x the number of items you expect to keep in the cache when full. For example, if you expect each item to have a cost of 1 and MaxCost is 100, set NumCounters to 1,000. Or, if you use variable cost values but expect the cache to hold around 10,000 items when full, set NumCounters to 100,000. The important thing is the *number of unique items* in the full cache, not necessarily the MaxCost value. **MaxCost** `int64` MaxCost is how eviction decisions are made. For example, if MaxCost is 100 and a new item with a cost of 1 increases total cache cost to 101, 1 item will be evicted. MaxCost can also be used to denote the max size in bytes. 
For example, if MaxCost is 1,000,000 (1MB) and the cache is full with 1,000 1KB items, a new item (that's accepted) would cause 5 1KB items to be evicted. MaxCost could be anything as long as it matches how you're using the cost values when calling Set. **BufferItems** `int64` BufferItems is the size of the Get buffers. The best value we've found for this is 64. If for some reason you see Get performance decreasing with lots of contention (you shouldn't), try increasing this value in increments of 64. This is a fine-tuning mechanism and you probably won't have to touch this. **Metrics** `bool` Metrics is true when you want real-time logging of a variety of stats. The reason this is a Config flag is because there's a 10% throughput performance overhead. **OnEvict** `func(hashes [2]uint64, value interface{}, cost int64)` OnEvict is called for every eviction. **KeyToHash** `func(key interface{}) [2]uint64` KeyToHash is the hashing algorithm used for every key. If this is nil, Ristretto has a variety of [defaults depending on the underlying interface type](https://github.com/dgraph-io/ristretto/blob/master/z/z.go#L19-L41). Note that if you want 128bit hashes you should use the full `[2]uint64`, otherwise just fill the `uint64` at the `0` position and it will behave like any 64bit hash. **Cost** `func(value interface{}) int64` Cost is an optional function you can pass to the Config in order to evaluate item cost at runtime, and only for the Set calls that aren't dropped (this is useful if calculating item cost is particularly expensive and you don't want to waste time on items that will be dropped anyways). To signal to Ristretto that you'd like to use this Cost function: 1. Set the Cost field to a non-nil function. 2. When calling Set for new items or item updates, use a `cost` of 0. ## Benchmarks The benchmarks can be found in https://github.com/dgraph-io/benchmarks/tree/master/cachebench/ristretto. 
### Hit Ratios #### Search This trace is described as "disk read accesses initiated by a large commercial search engine in response to various web search requests."

#### Database This trace is described as "a database server running at a commercial site running an ERP application on top of a commercial database."

#### Looping This trace demonstrates a looping access pattern.

#### CODASYL This trace is described as "references to a CODASYL database for a one hour period."

### Throughput All throughput benchmarks were ran on an Intel Core i7-8700K (3.7GHz) with 16gb of RAM. #### Mixed

#### Read

#### Write

## FAQ ### How are you achieving this performance? What shortcuts are you taking? We go into detail in the [Ristretto blog post](https://blog.dgraph.io/post/introducing-ristretto-high-perf-go-cache/), but in short: our throughput performance can be attributed to a mix of batching and eventual consistency. Our hit ratio performance is mostly due to an excellent [admission policy](https://arxiv.org/abs/1512.00727) and SampledLFU eviction policy. As for "shortcuts," the only thing Ristretto does that could be construed as one is dropping some Set calls. That means a Set call for a new item (updates are guaranteed) isn't guaranteed to make it into the cache. The new item could be dropped at two points: when passing through the Set buffer or when passing through the admission policy. However, this doesn't affect hit ratios much at all as we expect the most popular items to be Set multiple times and eventually make it in the cache. ### Is Ristretto distributed? No, it's just like any other Go library that you can import into your project and use in a single process. ristretto-0.0.1/cache.go000066400000000000000000000334441356330504700151570ustar00rootroot00000000000000/* * Copyright 2019 Dgraph Labs, Inc. and Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // Ristretto is a fast, fixed size, in-memory cache with a dual focus on // throughput and hit ratio performance. You can easily add Ristretto to an // existing system and keep the most valuable data where you need it. 
package ristretto import ( "bytes" "errors" "fmt" "sync/atomic" "github.com/dgraph-io/ristretto/z" ) const ( // TODO: find the optimal value for this or make it configurable setBufSize = 32 * 1024 ) // Cache is a thread-safe implementation of a hashmap with a TinyLFU admission // policy and a Sampled LFU eviction policy. You can use the same Cache instance // from as many goroutines as you want. type Cache struct { // store is the central concurrent hashmap where key-value items are stored store store // policy determines what gets let in to the cache and what gets kicked out policy policy // getBuf is a custom ring buffer implementation that gets pushed to when // keys are read getBuf *ringBuffer // setBuf is a buffer allowing us to batch/drop Sets during times of high // contention setBuf chan *item // onEvict is called for item evictions onEvict func(uint64, uint64, interface{}, int64) // KeyToHash function is used to customize the key hashing algorithm. // Each key will be hashed using the provided function. If keyToHash value // is not set, the default keyToHash function is used. keyToHash func(interface{}) (uint64, uint64) // stop is used to stop the processItems goroutine stop chan struct{} // cost calculates cost from a value cost func(value interface{}) int64 // Metrics contains a running log of important statistics like hits, misses, // and dropped items Metrics *Metrics } // Config is passed to NewCache for creating new Cache instances. type Config struct { // NumCounters determines the number of counters (keys) to keep that hold // access frequency information. It's generally a good idea to have more // counters than the max cache capacity, as this will improve eviction // accuracy and subsequent hit ratios. // // For example, if you expect your cache to hold 1,000,000 items when full, // NumCounters should be 10,000,000 (10x). Each counter takes up 4 bits, so // keeping 10,000,000 counters would require 5MB of memory. 
NumCounters int64 // MaxCost can be considered as the cache capacity, in whatever units you // choose to use. // // For example, if you want the cache to have a max capacity of 100MB, you // would set MaxCost to 100,000,000 and pass an item's number of bytes as // the `cost` parameter for calls to Set. If new items are accepted, the // eviction process will take care of making room for the new item and not // overflowing the MaxCost value. MaxCost int64 // BufferItems determines the size of Get buffers. // // Unless you have a rare use case, using `64` as the BufferItems value // results in good performance. BufferItems int64 // Metrics determines whether cache statistics are kept during the cache's // lifetime. There *is* some overhead to keeping statistics, so you should // only set this flag to true when testing or throughput performance isn't a // major factor. Metrics bool // OnEvict is called for every eviction and passes the hashed key, value, // and cost to the function. OnEvict func(key, conflict uint64, value interface{}, cost int64) // KeyToHash function is used to customize the key hashing algorithm. // Each key will be hashed using the provided function. If keyToHash value // is not set, the default keyToHash function is used. KeyToHash func(key interface{}) (uint64, uint64) // Cost evaluates a value and outputs a corresponding cost. This function // is ran after Set is called for a new item or an item update with a cost // param of 0. Cost func(value interface{}) int64 } type itemFlag byte const ( itemNew itemFlag = iota itemDelete itemUpdate ) // item is passed to setBuf so items can eventually be added to the cache type item struct { flag itemFlag key uint64 conflict uint64 value interface{} cost int64 } // NewCache returns a new Cache instance and any configuration errors, if any. 
func NewCache(config *Config) (*Cache, error) { switch { case config.NumCounters == 0: return nil, errors.New("NumCounters can't be zero.") case config.MaxCost == 0: return nil, errors.New("MaxCost can't be zero.") case config.BufferItems == 0: return nil, errors.New("BufferItems can't be zero.") } policy := newPolicy(config.NumCounters, config.MaxCost) cache := &Cache{ store: newStore(), policy: policy, getBuf: newRingBuffer(policy, config.BufferItems), setBuf: make(chan *item, setBufSize), onEvict: config.OnEvict, keyToHash: config.KeyToHash, stop: make(chan struct{}), cost: config.Cost, } if cache.keyToHash == nil { cache.keyToHash = z.KeyToHash } if config.Metrics { cache.collectMetrics() } // NOTE: benchmarks seem to show that performance decreases the more // goroutines we have running cache.processItems(), so 1 should // usually be sufficient go cache.processItems() return cache, nil } // Get returns the value (if any) and a boolean representing whether the // value was found or not. The value can be nil and the boolean can be true at // the same time. func (c *Cache) Get(key interface{}) (interface{}, bool) { if c == nil || key == nil { return nil, false } keyHash, conflictHash := c.keyToHash(key) c.getBuf.Push(keyHash) value, ok := c.store.Get(keyHash, conflictHash) if ok { c.Metrics.add(hit, keyHash, 1) } else { c.Metrics.add(miss, keyHash, 1) } return value, ok } // Set attempts to add the key-value item to the cache. If it returns false, // then the Set was dropped and the key-value item isn't added to the cache. If // it returns true, there's still a chance it could be dropped by the policy if // its determined that the key-value item isn't worth keeping, but otherwise the // item will be added and other items will be evicted in order to make room. // // To dynamically evaluate the items cost using the Config.Coster function, set // the cost parameter to 0 and Coster will be ran when needed in order to find // the items true cost. 
func (c *Cache) Set(key, value interface{}, cost int64) bool { if c == nil || key == nil { return false } keyHash, conflictHash := c.keyToHash(key) i := &item{ flag: itemNew, key: keyHash, conflict: conflictHash, value: value, cost: cost, } // attempt to immediately update hashmap value and set flag to update so the // cost is eventually updated if c.store.Update(keyHash, conflictHash, i.value) { i.flag = itemUpdate } // attempt to send item to policy select { case c.setBuf <- i: return true default: c.Metrics.add(dropSets, keyHash, 1) return false } } // Del deletes the key-value item from the cache if it exists. func (c *Cache) Del(key interface{}) { if c == nil || key == nil { return } keyHash, conflictHash := c.keyToHash(key) c.setBuf <- &item{ flag: itemDelete, key: keyHash, conflict: conflictHash, } } // Close stops all goroutines and closes all channels. func (c *Cache) Close() { // block until processItems goroutine is returned c.stop <- struct{}{} close(c.stop) close(c.setBuf) c.policy.Close() } // Clear empties the hashmap and zeroes all policy counters. Note that this is // not an atomic operation (but that shouldn't be a problem as it's assumed that // Set/Get calls won't be occurring until after this). func (c *Cache) Clear() { // block until processItems goroutine is returned c.stop <- struct{}{} // swap out the setBuf channel c.setBuf = make(chan *item, setBufSize) // clear value hashmap and policy data c.policy.Clear() c.store.Clear() // only reset metrics if they're enabled if c.Metrics != nil { c.Metrics.Clear() } // restart processItems goroutine go c.processItems() } // processItems is ran by goroutines processing the Set buffer. 
func (c *Cache) processItems() { for { select { case i := <-c.setBuf: // calculate item cost value if new or update if i.cost == 0 && c.cost != nil && i.flag != itemDelete { i.cost = c.cost(i.value) } switch i.flag { case itemNew: victims, added := c.policy.Add(i.key, i.cost) if added { c.store.Set(i.key, i.conflict, i.value) c.Metrics.add(keyAdd, i.key, 1) c.Metrics.add(costAdd, i.key, uint64(i.cost)) } for _, victim := range victims { victim.conflict, victim.value = c.store.Del(victim.key, 0) if c.onEvict != nil { c.onEvict(victim.key, victim.conflict, victim.value, victim.cost) } c.Metrics.add(keyEvict, victim.key, 1) c.Metrics.add(costEvict, victim.key, uint64(victim.cost)) } case itemUpdate: c.policy.Update(i.key, i.cost) case itemDelete: c.policy.Del(i.key) c.store.Del(i.key, i.conflict) } case <-c.stop: return } } } // collectMetrics just creates a new *Metrics instance and adds the pointers // to the cache and policy instances. func (c *Cache) collectMetrics() { c.Metrics = newMetrics() c.policy.CollectMetrics(c.Metrics) } type metricType int const ( // The following 2 keep track of hits and misses. hit = iota miss // The following 3 keep track of number of keys added, updated and evicted. keyAdd keyUpdate keyEvict // The following 2 keep track of cost of keys added and evicted. costAdd costEvict // The following keep track of how many sets were dropped or rejected later. dropSets rejectSets // The following 2 keep track of how many gets were kept and dropped on the // floor. dropGets keepGets // This should be the final enum. Other enums should be set before this. doNotUse ) func stringFor(t metricType) string { switch t { case hit: return "hit" case miss: return "miss" case keyAdd: return "keys-added" case keyUpdate: return "keys-updated" case keyEvict: return "keys-evicted" case costAdd: return "cost-added" case costEvict: return "cost-evicted" case dropSets: return "sets-dropped" case rejectSets: return "sets-rejected" // by policy. 
case dropGets: return "gets-dropped" case keepGets: return "gets-kept" default: return "unidentified" } } // Metrics is a snapshot of performance statistics for the lifetime of a cache // instance. type Metrics struct { all [doNotUse][]*uint64 } func newMetrics() *Metrics { s := &Metrics{} for i := 0; i < doNotUse; i++ { s.all[i] = make([]*uint64, 256) slice := s.all[i] for j := range slice { slice[j] = new(uint64) } } return s } func (p *Metrics) add(t metricType, hash, delta uint64) { if p == nil { return } valp := p.all[t] // Avoid false sharing by padding at least 64 bytes of space between two // atomic counters which would be incremented. idx := (hash % 25) * 10 atomic.AddUint64(valp[idx], delta) } func (p *Metrics) get(t metricType) uint64 { if p == nil { return 0 } valp := p.all[t] var total uint64 for i := range valp { total += atomic.LoadUint64(valp[i]) } return total } // Hits is the number of Get calls where a value was found for the corresponding // key. func (p *Metrics) Hits() uint64 { return p.get(hit) } // Misses is the number of Get calls where a value was not found for the // corresponding key. func (p *Metrics) Misses() uint64 { return p.get(miss) } // KeysAdded is the total number of Set calls where a new key-value item was // added. func (p *Metrics) KeysAdded() uint64 { return p.get(keyAdd) } // KeysUpdated is the total number of Set calls where the value was updated. func (p *Metrics) KeysUpdated() uint64 { return p.get(keyUpdate) } // KeysEvicted is the total number of keys evicted. func (p *Metrics) KeysEvicted() uint64 { return p.get(keyEvict) } // CostAdded is the sum of costs that have been added (successful Set calls). func (p *Metrics) CostAdded() uint64 { return p.get(costAdd) } // CostEvicted is the sum of all costs that have been evicted. func (p *Metrics) CostEvicted() uint64 { return p.get(costEvict) } // SetsDropped is the number of Set calls that don't make it into internal // buffers (due to contention or some other reason). 
func (p *Metrics) SetsDropped() uint64 { return p.get(dropSets) } // SetsRejected is the number of Set calls rejected by the policy (TinyLFU). func (p *Metrics) SetsRejected() uint64 { return p.get(rejectSets) } // GetsDropped is the number of Get counter increments that are dropped // internally. func (p *Metrics) GetsDropped() uint64 { return p.get(dropGets) } // GetsKept is the number of Get counter increments that are kept. func (p *Metrics) GetsKept() uint64 { return p.get(keepGets) } // Ratio is the number of Hits over all accesses (Hits + Misses). This is the // percentage of successful Get calls. func (p *Metrics) Ratio() float64 { if p == nil { return 0.0 } hits, misses := p.get(hit), p.get(miss) if hits == 0 && misses == 0 { return 0.0 } return float64(hits) / float64(hits+misses) } func (p *Metrics) Clear() { if p == nil { return } for i := 0; i < doNotUse; i++ { for j := range p.all[i] { atomic.StoreUint64(p.all[i][j], 0) } } } func (p *Metrics) String() string { if p == nil { return "" } var buf bytes.Buffer for i := 0; i < doNotUse; i++ { t := metricType(i) fmt.Fprintf(&buf, "%s: %d ", stringFor(t), p.get(t)) } fmt.Fprintf(&buf, "gets-total: %d ", p.get(hit)+p.get(miss)) fmt.Fprintf(&buf, "hit-ratio: %.2f", p.Ratio()) return buf.String() } ristretto-0.0.1/cache_test.go000066400000000000000000000226641356330504700162200ustar00rootroot00000000000000package ristretto import ( "math/rand" "strings" "sync" "testing" "time" "github.com/dgraph-io/ristretto/z" ) var wait time.Duration = time.Millisecond * 10 func TestCacheKeyToHash(t *testing.T) { keyToHashCount := 0 c, err := NewCache(&Config{ NumCounters: 100, MaxCost: 10, BufferItems: 64, KeyToHash: func(key interface{}) (uint64, uint64) { keyToHashCount++ return z.KeyToHash(key) }, }) if err != nil { panic(err) } if c.Set(1, 1, 1) { time.Sleep(wait) if val, ok := c.Get(1); val == nil || !ok { t.Fatal("get should be successful") } else { c.Del(1) } } if keyToHashCount != 3 { t.Fatal("custom KeyToHash 
function should be called three times") } } func TestCacheMaxCost(t *testing.T) { charset := "abcdefghijklmnopqrstuvwxyz0123456789" key := func() []byte { k := make([]byte, 2) for i := range k { k[i] = charset[rand.Intn(len(charset))] } return k } c, err := NewCache(&Config{ NumCounters: 12960, // 36^2 * 10 MaxCost: 1e6, // 1mb BufferItems: 64, Metrics: true, }) if err != nil { panic(err) } stop := make(chan struct{}, 8) for i := 0; i < 8; i++ { go func() { for { select { case <-stop: return default: time.Sleep(time.Millisecond) k := key() if _, ok := c.Get(k); !ok { val := "" if rand.Intn(100) < 10 { val = "test" } else { val = strings.Repeat("a", 1000) } c.Set(key(), val, int64(2+len(val))) } } } }() } for i := 0; i < 20; i++ { time.Sleep(time.Second) cacheCost := c.Metrics.CostAdded() - c.Metrics.CostEvicted() t.Logf("total cache cost: %d\n", cacheCost) if float64(cacheCost) > float64(1e6*1.05) { t.Fatal("cache cost exceeding MaxCost") } } for i := 0; i < 8; i++ { stop <- struct{}{} } } func TestCache(t *testing.T) { if _, err := NewCache(&Config{ NumCounters: 0, }); err == nil { t.Fatal("numCounters can't be 0") } if _, err := NewCache(&Config{ NumCounters: 100, MaxCost: 0, }); err == nil { t.Fatal("maxCost can't be 0") } if _, err := NewCache(&Config{ NumCounters: 100, MaxCost: 10, BufferItems: 0, }); err == nil { t.Fatal("bufferItems can't be 0") } if c, err := NewCache(&Config{ NumCounters: 100, MaxCost: 10, BufferItems: 64, Metrics: true, }); c == nil || err != nil { t.Fatal("config should be good") } } func TestCacheProcessItems(t *testing.T) { m := &sync.Mutex{} evicted := make(map[uint64]struct{}) c, err := NewCache(&Config{ NumCounters: 100, MaxCost: 10, BufferItems: 64, Cost: func(value interface{}) int64 { return int64(value.(int)) }, OnEvict: func(key, conflict uint64, value interface{}, cost int64) { m.Lock() defer m.Unlock() evicted[key] = struct{}{} }, }) if err != nil { panic(err) } var key uint64 var conflict uint64 key, conflict = 
z.KeyToHash(1) c.setBuf <- &item{ flag: itemNew, key: key, conflict: conflict, value: 1, cost: 0, } time.Sleep(wait) if !c.policy.Has(1) || c.policy.Cost(1) != 1 { t.Fatal("cache processItems didn't add new item") } key, conflict = z.KeyToHash(1) c.setBuf <- &item{ flag: itemUpdate, key: key, conflict: conflict, value: 2, cost: 0, } time.Sleep(wait) if c.policy.Cost(1) != 2 { t.Fatal("cache processItems didn't update item cost") } key, conflict = z.KeyToHash(1) c.setBuf <- &item{ flag: itemDelete, key: key, conflict: conflict, } time.Sleep(wait) key, conflict = z.KeyToHash(1) if val, ok := c.store.Get(key, conflict); val != nil || ok { t.Fatal("cache processItems didn't delete item") } if c.policy.Has(1) { t.Fatal("cache processItems didn't delete item") } key, conflict = z.KeyToHash(2) c.setBuf <- &item{ flag: itemNew, key: key, conflict: conflict, value: 2, cost: 3, } key, conflict = z.KeyToHash(3) c.setBuf <- &item{ flag: itemNew, key: key, conflict: conflict, value: 3, cost: 3, } key, conflict = z.KeyToHash(4) c.setBuf <- &item{ flag: itemNew, key: key, conflict: conflict, value: 3, cost: 3, } key, conflict = z.KeyToHash(5) c.setBuf <- &item{ flag: itemNew, key: key, conflict: conflict, value: 3, cost: 5, } time.Sleep(wait) m.Lock() if len(evicted) == 0 { m.Unlock() t.Fatal("cache processItems not evicting or calling OnEvict") } m.Unlock() defer func() { if r := recover(); r == nil { t.Fatal("cache processItems didn't stop") } }() c.Close() c.setBuf <- &item{flag: itemNew} } func TestCacheGet(t *testing.T) { c, err := NewCache(&Config{ NumCounters: 100, MaxCost: 10, BufferItems: 64, Metrics: true, }) if err != nil { panic(err) } key, conflict := z.KeyToHash(1) c.store.Set(key, conflict, 1) if val, ok := c.Get(1); val == nil || !ok { t.Fatal("get should be successful") } if val, ok := c.Get(2); val != nil || ok { t.Fatal("get should not be successful") } // 0.5 and not 1.0 because we tried Getting each item twice if c.Metrics.Ratio() != 0.5 { t.Fatal("get should 
record metrics") } c = nil if val, ok := c.Get(0); val != nil || ok { t.Fatal("get should not be successful with nil cache") } } func TestCacheSet(t *testing.T) { c, err := NewCache(&Config{ NumCounters: 100, MaxCost: 10, BufferItems: 64, Metrics: true, }) if err != nil { panic(err) } if c.Set(1, 1, 1) { time.Sleep(wait) if val, ok := c.Get(1); val == nil || val.(int) != 1 || !ok { t.Fatal("set/get returned wrong value") } } else { if val, ok := c.Get(1); val != nil || ok { t.Fatal("set was dropped but value still added") } } c.Set(1, 2, 2) val, ok := c.store.Get(z.KeyToHash(1)) if val == nil || val.(int) != 2 || !ok { t.Fatal("set/update was unsuccessful") } c.stop <- struct{}{} for i := 0; i < setBufSize; i++ { key, conflict := z.KeyToHash(1) c.setBuf <- &item{ flag: itemUpdate, key: key, conflict: conflict, value: 1, cost: 1, } } if c.Set(2, 2, 1) { t.Fatal("set should be dropped with full setBuf") } if c.Metrics.SetsDropped() != 1 { t.Fatal("set should track dropSets") } close(c.setBuf) close(c.stop) c = nil if c.Set(1, 1, 1) { t.Fatal("set shouldn't be successful with nil cache") } } func TestCacheDel(t *testing.T) { c, err := NewCache(&Config{ NumCounters: 100, MaxCost: 10, BufferItems: 64, }) if err != nil { panic(err) } c.Set(1, 1, 1) c.Del(1) time.Sleep(wait) if val, ok := c.Get(1); val != nil || ok { t.Fatal("del didn't delete") } c = nil defer func() { if r := recover(); r != nil { t.Fatal("del panic with nil cache") } }() c.Del(1) } func TestCacheClear(t *testing.T) { c, err := NewCache(&Config{ NumCounters: 100, MaxCost: 10, BufferItems: 64, Metrics: true, }) if err != nil { panic(err) } for i := 0; i < 10; i++ { c.Set(i, i, 1) } time.Sleep(wait) if c.Metrics.KeysAdded() != 10 { t.Fatal("range of sets not being processed") } c.Clear() if c.Metrics.KeysAdded() != 0 { t.Fatal("clear didn't reset metrics") } for i := 0; i < 10; i++ { if val, ok := c.Get(i); val != nil || ok { t.Fatal("clear didn't delete values") } } } func TestCacheMetrics(t *testing.T) 
{ c, err := NewCache(&Config{ NumCounters: 100, MaxCost: 10, BufferItems: 64, Metrics: true, }) if err != nil { panic(err) } for i := 0; i < 10; i++ { c.Set(i, i, 1) } time.Sleep(wait) m := c.Metrics if m.KeysAdded() != 10 { t.Fatal("metrics exporting incorrect fields") } } func TestMetrics(t *testing.T) { newMetrics() } func TestMetricsAddGet(t *testing.T) { m := newMetrics() m.add(hit, 1, 1) m.add(hit, 2, 2) m.add(hit, 3, 3) if m.Hits() != 6 { t.Fatal("add/get error") } m = nil m.add(hit, 1, 1) if m.Hits() != 0 { t.Fatal("get with nil struct should return 0") } } func TestMetricsRatio(t *testing.T) { m := newMetrics() if m.Ratio() != 0 { t.Fatal("ratio with no hits or misses should be 0") } m.add(hit, 1, 1) m.add(hit, 2, 2) m.add(miss, 1, 1) m.add(miss, 2, 2) if m.Ratio() != 0.5 { t.Fatal("ratio incorrect") } m = nil if m.Ratio() != 0.0 { t.Fatal("ratio with a nil struct should return 0") } } func TestMetricsString(t *testing.T) { m := newMetrics() m.add(hit, 1, 1) m.add(miss, 1, 1) m.add(keyAdd, 1, 1) m.add(keyUpdate, 1, 1) m.add(keyEvict, 1, 1) m.add(costAdd, 1, 1) m.add(costEvict, 1, 1) m.add(dropSets, 1, 1) m.add(rejectSets, 1, 1) m.add(dropGets, 1, 1) m.add(keepGets, 1, 1) if m.Hits() != 1 || m.Misses() != 1 || m.Ratio() != 0.5 || m.KeysAdded() != 1 || m.KeysUpdated() != 1 || m.KeysEvicted() != 1 || m.CostAdded() != 1 || m.CostEvicted() != 1 || m.SetsDropped() != 1 || m.SetsRejected() != 1 || m.GetsDropped() != 1 || m.GetsKept() != 1 { t.Fatal("Metrics wrong value(s)") } if len(m.String()) == 0 { t.Fatal("Metrics.String() empty") } m = nil if len(m.String()) != 0 { t.Fatal("Metrics.String() should be empty with nil struct") } if stringFor(doNotUse) != "unidentified" { t.Fatal("stringFor() not handling doNotUse type") } } func TestCacheMetricsClear(t *testing.T) { c, err := NewCache(&Config{ NumCounters: 100, MaxCost: 10, BufferItems: 64, Metrics: true, }) if err != nil { panic(err) } c.Set(1, 1, 1) stop := make(chan struct{}) go func() { for { select { case 
<-stop: return default: c.Get(1) } } }() time.Sleep(wait) c.Clear() stop <- struct{}{} c.Metrics = nil c.Metrics.Clear() } ristretto-0.0.1/go.mod000066400000000000000000000002351356330504700146630ustar00rootroot00000000000000module github.com/dgraph-io/ristretto go 1.12 require ( github.com/cespare/xxhash v1.1.0 github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 ) ristretto-0.0.1/go.sum000066400000000000000000000014361356330504700147140ustar00rootroot00000000000000github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= ristretto-0.0.1/policy.go000066400000000000000000000206651356330504700154140ustar00rootroot00000000000000/* * Copyright 2019 Dgraph Labs, Inc. and Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package ristretto import ( "math" "sync" "github.com/dgraph-io/ristretto/z" ) const ( // lfuSample is the number of items to sample when looking at eviction // candidates. 5 seems to be the most optimal number [citation needed]. lfuSample = 5 ) // policy is the interface encapsulating eviction/admission behavior. // // TODO: remove this interface and just rename defaultPolicy to policy, as we // are probably only going to use/implement/maintain one policy. type policy interface { ringConsumer // Add attempts to Add the key-cost pair to the Policy. It returns a slice // of evicted keys and a bool denoting whether or not the key-cost pair // was added. If it returns true, the key should be stored in cache. Add(uint64, int64) ([]*item, bool) // Has returns true if the key exists in the Policy. Has(uint64) bool // Del deletes the key from the Policy. Del(uint64) // Cap returns the available capacity. Cap() int64 // Close stops all goroutines and closes all channels. Close() // Update updates the cost value for the key. Update(uint64, int64) // Cost returns the cost value of a key or -1 if missing. Cost(uint64) int64 // Optionally, set stats object to track how policy is performing. CollectMetrics(*Metrics) // Clear zeroes out all counters and clears hashmaps. 
Clear() } func newPolicy(numCounters, maxCost int64) policy { return newDefaultPolicy(numCounters, maxCost) } type defaultPolicy struct { sync.Mutex admit *tinyLFU evict *sampledLFU itemsCh chan []uint64 stop chan struct{} metrics *Metrics } func newDefaultPolicy(numCounters, maxCost int64) *defaultPolicy { p := &defaultPolicy{ admit: newTinyLFU(numCounters), evict: newSampledLFU(maxCost), itemsCh: make(chan []uint64, 3), stop: make(chan struct{}), } go p.processItems() return p } func (p *defaultPolicy) CollectMetrics(metrics *Metrics) { p.metrics = metrics p.evict.metrics = metrics } type policyPair struct { key uint64 cost int64 } func (p *defaultPolicy) processItems() { for { select { case items := <-p.itemsCh: p.Lock() p.admit.Push(items) p.Unlock() case <-p.stop: return } } } func (p *defaultPolicy) Push(keys []uint64) bool { if len(keys) == 0 { return true } select { case p.itemsCh <- keys: p.metrics.add(keepGets, keys[0], uint64(len(keys))) return true default: p.metrics.add(dropGets, keys[0], uint64(len(keys))) return false } } func (p *defaultPolicy) Add(key uint64, cost int64) ([]*item, bool) { p.Lock() defer p.Unlock() // can't add an item bigger than entire cache if cost > p.evict.maxCost { return nil, false } // we don't need to go any further if the item is already in the cache if has := p.evict.updateIfHas(key, cost); has { return nil, true } // if we got this far, this key doesn't exist in the cache // // calculate the remaining room in the cache (usually bytes) room := p.evict.roomLeft(cost) if room >= 0 { // there's enough room in the cache to store the new item without // overflowing, so we can do that now and stop here p.evict.add(key, cost) return nil, true } // incHits is the hit count for the incoming item incHits := p.admit.Estimate(key) // sample is the eviction candidate pool to be filled via random sampling // // TODO: perhaps we should use a min heap here. Right now our time // complexity is N for finding the min. 
Min heap should bring it down to // O(lg N). sample := make([]*policyPair, 0, lfuSample) // as items are evicted they will be appended to victims victims := make([]*item, 0) // delete victims until there's enough space or a minKey is found that has // more hits than incoming item. for ; room < 0; room = p.evict.roomLeft(cost) { // fill up empty slots in sample sample = p.evict.fillSample(sample) // find minimally used item in sample minKey, minHits, minId, minCost := uint64(0), int64(math.MaxInt64), 0, int64(0) for i, pair := range sample { // look up hit count for sample key if hits := p.admit.Estimate(pair.key); hits < minHits { minKey, minHits, minId, minCost = pair.key, hits, i, pair.cost } } // if the incoming item isn't worth keeping in the policy, reject. if incHits < minHits { p.metrics.add(rejectSets, key, 1) return victims, false } // delete the victim from metadata p.evict.del(minKey) // delete the victim from sample sample[minId] = sample[len(sample)-1] sample = sample[:len(sample)-1] // store victim in evicted victims slice victims = append(victims, &item{ key: minKey, conflict: 0, cost: minCost, }) } p.evict.add(key, cost) return victims, true } func (p *defaultPolicy) Has(key uint64) bool { p.Lock() _, exists := p.evict.keyCosts[key] p.Unlock() return exists } func (p *defaultPolicy) Del(key uint64) { p.Lock() p.evict.del(key) p.Unlock() } func (p *defaultPolicy) Cap() int64 { p.Lock() capacity := int64(p.evict.maxCost - p.evict.used) p.Unlock() return capacity } func (p *defaultPolicy) Update(key uint64, cost int64) { p.Lock() p.evict.updateIfHas(key, cost) p.Unlock() } func (p *defaultPolicy) Cost(key uint64) int64 { p.Lock() if cost, found := p.evict.keyCosts[key]; found { p.Unlock() return cost } p.Unlock() return -1 } func (p *defaultPolicy) Clear() { p.Lock() p.admit.clear() p.evict.clear() p.Unlock() } func (p *defaultPolicy) Close() { // block until p.processItems goroutine is returned p.stop <- struct{}{} close(p.stop) close(p.itemsCh) } // 
sampledLFU is an eviction helper storing key-cost pairs. type sampledLFU struct { keyCosts map[uint64]int64 maxCost int64 used int64 metrics *Metrics } func newSampledLFU(maxCost int64) *sampledLFU { return &sampledLFU{ keyCosts: make(map[uint64]int64), maxCost: maxCost, } } func (p *sampledLFU) roomLeft(cost int64) int64 { return p.maxCost - (p.used + cost) } func (p *sampledLFU) fillSample(in []*policyPair) []*policyPair { if len(in) >= lfuSample { return in } for key, cost := range p.keyCosts { in = append(in, &policyPair{key, cost}) if len(in) >= lfuSample { return in } } return in } func (p *sampledLFU) del(key uint64) { cost, ok := p.keyCosts[key] if !ok { return } p.used -= cost delete(p.keyCosts, key) } func (p *sampledLFU) add(key uint64, cost int64) { p.keyCosts[key] = cost p.used += cost } func (p *sampledLFU) updateIfHas(key uint64, cost int64) bool { if prev, found := p.keyCosts[key]; found { // update the cost of an existing key, but don't worry about evicting, // evictions will be handled the next time a new item is added p.metrics.add(keyUpdate, key, 1) p.used += cost - prev p.keyCosts[key] = cost return true } return false } func (p *sampledLFU) clear() { p.used = 0 p.keyCosts = make(map[uint64]int64) } // tinyLFU is an admission helper that keeps track of access frequency using // tiny (4-bit) counters in the form of a count-min sketch. // tinyLFU is NOT thread safe. 
type tinyLFU struct { freq *cmSketch door *z.Bloom incrs int64 resetAt int64 } func newTinyLFU(numCounters int64) *tinyLFU { return &tinyLFU{ freq: newCmSketch(numCounters), door: z.NewBloomFilter(float64(numCounters), 0.01), resetAt: numCounters, } } func (p *tinyLFU) Push(keys []uint64) { for _, key := range keys { p.Increment(key) } } func (p *tinyLFU) Estimate(key uint64) int64 { hits := p.freq.Estimate(key) if p.door.Has(key) { hits += 1 } return hits } func (p *tinyLFU) Increment(key uint64) { // flip doorkeeper bit if not already if added := p.door.AddIfNotHas(key); !added { // increment count-min counter if doorkeeper bit is already set. p.freq.Increment(key) } p.incrs++ if p.incrs >= p.resetAt { p.reset() } } func (p *tinyLFU) reset() { // Zero out incrs. p.incrs = 0 // clears doorkeeper bits p.door.Clear() // halves count-min counters p.freq.Reset() } func (p *tinyLFU) clear() { p.incrs = 0 p.door.Clear() p.freq.Clear() } ristretto-0.0.1/policy_test.go000066400000000000000000000140571356330504700164510ustar00rootroot00000000000000package ristretto import ( "testing" "time" ) func TestPolicy(t *testing.T) { defer func() { if r := recover(); r != nil { t.Fatal("newPolicy failed") } }() newPolicy(100, 10) } func TestPolicyMetrics(t *testing.T) { p := newDefaultPolicy(100, 10) p.CollectMetrics(newMetrics()) if p.metrics == nil || p.evict.metrics == nil { t.Fatal("policy metrics initialization error") } } func TestPolicyProcessItems(t *testing.T) { p := newDefaultPolicy(100, 10) p.itemsCh <- []uint64{1, 2, 2} time.Sleep(wait) p.Lock() if p.admit.Estimate(2) != 2 || p.admit.Estimate(1) != 1 { p.Unlock() t.Fatal("policy processItems not pushing to tinylfu counters") } p.Unlock() p.stop <- struct{}{} p.itemsCh <- []uint64{3, 3, 3} time.Sleep(wait) p.Lock() if p.admit.Estimate(3) != 0 { p.Unlock() t.Fatal("policy processItems not stopping") } p.Unlock() } func TestPolicyPush(t *testing.T) { p := newDefaultPolicy(100, 10) if !p.Push([]uint64{}) { t.Fatal("push 
empty slice should be good") } keepCount := 0 for i := 0; i < 10; i++ { if p.Push([]uint64{1, 2, 3, 4, 5}) { keepCount++ } } if keepCount == 0 { t.Fatal("push dropped everything") } } func TestPolicyAdd(t *testing.T) { p := newDefaultPolicy(1000, 100) if victims, added := p.Add(1, 101); victims != nil || added { t.Fatal("can't add an item bigger than entire cache") } p.Lock() p.evict.add(1, 1) p.admit.Increment(1) p.admit.Increment(2) p.admit.Increment(3) p.Unlock() if victims, added := p.Add(1, 1); victims != nil || !added { t.Fatal("item should already exist") } if victims, added := p.Add(2, 20); victims != nil || !added { t.Fatal("item should be added with no eviction") } if victims, added := p.Add(3, 90); victims == nil || !added { t.Fatal("item should be added with eviction") } if victims, added := p.Add(4, 20); victims == nil || added { t.Fatal("item should not be added") } } func TestPolicyHas(t *testing.T) { p := newDefaultPolicy(100, 10) p.Add(1, 1) if !p.Has(1) { t.Fatal("policy should have key") } if p.Has(2) { t.Fatal("policy shouldn't have key") } } func TestPolicyDel(t *testing.T) { p := newDefaultPolicy(100, 10) p.Add(1, 1) p.Del(1) p.Del(2) if p.Has(1) { t.Fatal("del didn't delete") } if p.Has(2) { t.Fatal("policy shouldn't have key") } } func TestPolicyCap(t *testing.T) { p := newDefaultPolicy(100, 10) p.Add(1, 1) if p.Cap() != 9 { t.Fatal("cap returned wrong value") } } func TestPolicyUpdate(t *testing.T) { p := newDefaultPolicy(100, 10) p.Add(1, 1) p.Update(1, 2) p.Lock() if p.evict.keyCosts[1] != 2 { p.Unlock() t.Fatal("update failed") } p.Unlock() } func TestPolicyCost(t *testing.T) { p := newDefaultPolicy(100, 10) p.Add(1, 2) if p.Cost(1) != 2 { t.Fatal("cost for existing key returned wrong value") } if p.Cost(2) != -1 { t.Fatal("cost for missing key returned wrong value") } } func TestPolicyClear(t *testing.T) { p := newDefaultPolicy(100, 10) p.Add(1, 1) p.Add(2, 2) p.Add(3, 3) p.Clear() if p.Cap() != 10 || p.Has(1) || p.Has(2) || p.Has(3) { 
t.Fatal("clear didn't clear properly") } } func TestPolicyClose(t *testing.T) { defer func() { if r := recover(); r == nil { t.Fatal("close didn't close channels") } }() p := newDefaultPolicy(100, 10) p.Add(1, 1) p.Close() p.itemsCh <- []uint64{1} } func TestSampledLFUAdd(t *testing.T) { e := newSampledLFU(4) e.add(1, 1) e.add(2, 2) e.add(3, 1) if e.used != 4 { t.Fatal("used not being incremented") } if e.keyCosts[2] != 2 { t.Fatal("keyCosts not being updated") } } func TestSampledLFUDel(t *testing.T) { e := newSampledLFU(4) e.add(1, 1) e.add(2, 2) e.del(2) if e.used != 1 { t.Fatal("del not updating used field") } if _, ok := e.keyCosts[2]; ok { t.Fatal("del not deleting value from keyCosts") } e.del(4) } func TestSampledLFUUpdate(t *testing.T) { e := newSampledLFU(4) e.add(1, 1) if !e.updateIfHas(1, 2) { t.Fatal("update should be possible") } if e.used != 2 { t.Fatal("update not changing used field") } if e.updateIfHas(2, 2) { t.Fatal("update shouldn't be possible") } } func TestSampledLFUClear(t *testing.T) { e := newSampledLFU(4) e.add(1, 1) e.add(2, 2) e.add(3, 1) e.clear() if len(e.keyCosts) != 0 || e.used != 0 { t.Fatal("clear not deleting keyCosts or zeroing used field") } } func TestSampledLFURoom(t *testing.T) { e := newSampledLFU(16) e.add(1, 1) e.add(2, 2) e.add(3, 3) if e.roomLeft(4) != 6 { t.Fatal("roomLeft returning wrong value") } } func TestSampledLFUSample(t *testing.T) { e := newSampledLFU(16) e.add(4, 4) e.add(5, 5) sample := e.fillSample([]*policyPair{ {1, 1}, {2, 2}, {3, 3}, }) k := sample[len(sample)-1].key if len(sample) != 5 || k == 1 || k == 2 || k == 3 { t.Fatal("fillSample not filling properly") } if len(sample) != len(e.fillSample(sample)) { t.Fatal("fillSample mutating full sample") } e.del(5) if sample = e.fillSample(sample[:len(sample)-2]); len(sample) != 4 { t.Fatal("fillSample not returning sample properly") } } func TestTinyLFUIncrement(t *testing.T) { a := newTinyLFU(4) a.Increment(1) a.Increment(1) a.Increment(1) if 
!a.door.Has(1) { t.Fatal("doorkeeper bit not set") } if a.freq.Estimate(1) != 2 { t.Fatal("incorrect counter value") } a.Increment(1) if a.door.Has(1) { t.Fatal("doorkeeper bit set after reset") } if a.freq.Estimate(1) != 1 { t.Fatal("counter value not halved after reset") } } func TestTinyLFUEstimate(t *testing.T) { a := newTinyLFU(8) a.Increment(1) a.Increment(1) a.Increment(1) if a.Estimate(1) != 3 { t.Fatal("estimate value incorrect") } if a.Estimate(2) != 0 { t.Fatal("estimate value should be 0") } } func TestTinyLFUPush(t *testing.T) { a := newTinyLFU(16) a.Push([]uint64{1, 2, 2, 3, 3, 3}) if a.Estimate(1) != 1 || a.Estimate(2) != 2 || a.Estimate(3) != 3 { t.Fatal("push didn't increment counters properly") } if a.incrs != 6 { t.Fatal("incrs not being incremented") } } func TestTinyLFUClear(t *testing.T) { a := newTinyLFU(16) a.Push([]uint64{1, 3, 3, 3}) a.clear() if a.incrs != 0 || a.Estimate(3) != 0 { t.Fatal("clear not clearing") } } ristretto-0.0.1/ring.go000066400000000000000000000053561356330504700150540ustar00rootroot00000000000000/* * Copyright 2019 Dgraph Labs, Inc. and Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ristretto import ( "sync" ) // ringConsumer is the user-defined object responsible for receiving and // processing items in batches when buffers are drained. type ringConsumer interface { Push([]uint64) bool } // ringStripe is a singular ring buffer that is not concurrent safe. 
type ringStripe struct { cons ringConsumer data []uint64 capa int } func newRingStripe(cons ringConsumer, capa int64) *ringStripe { return &ringStripe{ cons: cons, data: make([]uint64, 0, capa), capa: int(capa), } } // Push appends an item in the ring buffer and drains (copies items and // sends to Consumer) if full. func (s *ringStripe) Push(item uint64) { s.data = append(s.data, item) // if we should drain if len(s.data) >= s.capa { // Send elements to consumer. Create a new one. if s.cons.Push(s.data) { s.data = make([]uint64, 0, s.capa) } else { s.data = s.data[:0] } } } // ringBuffer stores multiple buffers (stripes) and distributes Pushed items // between them to lower contention. // // This implements the "batching" process described in the BP-Wrapper paper // (section III part A). type ringBuffer struct { pool *sync.Pool } // newRingBuffer returns a striped ring buffer. The Consumer in ringConfig will // be called when individual stripes are full and need to drain their elements. func newRingBuffer(cons ringConsumer, capa int64) *ringBuffer { // LOSSY buffers use a very simple sync.Pool for concurrently reusing // stripes. We do lose some stripes due to GC (unheld items in sync.Pool // are cleared), but the performance gains generally outweigh the small // percentage of elements lost. The performance primarily comes from // low-level runtime functions used in the standard library that aren't // available to us (such as runtime_procPin()). return &ringBuffer{ pool: &sync.Pool{ New: func() interface{} { return newRingStripe(cons, capa) }, }, } } // Push adds an element to one of the internal stripes and possibly drains if // the stripe becomes full. 
func (b *ringBuffer) Push(item uint64) { // reuse or create a new stripe stripe := b.pool.Get().(*ringStripe) stripe.Push(item) b.pool.Put(stripe) } ristretto-0.0.1/ring_test.go000066400000000000000000000023321356330504700161020ustar00rootroot00000000000000package ristretto import ( "sync" "testing" ) type testConsumer struct { push func([]uint64) save bool } func (c *testConsumer) Push(items []uint64) bool { if c.save { c.push(items) return true } return false } func TestRingDrain(t *testing.T) { drains := 0 r := newRingBuffer(&testConsumer{ push: func(items []uint64) { drains++ }, save: true, }, 1) for i := 0; i < 100; i++ { r.Push(uint64(i)) } if drains != 100 { t.Fatal("buffers shouldn't be dropped with BufferItems == 1") } } func TestRingReset(t *testing.T) { drains := 0 r := newRingBuffer(&testConsumer{ push: func(items []uint64) { drains++ }, save: false, }, 4) for i := 0; i < 100; i++ { r.Push(uint64(i)) } if drains != 0 { t.Fatal("testConsumer shouldn't be draining") } } func TestRingConsumer(t *testing.T) { mu := &sync.Mutex{} drainItems := make(map[uint64]struct{}) r := newRingBuffer(&testConsumer{ push: func(items []uint64) { mu.Lock() defer mu.Unlock() for i := range items { drainItems[items[i]] = struct{}{} } }, save: true, }, 4) for i := 0; i < 100; i++ { r.Push(uint64(i)) } l := len(drainItems) if l == 0 || l > 100 { t.Fatal("drains not being processed correctly") } } ristretto-0.0.1/sim/000077500000000000000000000000001356330504700143455ustar00rootroot00000000000000ristretto-0.0.1/sim/gli.lirs.gz000066400000000000000000000132121356330504700164310ustar00rootroot00000000000000‹… ªUgli.trcí]®4Ëqß{5¬ê¿ýoÌŒ<üj@€-ÛI€ T_éV 'INö§ó ÿȱŽ}\Ç}<Ç{|çAÿó¬ƒ}pÜÏÁ{ð9ô™YGö‘ëÈ}ä9òþãŸG¾cÇâXýK¯cíc]Ǻõë=ÖwìóØ;Çîó>öuìûØÏ±ßcÇuÇ•ãZÇÕÿa×qÝÇõ×{\ßqŸÇÍqç¸×qïãîÿîû¸Ÿã~û;žóx8žÏ:ž}<×ñôoë9ž÷x¾ã=—ãÍñ®ãÝÇ{ï}¼ý»~÷;¾óø8¾ß:¾}|×ñÝÇ÷_/Å[鵜½—³söfÎ^ÍÙ»9{9goçìõœ=7××s^ 7èz‡^¢·è5öéEï¹çz—ô2émÒë¤÷I¯’Þ$½Jz—,ô\¯“Þ'½Pz£ôJéÒK¥·J¯•-¹žëÍÒ«¥wK/—Þ.½^z¿ô‚é 
s‰¸çzÉô–é5Ó{¦Moš^5½kzÙÜ~z®÷M/œÞ8½rzçôÒé­Ók§÷Î㇦çzõôîéåÓÛ§×O(^?]=W å@APeAaP|~ ýöƒX)”GÊ#å‘òHy¤Ù=çgÛ·Ÿn?Þ~¾ýh—GÊ#å‘¥z®V½³æ¹î¹<ÞòxËã-·<ÞòxËã-·<^tÙ=Woy¼åñ–Ç[oy¼åñ–Ç[o´ã=Woy¼åñ–Ç[oy¼åñ–Ç[ïÒ·÷\y¼åñ–Ç[oy¼åñ–Ç[oy¼[ƒßsåñ–Ç[oy¼åñ–Ç[oy¼åñ^&ž+·<ÞòxËã-·<ÞòxËã-÷62ô\y¼åñ–Ç[oy¼åñ–Ç[oy¼Ù¢çÊã5|˜>Œæˆ Äb™Òsåñ–Ç[oy¼åñ–Ç[oy¼åñ~¦ãJóJy|åñ•ÇW_y|åñ•ÇW_y|˜kz®<¾òøÊã+¯<¾òøÊã+¯<¾€z®<¾òøÊã+¯<¾òøÊã+¯<¾eRê¹òøÊã+¯<¾òøÊã+¯<¾òø¶‘ªçÊã+¯<¾òøÊã+¯<¾òøÊã»Ì^=W_y|åñ•ÇW_y|åñ•ÇWßmHë¹òøÊã+¯<¾òøÊã+¯<¾òøÓ\Ï•ÇW_y|åñ•ÇW_y|åñ•Ç÷ûzÎXh.4š †fCáéÐx8ùp¢ q"âdÄ ‰“'&NNœ 8IѨx2™Ò'L‹§qñ4/žÆÓÄxO3ãihaœ<Í“§ò4QžFÊÓLy*OSåi¬<¯ÉÇ>a²<–§Ùò4\ž¦ËÓxyš/OæiÂ<ï‰Ô>aÈaÞ< œ§‰ó4ržfÎÓÐyš:Ocçiî<ß î>aô<Íž§áó4}žÆÏÓüy@Oèi=¿ÉúöMû2Ÿ^`Ši¦˜n`Êiþê¿~À'¦!˜Š`:‚) ¦%˜š`z™OS0UÁtSL[0uÁôSLc0•ÁtSLk0µÁôSLs`uðŸ•Í{h:ÿQýªœ_•ó«r~UίÊùU9¿*ç_«rþ»‹œóWåüÏT9Ìôžñ=ó{øLðá3ÃgˆÏŸ1>s|ùLòå3Ëg˜Ï4Ÿq>ó|úLôé3Óg¨ÏTŸ±>s}ûLöí3Ûg¸ÏtŸñ>ó}üLøñ3ãgÈÏ”Ÿ1?s~ýLúõ3ëgØÏ´Ÿq?ó~þLüù3ógèÏÔŸ±?sÿL~G?Î~þ8ýqüãüG€-z4èÈüB©/Ó @+€^ͺ2ölüÙŸAó‰±hãÑÆ¤K›6>M£¦3@k€Þͺ´èÐ  C@‹€Mº´ èÐ( S@«€^ͺ´ èÐ0 c@Ë€žMº´ èÐ8 s@ë€Þͺ´èÐ@ ƒ@ Mº´è#ÐH “@+^ͺ ´è'ÐP £@KžMº ´è+ÐX ³@kÞͺ ´è/Ð` Ã@‹Mº ´è3Ðh Ó@«Ášß¥e®Û@»~ :´è9Ðt ë@Û¾:´è=XcÎÇ=ÿóç>1},úxô1éãÒe®A#‚N­z4#èFÐŽ AC‚Ž- z4%èJЖ /Ac‚έ z4'èNО ?Aƒ‚- z4)èRЦ OA£‚N­ z4+èVЮ _AÂŽ- z4-èZж oAã‚έ z4/è^о Aƒ- z41èbÐÆ A#ƒN­ z43èfÐΠŸaÏ6‚̵4èiÐÔ «A[ƒ¾ :´6èmÐÜ »A{ƒþ :œ_ þ¥à_ þ¥à_ þ¥à_ þ¥à_ þWR°JA© VP,¨” êÞ ?#~fü ù¿)ï3çgÐϤŸQï¬W<¨”ê„ B ¡†PD¨"”ê…„JB)¡–PL¨&”ê …ŠBI¡¦PT¨*”ê ……ÊBi¡¶P\¨.”ê † C‰¡ÆPd¨2”ê …†JC©¡ÖPl¨6”ê ‡ŠCÉ¡æPt¨:”ê…‡ÊCé¡ö¸f¡W÷¦üP(@T J5ˆ"D¢ Q‡(DT"JµˆbD5¢Q(HT$J5‰¢DM¢(Q•(KÔ% •‰ÒDm¢8¹ÆßÁ‡7o<ÞŸÉó‰±yãóÆèÉ\¡¢RQª¨U+ªåŠzEÁ¢bQ²¨Y-ªe‹ºEá¢rQº¨]/ªå‹úE£‚Q¨a1ªeŒ:F!£’Qʨe3ªåŒzFA£¢QÒ¨i5ªeºFa£²û(¿}”ß>Êoå·ò_ÚGÑ¡B+„^Í÷¼ôâ߯~ :"´Dè‰Ð¡+B[„¾:#Æ ¹Ý‰ë¸ß‰ ž¸á‰+ž¸ã‰Kž¸å‰kž¸ç‰‹ž¸é‰«ž¸ë‰Ëž¸í‰ëž¸ï‰ Ÿ¸ñ‰+Ÿ¸ó‰KŸ¸õ‰kŸ¸÷‰‹ŸÜó÷넼§ ™2dÚ©C¦™Bä¯ñ‰éD¦qBºŠ{ ¸Š› ¸ Š» ¸ ŠÛ ¸Šû ¸Š¡¸Š;¡¸Š[¡¸Š{¡¸Š›¡¸Š»¡¸ŠÛ¡¸Šû¡¸ Š¢¸"Š;¢¸$Š[¢¸&Š{¢¸(Š›¢¸*Š»¢¸,ŠÛ¢¸.Šû¢¸0Š£¸2Š;£<óª“Ì]ŽQ\ÅÍQ\ÅÝQ\ÅíQ\ÅýQ\ Å R\!ÅR\"Å-R\#Å=R\$ÅMR\%Å]R\&Åéãç7pœà8Âq†ãÇ)Žcç8rœä8Êq–ã0ÇiŽãç9tœè8Òq¦ãPÇ©Žcç:vži¦ ›.lʰiæ›>ì¯ó‰©ÄdîÇ)cç<zœô8êqÖã°Çiãç=|œø8òqæãÐÇ©cç>~œü8úqöãðÇéãç?thР @€6}ÞyÁMæz4èР@C€Ž-z4è 
Р/@c€Î­z4èР?@ƒ€-z4 èÐ& O@£€N­z4 èÐ. _@ÀŽ-z4 èÐ6 o@ã€Î­z4èÐ> @-į&ÿÕ俚üW“ÿjò_Mþ«É5ù¯&ÿWjr ld°’ÁN†w¦åŒË™—30gbÎÈœ™9Cóojú„sÓ~ lh°¢ÁŽKli°¦Áž‹lj°ªÁ®Ëlk°®Á¾ ll°²ÁÎKlm°¶ÁÞ‹ln°ºá›?#B—c{ƒõ ö7Xà`ƒƒv8Xâ`‹ƒ5ö8Xä`“ƒUv9Xæ`›ƒuö9Xè`£ƒ•v:Xê`«ƒµö:Xì`³ƒÕv;Xî`»ƒõö;Xð`ÃvXú`냵ö>Xü`óƒÕv?Xþ`ûƒõö?XaÄ7^iÌÒ¸¥±Kã—Æ0cË4žéÏ4kÒ6oã4Îi¬Óx§1OãžÆ>éŸ,ƒbË XÅ2(–A± ŠeP,ƒrΟ 2ÖöÿÆ6\,µb©K­XjÅR+–Z±ÔŠ¥V,µb©K­XjÅR+–Z±ÔŠ¥V,µb©K­XjÅR+–Z±ÔŠ¥V,µb©K­XjÅR+–Z±ÔŠ¥V,µb©K­XjÅR+¾d_²Š/YÅ—¬âKVñ%«ø’U|É*¾d_²Š/YÅ—¬âKVñ%«ø’U|É*¾d_²Š/YÅ—¬rÎ é&}É*¾d_²Š/YÅ—¬âKVñ%«ø’U|É*¾d_²Š/YÅ—¬âKVñ%«ø’U|É*¾d_²Š/YÅ3X a1…ÅsX b1‰Å(Æûÿ™Ÿû?þÀ$€‰“&øÙeþ\›L^ð ™Íb6‹á,¦³Ïb>‹-&´ÑbF‹!-)íßlã.æÆÌ[_&ÇcvŒá1¦Çc~Œ2&È!c†Œ!2¦È#cŽŒA2&É%c–Œa2¦É'cžŒ2&Ê)c¦Œ¡2¦Ê+c®ŒÁ2&Ë-c¶Œá2¦Ë/c¾Œ3&Ì1cÆŒ!3¦Ì3cÎŒA3&Í5cÖŒa3¦Í7cÞŒ3&Î9c挡3¦ÎLìœÜ9Ás’çDÏÉž>'}Nüœü9tèDÐÉ B'…N :At’è_,úF}bâèäÑ ¤¾¨þûï÷ßï¾ß|¿ø~?ðý~àûwúïÃn¿F{*m=ì”ÚÓjO­=½öÛÓlOµ=Ýö”ÛÓnO½=ýöÜÓpOÅ=÷”ÜÓrOÍ==÷ÝÓtOÕ=]÷”ÝÓvOÝ=}÷ÞÓxOå=÷”ÞÓzOí=½÷ßÓ|Oõ=Ý÷”ßÓ~Oý=ý÷àÓ€O>ø$»iÁ§Ÿ|Šði§ Ÿ.|ÊðiçŸ>| ñiħŸN|JñiŧŸ^|Šq›ñXÇn<–ã±õxìÇcAòX‘ÇŽ<–ä±%5yìÉcQ›òX•Ç®<–å±-uyì˳æÏäÔ2Z™ÇÎ<–æ±5µyÖüŽ1?dÌ/?eøÄü˜1¿fÌÏó{Æü !s+ôØ¡Ç=¶è±F=z,Òc“«ôØ¥Ç2=¶é±N}z,Ôc£+õØ©ÇR=¶ê±V½z,Öc³«õØ­Çr=¶ë±^ýz,ØcÃ+öرÇÔSWL]1uÅÔSWL]1uÅÔSWL]1uÅÔSWL]1uÅÔSWL]1uÅÔSWL]1uÅÔSWL]1ueR— |¬àcKøØÂÇ>öð±ˆÏ¼çô[ø­üV~+¿ߊÀoEà·"ð[ø­üVþŸ®DÕEÙEÝEáEåEéEíEñEõEùEýeÏÿ/ƒJ F FfÏ_Ô15êBŒJŒRŒZŒbŒjŒrŒzÌž5kFÖ̬¿¡å3¶fnÍàšÉåèR›QœQQžQŸQ Q¡Q¢Q£Q¤Q¥Q¦Q§Q¨Q©QªQ«Q¬Q­Q®Q¯Q°Q±Q²Q³Q´QµQ¶Q·Q¸Q¹QºQ»Q¼Q½Q¾Q¿QÀQÁQÂQÃQÄQÅQÆQÇQÈQÉQÊQËQÌQÍQÎQÏQÐQÑQÒQÓQÔQÕQÖQ×QØQÙQÚQÛQÜQÝQÞQßQàQáQâQãQ乿ÿCæê< =*ýø·²]„æcristretto-0.0.1/sim/sim.go000066400000000000000000000124241356330504700154670ustar00rootroot00000000000000/* * Copyright 2019 Dgraph Labs, Inc. and Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package sim import ( "bufio" "errors" "fmt" "io" "math/rand" "strconv" "strings" "time" ) var ( // ErrDone is returned when the underlying file has ran out of lines. ErrDone = errors.New("no more values in the Simulator") // ErrBadLine is returned when the trace file line is unrecognizable to // the Parser. ErrBadLine = errors.New("bad line for trace format") ) // Simulator is the central type of the `sim` package. It is a function // returning a key from some source (composed from the other functions in this // package, either generated or parsed). You can use these Simulators to // approximate access distributions. type Simulator func() (uint64, error) // NewZipfian creates a Simulator returning numbers following a Zipfian [1] // distribution infinitely. Zipfian distributions are useful for simulating real // workloads. // // [1]: https://en.wikipedia.org/wiki/Zipf%27s_law func NewZipfian(s, v float64, n uint64) Simulator { z := rand.NewZipf(rand.New(rand.NewSource(time.Now().UnixNano())), s, v, n) return func() (uint64, error) { return z.Uint64(), nil } } // NewUniform creates a Simulator returning uniformly distributed [1] (random) // numbers [0, max) infinitely. // // [1]: https://en.wikipedia.org/wiki/Uniform_distribution_(continuous) func NewUniform(max uint64) Simulator { m := int64(max) r := rand.New(rand.NewSource(time.Now().UnixNano())) return func() (uint64, error) { return uint64(r.Int63n(m)), nil } } // Parser is used as a parameter to NewReader so we can create Simulators from // varying trace file formats easily. 
type Parser func(string, error) ([]uint64, error) // NewReader creates a Simulator from two components: the Parser, which is a // filetype specific function for parsing lines, and the file itself, which will // be read from. // // When every line in the file has been read, ErrDone will be returned. For some // trace formats (LIRS) there is one item per line. For others (ARC) there is a // range of items on each line. Thus, the true number of items in each file // is hard to determine, so it's up to the user to handle ErrDone accordingly. func NewReader(parser Parser, file io.Reader) Simulator { b := bufio.NewReader(file) s := make([]uint64, 0) i := -1 var err error return func() (uint64, error) { // only parse a new line when we've run out of items if i++; i == len(s) { // parse sequence from line if s, err = parser(b.ReadString('\n')); err != nil { s = []uint64{0} } i = 0 } return s[i], err } } // ParseLIRS takes a single line of input from a LIRS trace file as described in // multiple papers [1] and returns a slice containing one number. A nice // collection of LIRS trace files can be found in Ben Manes' repo [2]. // // [1]: https://en.wikipedia.org/wiki/LIRS_caching_algorithm // [2]: https://git.io/fj9gU func ParseLIRS(line string, err error) ([]uint64, error) { if line = strings.TrimSpace(line); line != "" { // example: "1\r\n" key, err := strconv.ParseUint(line, 10, 64) return []uint64{key}, err } return nil, ErrDone } // ParseARC takes a single line of input from an ARC trace file as described in // "ARC: a self-tuning, low overhead replacement cache" [1] by Nimrod Megiddo // and Dharmendra S. Modha [1] and returns a sequence of numbers generated from // the line and any error. For use with NewReader. 
// // [1]: https://scinapse.io/papers/1860107648 func ParseARC(line string, err error) ([]uint64, error) { if line != "" { // example: "0 5 0 0\n" // // - first block: starting number in sequence // - second block: number of items in sequence // - third block: ignore // - fourth block: global line number (not used) cols := strings.Fields(line) if len(cols) != 4 { return nil, ErrBadLine } start, err := strconv.ParseUint(cols[0], 10, 64) if err != nil { return nil, err } count, err := strconv.ParseUint(cols[1], 10, 64) if err != nil { return nil, err } // populate sequence from start to start + count seq := make([]uint64, count) for i := range seq { seq[i] = start + uint64(i) } return seq, nil } return nil, ErrDone } // Collection evaluates the Simulator size times and saves each item to the // returned slice. func Collection(simulator Simulator, size uint64) []uint64 { collection := make([]uint64, size) for i := range collection { collection[i], _ = simulator() } return collection } // StringCollection evaluates the Simulator size times and saves each item to // the returned slice, after converting it to a string. func StringCollection(simulator Simulator, size uint64) []string { collection := make([]string, size) for i := range collection { n, _ := simulator() collection[i] = fmt.Sprintf("%d", n) } return collection } ristretto-0.0.1/sim/sim_test.go000066400000000000000000000044401356330504700165250ustar00rootroot00000000000000/* * Copyright 2019 Dgraph Labs, Inc. and Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ package sim import ( "bytes" "compress/gzip" "os" "testing" ) func TestZipfian(t *testing.T) { s := NewZipfian(1.5, 1, 100) m := make(map[uint64]uint64, 100) for i := 0; i < 100; i++ { k, err := s() if err != nil { t.Fatal(err) } m[k]++ } if len(m) == 0 || len(m) == 100 { t.Fatal("zipfian not skewed") } } func TestUniform(t *testing.T) { s := NewUniform(100) for i := 0; i < 100; i++ { if _, err := s(); err != nil { t.Fatal(err) } } } func TestParseLIRS(t *testing.T) { s := NewReader(ParseLIRS, bytes.NewReader([]byte{ '0', '\n', '1', '\r', '\n', '2', '\r', '\n', })) for i := uint64(0); i < 3; i++ { v, err := s() if err != nil { t.Fatal(err) } if v != i { t.Fatal("value mismatch") } } } func TestReadLIRS(t *testing.T) { f, err := os.Open("./gli.lirs.gz") if err != nil { t.Fatal(err) } r, err := gzip.NewReader(f) if err != nil { t.Fatal(err) } s := NewReader(ParseLIRS, r) for i := uint64(0); i < 100; i++ { if _, err = s(); err != nil { t.Fatal(err) } } } func TestParseARC(t *testing.T) { s := NewReader(ParseARC, bytes.NewReader([]byte{ '1', '2', '7', ' ', '6', '4', ' ', '0', ' ', '0', '\r', '\n', '1', '9', '1', ' ', '3', '6', ' ', '0', ' ', '0', '\r', '\n', })) for i := uint64(0); i < 100; i++ { v, err := s() if err != nil { t.Fatal(err) } if v != 127+i { t.Fatal("value mismatch") } } } func TestCollection(t *testing.T) { s := NewUniform(100) c := Collection(s, 100) if len(c) != 100 { t.Fatal("collection not full") } } func TestStringCollection(t *testing.T) { s := NewUniform(100) c := StringCollection(s, 100) if len(c) != 100 { t.Fatal("string collection not full") } } ristretto-0.0.1/sketch.go000066400000000000000000000072731356330504700153760ustar00rootroot00000000000000/* * Copyright 2019 Dgraph Labs, Inc. and Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // This package includes multiple probabalistic data structures needed for // admission/eviction metadata. Most are Counting Bloom Filter variations, but // a caching-specific feature that is also required is a "freshness" mechanism, // which basically serves as a "lifetime" process. This freshness mechanism // was described in the original TinyLFU paper [1], but other mechanisms may // be better suited for certain data distributions. // // [1]: https://arxiv.org/abs/1512.00727 package ristretto import ( "fmt" "math/rand" "time" ) // cmSketch is a Count-Min sketch implementation with 4-bit counters, heavily // based on Damian Gryski's CM4 [1]. // // [1]: https://github.com/dgryski/go-tinylfu/blob/master/cm4.go type cmSketch struct { rows [cmDepth]cmRow seed [cmDepth]uint64 mask uint64 } const ( // cmDepth is the number of counter copies to store (think of it as rows) cmDepth = 4 ) func newCmSketch(numCounters int64) *cmSketch { if numCounters == 0 { panic("cmSketch: bad numCounters") } // get the next power of 2 for better cache performance numCounters = next2Power(numCounters) sketch := &cmSketch{mask: uint64(numCounters - 1)} // initialize rows of counters and seeds source := rand.New(rand.NewSource(time.Now().UnixNano())) for i := 0; i < cmDepth; i++ { sketch.seed[i] = source.Uint64() sketch.rows[i] = newCmRow(numCounters) } return sketch } // Increment increments the count(ers) for the specified key. 
func (s *cmSketch) Increment(hashed uint64) { for i := range s.rows { s.rows[i].increment((hashed ^ s.seed[i]) & s.mask) } } // Estimate returns the value of the specified key. func (s *cmSketch) Estimate(hashed uint64) int64 { min := byte(255) for i := range s.rows { val := s.rows[i].get((hashed ^ s.seed[i]) & s.mask) if val < min { min = val } } return int64(min) } // Reset halves all counter values. func (s *cmSketch) Reset() { for _, r := range s.rows { r.reset() } } // Clear zeroes all counters. func (s *cmSketch) Clear() { for _, r := range s.rows { r.clear() } } // cmRow is a row of bytes, with each byte holding two counters type cmRow []byte func newCmRow(numCounters int64) cmRow { return make(cmRow, numCounters/2) } func (r cmRow) get(n uint64) byte { return byte(r[n/2]>>((n&1)*4)) & 0x0f } func (r cmRow) increment(n uint64) { // index of the counter i := n / 2 // shift distance (even 0, odd 4) s := (n & 1) * 4 // counter value v := (r[i] >> s) & 0x0f // only increment if not max value (overflow wrap is bad for LFU) if v < 15 { r[i] += 1 << s } } func (r cmRow) reset() { // halve each counter for i := range r { r[i] = (r[i] >> 1) & 0x77 } } func (r cmRow) clear() { // zero each counter for i := range r { r[i] = 0 } } func (r cmRow) string() string { s := "" for i := uint64(0); i < uint64(len(r)*2); i++ { s += fmt.Sprintf("%02d ", (r[(i/2)]>>((i&1)*4))&0x0f) } s = s[:len(s)-1] return s } // next2Power rounds x up to the next power of 2, if it's not already one. 
func next2Power(x int64) int64 { x-- x |= x >> 1 x |= x >> 2 x |= x >> 4 x |= x >> 8 x |= x >> 16 x |= x >> 32 x++ return x } ristretto-0.0.1/sketch_test.go000066400000000000000000000027421356330504700164310ustar00rootroot00000000000000package ristretto import ( "testing" ) func TestSketch(t *testing.T) { defer func() { if r := recover(); r == nil { t.Fatal("no panic with bad param numCounters") } }() s := newCmSketch(5) if s.mask != 7 { t.Fatal("not rounding up to next power of 2") } newCmSketch(0) } func TestSketchIncrement(t *testing.T) { s := newCmSketch(16) s.Increment(1) s.Increment(5) s.Increment(9) for i := 0; i < cmDepth; i++ { if s.rows[i].string() != s.rows[0].string() { break } if i == cmDepth-1 { t.Fatal("identical rows, bad seeding") } } } func TestSketchEstimate(t *testing.T) { s := newCmSketch(16) s.Increment(1) s.Increment(1) if s.Estimate(1) != 2 { t.Fatal("estimate should be 2") } if s.Estimate(0) != 0 { t.Fatal("estimate should be 0") } } func TestSketchReset(t *testing.T) { s := newCmSketch(16) s.Increment(1) s.Increment(1) s.Increment(1) s.Increment(1) s.Reset() if s.Estimate(1) != 2 { t.Fatal("reset failed, estimate should be 2") } } func TestSketchClear(t *testing.T) { s := newCmSketch(16) for i := 0; i < 16; i++ { s.Increment(uint64(i)) } s.Clear() for i := 0; i < 16; i++ { if s.Estimate(uint64(i)) != 0 { t.Fatal("clear failed") } } } func BenchmarkSketchIncrement(b *testing.B) { s := newCmSketch(16) b.SetBytes(1) for n := 0; n < b.N; n++ { s.Increment(1) } } func BenchmarkSketchEstimate(b *testing.B) { s := newCmSketch(16) s.Increment(1) b.SetBytes(1) for n := 0; n < b.N; n++ { s.Estimate(1) } } ristretto-0.0.1/store.go000066400000000000000000000100211356330504700152320ustar00rootroot00000000000000/* * Copyright 2019 Dgraph Labs, Inc. and Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ristretto import ( "sync" ) type storeItem struct { key uint64 conflict uint64 value interface{} } // store is the interface fulfilled by all hash map implementations in this // file. Some hash map implementations are better suited for certain data // distributions than others, so this allows us to abstract that out for use // in Ristretto. // // Every store is safe for concurrent usage. type store interface { // Get returns the value associated with the key parameter. Get(uint64, uint64) (interface{}, bool) // Set adds the key-value pair to the Map or updates the value if it's // already present. Set(uint64, uint64, interface{}) // Del deletes the key-value pair from the Map. Del(uint64, uint64) (uint64, interface{}) // Update attempts to update the key with a new value and returns true if // successful. Update(uint64, uint64, interface{}) bool // Clear clears all contents of the store. Clear() } // newStore returns the default store implementation. 
func newStore() store { return newShardedMap() } const numShards uint64 = 256 type shardedMap struct { shards []*lockedMap } func newShardedMap() *shardedMap { sm := &shardedMap{ shards: make([]*lockedMap, int(numShards)), } for i := range sm.shards { sm.shards[i] = newLockedMap() } return sm } func (sm *shardedMap) Get(key, conflict uint64) (interface{}, bool) { return sm.shards[key%numShards].Get(key, conflict) } func (sm *shardedMap) Set(key, conflict uint64, value interface{}) { sm.shards[key%numShards].Set(key, conflict, value) } func (sm *shardedMap) Del(key, conflict uint64) (uint64, interface{}) { return sm.shards[key%numShards].Del(key, conflict) } func (sm *shardedMap) Update(key, conflict uint64, value interface{}) bool { return sm.shards[key%numShards].Update(key, conflict, value) } func (sm *shardedMap) Clear() { for i := uint64(0); i < numShards; i++ { sm.shards[i].Clear() } } type lockedMap struct { sync.RWMutex data map[uint64]storeItem } func newLockedMap() *lockedMap { return &lockedMap{ data: make(map[uint64]storeItem), } } func (m *lockedMap) Get(key, conflict uint64) (interface{}, bool) { m.RLock() item, ok := m.data[key] m.RUnlock() if !ok { return nil, false } if conflict != 0 && (conflict != item.conflict) { return nil, false } return item.value, true } func (m *lockedMap) Set(key, conflict uint64, value interface{}) { m.Lock() item, ok := m.data[key] if !ok { m.data[key] = storeItem{ key: key, conflict: conflict, value: value, } m.Unlock() return } if conflict != 0 && (conflict != item.conflict) { m.Unlock() return } m.data[key] = storeItem{ key: key, conflict: conflict, value: value, } m.Unlock() } func (m *lockedMap) Del(key, conflict uint64) (uint64, interface{}) { m.Lock() item, ok := m.data[key] if !ok { m.Unlock() return 0, nil } if conflict != 0 && (conflict != item.conflict) { m.Unlock() return 0, nil } delete(m.data, key) m.Unlock() return item.conflict, item.value } func (m *lockedMap) Update(key, conflict uint64, value 
interface{}) bool { m.Lock() item, ok := m.data[key] if !ok { m.Unlock() return false } if conflict != 0 && (conflict != item.conflict) { m.Unlock() return false } m.data[key] = storeItem{ key: key, conflict: conflict, value: value, } m.Unlock() return true } func (m *lockedMap) Clear() { m.Lock() m.data = make(map[uint64]storeItem) m.Unlock() } ristretto-0.0.1/store_test.go000066400000000000000000000064221356330504700163030ustar00rootroot00000000000000package ristretto import ( "testing" "github.com/dgraph-io/ristretto/z" ) func TestStoreSetGet(t *testing.T) { s := newStore() key, conflict := z.KeyToHash(1) s.Set(key, conflict, 2) if val, ok := s.Get(key, conflict); (val == nil || !ok) || val.(int) != 2 { t.Fatal("set/get error") } s.Set(key, conflict, 3) if val, ok := s.Get(key, conflict); (val == nil || !ok) || val.(int) != 3 { t.Fatal("set/get overwrite error") } key, conflict = z.KeyToHash(2) s.Set(key, conflict, 2) if val, ok := s.Get(key, conflict); !ok || val.(int) != 2 { t.Fatal("set/get nil key error") } } func TestStoreDel(t *testing.T) { s := newStore() key, conflict := z.KeyToHash(1) s.Set(key, conflict, 1) s.Del(key, conflict) if val, ok := s.Get(key, conflict); val != nil || ok { t.Fatal("del error") } s.Del(2, 0) } func TestStoreClear(t *testing.T) { s := newStore() for i := uint64(0); i < 1000; i++ { key, conflict := z.KeyToHash(i) s.Set(key, conflict, i) } s.Clear() for i := uint64(0); i < 1000; i++ { key, conflict := z.KeyToHash(i) if val, ok := s.Get(key, conflict); val != nil || ok { t.Fatal("clear operation failed") } } } func TestStoreUpdate(t *testing.T) { s := newStore() key, conflict := z.KeyToHash(1) s.Set(key, conflict, 1) if updated := s.Update(key, conflict, 2); !updated { t.Fatal("value should have been updated") } if val, ok := s.Get(key, conflict); val == nil || !ok { t.Fatal("value was deleted") } if val, ok := s.Get(key, conflict); val.(int) != 2 || !ok { t.Fatal("value wasn't updated") } if !s.Update(key, conflict, 3) { 
t.Fatal("value should have been updated") } if val, ok := s.Get(key, conflict); val.(int) != 3 || !ok { t.Fatal("value wasn't updated") } key, conflict = z.KeyToHash(2) if updated := s.Update(key, conflict, 2); updated { t.Fatal("value should not have been updated") } if val, ok := s.Get(key, conflict); val != nil || ok { t.Fatal("value should not have been updated") } } func TestStoreCollision(t *testing.T) { s := newShardedMap() s.shards[1].Lock() s.shards[1].data[1] = storeItem{ key: 1, conflict: 0, value: 1, } s.shards[1].Unlock() if val, ok := s.Get(1, 1); val != nil || ok { t.Fatal("collision should return nil") } s.Set(1, 1, 2) if val, ok := s.Get(1, 0); !ok || val == nil || val.(int) == 2 { t.Fatal("collision should prevent Set update") } if s.Update(1, 1, 2) { t.Fatal("collision should prevent Update") } if val, ok := s.Get(1, 0); !ok || val == nil || val.(int) == 2 { t.Fatal("collision should prevent Update") } s.Del(1, 1) if val, ok := s.Get(1, 0); !ok || val == nil { t.Fatal("collision should prevent Del") } } func BenchmarkStoreGet(b *testing.B) { s := newStore() key, conflict := z.KeyToHash(1) s.Set(key, conflict, 1) b.SetBytes(1) b.RunParallel(func(pb *testing.PB) { for pb.Next() { s.Get(key, conflict) } }) } func BenchmarkStoreSet(b *testing.B) { s := newStore() key, conflict := z.KeyToHash(1) b.SetBytes(1) b.RunParallel(func(pb *testing.PB) { for pb.Next() { s.Set(key, conflict, 1) } }) } func BenchmarkStoreUpdate(b *testing.B) { s := newStore() key, conflict := z.KeyToHash(1) s.Set(key, conflict, 1) b.SetBytes(1) b.RunParallel(func(pb *testing.PB) { for pb.Next() { s.Update(key, conflict, 2) } }) } ristretto-0.0.1/stress_test.go000066400000000000000000000067311356330504700164750ustar00rootroot00000000000000package ristretto import ( "container/heap" "fmt" "math/rand" "runtime" "sync" "testing" "time" "github.com/dgraph-io/ristretto/sim" ) func TestStressSetGet(t *testing.T) { c, err := NewCache(&Config{ NumCounters: 1000, MaxCost: 100, 
BufferItems: 64, Metrics: true, }) if err != nil { panic(err) } for i := 0; i < 100; i++ { c.Set(i, i, 1) } time.Sleep(wait) wg := &sync.WaitGroup{} for i := 0; i < runtime.GOMAXPROCS(0); i++ { wg.Add(1) go func() { r := rand.New(rand.NewSource(time.Now().UnixNano())) for a := 0; a < 1000; a++ { k := r.Int() % 10 if val, ok := c.Get(k); val == nil || !ok { err = fmt.Errorf("expected %d but got nil", k) break } else if val != nil && val.(int) != k { err = fmt.Errorf("expected %d but got %d", k, val.(int)) break } } wg.Done() }() } wg.Wait() if err != nil { t.Fatal(err) } if r := c.Metrics.Ratio(); r != 1.0 { t.Fatalf("hit ratio should be 1.0 but got %.2f\n", r) } } func TestStressHitRatio(t *testing.T) { key := sim.NewZipfian(1.0001, 1, 1000) c, err := NewCache(&Config{ NumCounters: 1000, MaxCost: 100, BufferItems: 64, Metrics: true, }) if err != nil { panic(err) } o := NewClairvoyant(100) for i := 0; i < 10000; i++ { k, err := key() if err != nil { panic(err) } if _, ok := o.Get(k); !ok { o.Set(k, k, 1) } if _, ok := c.Get(k); !ok { c.Set(k, k, 1) } } t.Logf("actual: %.2f, optimal: %.2f", c.Metrics.Ratio(), o.Metrics().Ratio()) } // Clairvoyant is a mock cache providing us with optimal hit ratios to compare // with Ristretto's. It looks ahead and evicts the absolute least valuable item, // which we try to approximate in a real cache. type Clairvoyant struct { capacity uint64 hits map[uint64]uint64 access []uint64 } func NewClairvoyant(capacity uint64) *Clairvoyant { return &Clairvoyant{ capacity: capacity, hits: make(map[uint64]uint64), access: make([]uint64, 0), } } // Get just records the cache access so that we can later take this event into // consideration when calculating the absolute least valuable item to evict. 
func (c *Clairvoyant) Get(key interface{}) (interface{}, bool) { c.hits[key.(uint64)]++ c.access = append(c.access, key.(uint64)) return nil, false } // Set isn't important because it is only called after a Get (in the case of our // hit ratio benchmarks, at least). func (c *Clairvoyant) Set(key, value interface{}, cost int64) bool { return false } func (c *Clairvoyant) Metrics() *Metrics { stat := newMetrics() look := make(map[uint64]struct{}, c.capacity) data := &clairvoyantHeap{} heap.Init(data) for _, key := range c.access { if _, has := look[key]; has { stat.add(hit, 0, 1) continue } if uint64(data.Len()) >= c.capacity { victim := heap.Pop(data) delete(look, victim.(*clairvoyantItem).key) } stat.add(miss, 0, 1) look[key] = struct{}{} heap.Push(data, &clairvoyantItem{key, c.hits[key]}) } return stat } type clairvoyantItem struct { key uint64 hits uint64 } type clairvoyantHeap []*clairvoyantItem func (h clairvoyantHeap) Len() int { return len(h) } func (h clairvoyantHeap) Less(i, j int) bool { return h[i].hits < h[j].hits } func (h clairvoyantHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } func (h *clairvoyantHeap) Push(x interface{}) { *h = append(*h, x.(*clairvoyantItem)) } func (h *clairvoyantHeap) Pop() interface{} { old := *h n := len(old) x := old[n-1] *h = old[0 : n-1] return x } ristretto-0.0.1/z/000077500000000000000000000000001356330504700140265ustar00rootroot00000000000000ristretto-0.0.1/z/LICENSE000066400000000000000000000055711356330504700150430ustar00rootroot00000000000000bbloom.go // The MIT License (MIT) // Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt // Permission is hereby granted, free of charge, to any person obtaining a copy of // this software and associated documentation files (the "Software"), to deal in // the Software without restriction, including without limitation the rights to // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of // the Software, and to permit persons to whom the 
Software is furnished to do so, // subject to the following conditions: // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. rtutil.go // MIT License // Copyright (c) 2019 Ewan Chou // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. Modifications: /* * Copyright 2019 Dgraph Labs, Inc. 
and Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ ristretto-0.0.1/z/README.md000066400000000000000000000125101356330504700153040ustar00rootroot00000000000000## bbloom: a bitset Bloom filter for go/golang === package implements a fast bloom filter with real 'bitset' and JSONMarshal/JSONUnmarshal to store/reload the Bloom filter. NOTE: the package uses unsafe.Pointer to set and read the bits from the bitset. If you're uncomfortable with using the unsafe package, please consider using my bloom filter package at github.com/AndreasBriese/bloom === changelog 11/2015: new thread safe methods AddTS(), HasTS(), AddIfNotHasTS() following a suggestion from Srdjan Marinovic (github @a-little-srdjan), who used this to code a bloomfilter cache. This bloom filter was developed to strengthen a website-log database and was tested and optimized for this log-entry mask: "2014/%02i/%02i %02i:%02i:%02i /info.html". Nonetheless bbloom should work with any other form of entries. ~~Hash function is a modified Berkeley DB sdbm hash (to optimize for smaller strings). sdbm http://www.cse.yorku.ca/~oz/hash.html~~ Found sipHash (SipHash-2-4, a fast short-input PRF created by Jean-Philippe Aumasson and Daniel J. Bernstein.) to be about as fast. sipHash had been ported by Dimtry Chestnyk to Go (github.com/dchest/siphash ) Minimum hashset size is: 512 ([4]uint64; will be set automatically). 
###install ```sh go get github.com/AndreasBriese/bbloom ``` ###test + change to folder ../bbloom + create wordlist in file "words.txt" (you might use `python permut.py`) + run 'go test -bench=.' within the folder ```go go test -bench=. ``` ~~If you've installed the GOCONVEY TDD-framework http://goconvey.co/ you can run the tests automatically.~~ using go's testing framework now (have in mind that the op timing is related to 65536 operations of Add, Has, AddIfNotHas respectively) ### usage after installation add ```go import ( ... "github.com/AndreasBriese/bbloom" ... ) ``` at your header. In the program use ```go // create a bloom filter for 65536 items and 1 % wrong-positive ratio bf := bbloom.New(float64(1<<16), float64(0.01)) // or // create a bloom filter with 650000 for 65536 items and 7 locs per hash explicitly // bf = bbloom.New(float64(650000), float64(7)) // or bf = bbloom.New(650000.0, 7.0) // add one item bf.Add([]byte("butter")) // Number of elements added is exposed now // Note: ElemNum will not be included in JSON export (for compatability to older version) nOfElementsInFilter := bf.ElemNum // check if item is in the filter isIn := bf.Has([]byte("butter")) // should be true isNotIn := bf.Has([]byte("Butter")) // should be false // 'add only if item is new' to the bloomfilter added := bf.AddIfNotHas([]byte("butter")) // should be false because 'butter' is already in the set added = bf.AddIfNotHas([]byte("buTTer")) // should be true because 'buTTer' is new // thread safe versions for concurrent use: AddTS, HasTS, AddIfNotHasTS // add one item bf.AddTS([]byte("peanutbutter")) // check if item is in the filter isIn = bf.HasTS([]byte("peanutbutter")) // should be true isNotIn = bf.HasTS([]byte("peanutButter")) // should be false // 'add only if item is new' to the bloomfilter added = bf.AddIfNotHasTS([]byte("butter")) // should be false because 'peanutbutter' is already in the set added = bf.AddIfNotHasTS([]byte("peanutbuTTer")) // should be true because 
'penutbuTTer' is new // convert to JSON ([]byte) Json := bf.JSONMarshal() // bloomfilters Mutex is exposed for external un-/locking // i.e. mutex lock while doing JSON conversion bf.Mtx.Lock() Json = bf.JSONMarshal() bf.Mtx.Unlock() // restore a bloom filter from storage bfNew := bbloom.JSONUnmarshal(Json) isInNew := bfNew.Has([]byte("butter")) // should be true isNotInNew := bfNew.Has([]byte("Butter")) // should be false ``` to work with the bloom filter. ### why 'fast'? It's about 3 times faster than William Fitzgeralds bitset bloom filter https://github.com/willf/bloom . And it is about so fast as my []bool set variant for Boom filters (see https://github.com/AndreasBriese/bloom ) but having a 8times smaller memory footprint: Bloom filter (filter size 524288, 7 hashlocs) github.com/AndreasBriese/bbloom 'Add' 65536 items (10 repetitions): 6595800 ns (100 ns/op) github.com/AndreasBriese/bbloom 'Has' 65536 items (10 repetitions): 5986600 ns (91 ns/op) github.com/AndreasBriese/bloom 'Add' 65536 items (10 repetitions): 6304684 ns (96 ns/op) github.com/AndreasBriese/bloom 'Has' 65536 items (10 repetitions): 6568663 ns (100 ns/op) github.com/willf/bloom 'Add' 65536 items (10 repetitions): 24367224 ns (371 ns/op) github.com/willf/bloom 'Test' 65536 items (10 repetitions): 21881142 ns (333 ns/op) github.com/dataence/bloom/standard 'Add' 65536 items (10 repetitions): 23041644 ns (351 ns/op) github.com/dataence/bloom/standard 'Check' 65536 items (10 repetitions): 19153133 ns (292 ns/op) github.com/cabello/bloom 'Add' 65536 items (10 repetitions): 131921507 ns (2012 ns/op) github.com/cabello/bloom 'Contains' 65536 items (10 repetitions): 131108962 ns (2000 ns/op) (on MBPro15 OSX10.8.5 i7 4Core 2.4Ghz) With 32bit bloom filters (bloom32) using modified sdbm, bloom32 does hashing with only 2 bit shifts, one xor and one substraction per byte. smdb is about as fast as fnv64a but gives less collisions with the dataset (see mask above). 
bloom.New(float64(10 * 1<<16),float64(7)) populated with 1<<16 random items from the dataset (see above) and tested against the rest results in less than 0.05% collisions. ristretto-0.0.1/z/bbloom.go000066400000000000000000000136121356330504700156320ustar00rootroot00000000000000// The MIT License (MIT) // Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt // Permission is hereby granted, free of charge, to any person obtaining a copy of // this software and associated documentation files (the "Software"), to deal in // the Software without restriction, including without limitation the rights to // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of // the Software, and to permit persons to whom the Software is furnished to do so, // subject to the following conditions: // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
package z import ( "bytes" "encoding/json" "log" "math" "unsafe" ) // helper var mask = []uint8{1, 2, 4, 8, 16, 32, 64, 128} func getSize(ui64 uint64) (size uint64, exponent uint64) { if ui64 < uint64(512) { ui64 = uint64(512) } size = uint64(1) for size < ui64 { size <<= 1 exponent++ } return size, exponent } func calcSizeByWrongPositives(numEntries, wrongs float64) (uint64, uint64) { size := -1 * numEntries * math.Log(wrongs) / math.Pow(float64(0.69314718056), 2) locs := math.Ceil(float64(0.69314718056) * size / numEntries) return uint64(size), uint64(locs) } // NewBloomFilter returns a new bloomfilter. func NewBloomFilter(params ...float64) (bloomfilter *Bloom) { var entries, locs uint64 if len(params) == 2 { if params[1] < 1 { entries, locs = calcSizeByWrongPositives(params[0], params[1]) } else { entries, locs = uint64(params[0]), uint64(params[1]) } } else { log.Fatal("usage: New(float64(number_of_entries), float64(number_of_hashlocations))" + " i.e. New(float64(1000), float64(3)) or New(float64(number_of_entries)," + " float64(number_of_hashlocations)) i.e. New(float64(1000), float64(0.03))") } size, exponent := getSize(entries) bloomfilter = &Bloom{ sizeExp: exponent, size: size - 1, setLocs: locs, shift: 64 - exponent, } bloomfilter.Size(size) return bloomfilter } // Bloom filter type Bloom struct { bitset []uint64 ElemNum uint64 sizeExp uint64 size uint64 setLocs uint64 shift uint64 } // <--- http://www.cse.yorku.ca/~oz/hash.html // modified Berkeley DB Hash (32bit) // hash is casted to l, h = 16bit fragments // func (bl Bloom) absdbm(b *[]byte) (l, h uint64) { // hash := uint64(len(*b)) // for _, c := range *b { // hash = uint64(c) + (hash << 6) + (hash << bl.sizeExp) - hash // } // h = hash >> bl.shift // l = hash << bl.shift >> bl.shift // return l, h // } // Add adds hash of a key to the bloomfilter. 
func (bl *Bloom) Add(hash uint64) { h := hash >> bl.shift l := hash << bl.shift >> bl.shift for i := uint64(0); i < bl.setLocs; i++ { bl.Set((h + i*l) & bl.size) bl.ElemNum++ } } // Has checks if bit(s) for entry hash is/are set, // returns true if the hash was added to the Bloom Filter. func (bl Bloom) Has(hash uint64) bool { h := hash >> bl.shift l := hash << bl.shift >> bl.shift for i := uint64(0); i < bl.setLocs; i++ { switch bl.IsSet((h + i*l) & bl.size) { case false: return false } } return true } // AddIfNotHas only Adds hash, if it's not present in the bloomfilter. // Returns true if hash was added. // Returns false if hash was already registered in the bloomfilter. func (bl *Bloom) AddIfNotHas(hash uint64) bool { if bl.Has(hash) { return false } bl.Add(hash) return true } // Size makes Bloom filter with as bitset of size sz. func (bl *Bloom) Size(sz uint64) { bl.bitset = make([]uint64, sz>>6) } // Clear resets the Bloom filter. func (bl *Bloom) Clear() { for i := range bl.bitset { bl.bitset[i] = 0 } } // Set sets the bit[idx] of bitset. func (bl *Bloom) Set(idx uint64) { ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3)) *(*uint8)(ptr) |= mask[idx%8] } // IsSet checks if bit[idx] of bitset is set, returns true/false. func (bl *Bloom) IsSet(idx uint64) bool { ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3)) r := ((*(*uint8)(ptr)) >> (idx % 8)) & 1 return r == 1 } // bloomJSONImExport // Im/Export structure used by JSONMarshal / JSONUnmarshal type bloomJSONImExport struct { FilterSet []byte SetLocs uint64 } // NewWithBoolset takes a []byte slice and number of locs per entry, // returns the bloomfilter with a bitset populated according to the input []byte. 
// newWithBoolset builds a filter sized to hold exactly len(*bs) bytes of
// bitset data and copies the input bytes straight into the bitset's backing
// memory, one byte at a time via unsafe pointer arithmetic.
func newWithBoolset(bs *[]byte, locs uint64) *Bloom {
	// len(*bs)<<3 converts bytes to bits so NewBloomFilter sizes the bitset
	// to match the serialized data.
	bloomfilter := NewBloomFilter(float64(len(*bs)<<3), float64(locs))
	for i, b := range *bs {
		*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&bloomfilter.bitset[0])) + uintptr(i))) = b
	}
	return bloomfilter
}

// JSONUnmarshal takes JSON-Object (type bloomJSONImExport) as []bytes
// returns bloom32 / bloom64 object.
func JSONUnmarshal(dbData []byte) *Bloom {
	bloomImEx := bloomJSONImExport{}
	// NOTE(review): the unmarshal error is silently ignored — malformed input
	// yields a filter built from a zero-value struct. Consider surfacing it.
	json.Unmarshal(dbData, &bloomImEx)
	buf := bytes.NewBuffer(bloomImEx.FilterSet)
	bs := buf.Bytes()
	bf := newWithBoolset(&bs, bloomImEx.SetLocs)
	return bf
}

// JSONMarshal returns JSON-object (type bloomJSONImExport) as []byte.
func (bl Bloom) JSONMarshal() []byte {
	bloomImEx := bloomJSONImExport{}
	bloomImEx.SetLocs = bl.setLocs
	// len(bitset)<<3 converts the uint64 word count to a byte count; the loop
	// reads the bitset's raw bytes directly (mirror of newWithBoolset).
	bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3)
	for i := range bloomImEx.FilterSet {
		bloomImEx.FilterSet[i] = *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[0])) + uintptr(i)))
	}
	data, err := json.Marshal(bloomImEx)
	if err != nil {
		log.Fatal("json.Marshal failed: ", err)
	}
	return data
}
ristretto-0.0.1/z/bbloom_test.go000066400000000000000000000041421356330504700166700ustar00rootroot00000000000000package z

import (
	"crypto/rand"
	"fmt"
	"testing"
)

var (
	wordlist1 [][]byte
	// n is the number of random 32-byte test keys generated in TestMain.
	n  = 1 << 16
	bf *Bloom
)

// TestMain populates the shared random wordlist before running the suite.
func TestMain(m *testing.M) {
	wordlist1 = make([][]byte, n)
	for i := range wordlist1 {
		b := make([]byte, 32)
		rand.Read(b)
		wordlist1[i] = b
	}
	fmt.Println("\n###############\nbbloom_test.go")
	fmt.Print("Benchmarks relate to 2**16 OP. --> output/65536 op/ns\n###############\n\n")
	m.Run()
}

// TestM_NumberOfWrongs reports (does not assert) the false-positive count
// observed when inserting all 2**16 keys.
func TestM_NumberOfWrongs(t *testing.T) {
	bf = NewBloomFilter(float64(n*10), float64(7))
	cnt := 0
	for i := range wordlist1 {
		hash := MemHash(wordlist1[i])
		if !bf.AddIfNotHas(hash) {
			cnt++
		}
	}
	fmt.Printf("Bloomfilter New(7* 2**16, 7) (-> size=%v bit): \n Check for 'false positives': %v wrong positive 'Has' results on 2**16 entries => %v %%\n", len(bf.bitset)<<6, cnt, float64(cnt)/float64(n))
}

// TestM_JSON round-trips a populated filter through JSONMarshal/JSONUnmarshal
// and verifies the restored filter reports every key as already present.
func TestM_JSON(t *testing.T) {
	const shallBe = int(1 << 16)
	bf = NewBloomFilter(float64(n*10), float64(7))
	cnt := 0
	for i := range wordlist1 {
		hash := MemHash(wordlist1[i])
		if !bf.AddIfNotHas(hash) {
			cnt++
		}
	}
	Json := bf.JSONMarshal()
	// create new bloomfilter from bloomfilter's JSON representation
	bf2 := JSONUnmarshal(Json)
	cnt2 := 0
	for i := range wordlist1 {
		hash := MemHash(wordlist1[i])
		if !bf2.AddIfNotHas(hash) {
			cnt2++
		}
	}
	if cnt2 != shallBe {
		t.Errorf("FAILED !AddIfNotHasBytes = %v; want %v", cnt2, shallBe)
	}
}

func BenchmarkM_New(b *testing.B) {
	for r := 0; r < b.N; r++ {
		_ = NewBloomFilter(float64(n*10), float64(7))
	}
}

func BenchmarkM_Clear(b *testing.B) {
	bf = NewBloomFilter(float64(n*10), float64(7))
	for i := range wordlist1 {
		hash := MemHash(wordlist1[i])
		bf.Add(hash)
	}
	b.ResetTimer()
	for r := 0; r < b.N; r++ {
		bf.Clear()
	}
}

func BenchmarkM_Add(b *testing.B) {
	bf = NewBloomFilter(float64(n*10), float64(7))
	b.ResetTimer()
	for r := 0; r < b.N; r++ {
		for i := range wordlist1 {
			hash := MemHash(wordlist1[i])
			bf.Add(hash)
		}
	}
}

func BenchmarkM_Has(b *testing.B) {
	b.ResetTimer()
	for r := 0; r < b.N; r++ {
		for i := range wordlist1 {
			hash := MemHash(wordlist1[i])
			bf.Has(hash)
		}
	}
}
ristretto-0.0.1/z/rtutil.go000066400000000000000000000046561356330504700157060ustar00rootroot00000000000000// MIT License
// Copyright (c) 2019 Ewan Chou
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

package z

import (
	"unsafe"
)

// NanoTime returns the current time in nanoseconds from a monotonic clock.
//go:linkname NanoTime runtime.nanotime
func NanoTime() int64

// CPUTicks is a faster alternative to NanoTime to measure time duration.
//go:linkname CPUTicks runtime.cputicks
func CPUTicks() int64

// stringStruct mirrors the runtime's internal string header so a []byte or
// string header can be reinterpreted to reach its data pointer and length.
type stringStruct struct {
	str unsafe.Pointer
	len int
}

//go:noescape
//go:linkname memhash runtime.memhash
func memhash(p unsafe.Pointer, h, s uintptr) uintptr

// MemHash is the hash function used by go map, it utilizes available hardware instructions(behaves
// as aeshash if aes instruction is available).
// NOTE: The hash seed changes for every process. So, this cannot be used as a persistent hash.
func MemHash(data []byte) uint64 {
	// A slice header's first two fields (data pointer, length) line up with
	// stringStruct, so this cast exposes exactly what memhash needs.
	ss := (*stringStruct)(unsafe.Pointer(&data))
	return uint64(memhash(ss.str, 0, uintptr(ss.len)))
}

// MemHashString is the hash function used by go map, it utilizes available hardware instructions
// (behaves as aeshash if aes instruction is available).
// NOTE: The hash seed changes for every process. So, this cannot be used as a persistent hash.
func MemHashString(str string) uint64 {
	// A string header is itself a (pointer, length) pair, so the cast is direct.
	ss := (*stringStruct)(unsafe.Pointer(&str))
	return uint64(memhash(ss.str, 0, uintptr(ss.len)))
}

// FastRand is a fast thread local random function.
//go:linkname FastRand runtime.fastrand
func FastRand() uint32
ristretto-0.0.1/z/rtutil.s000066400000000000000000000000001356330504700155270ustar00rootroot00000000000000ristretto-0.0.1/z/rtutil_test.go000066400000000000000000000074221356330504700167440ustar00rootroot00000000000000package z

import (
	"crypto/rand"
	"hash/fnv"
	"testing"

	"github.com/dgryski/go-farm"
)

// The benchmarks below compare hash throughput on the same random 64-byte input.

func BenchmarkMemHash(b *testing.B) {
	buf := make([]byte, 64)
	rand.Read(buf)
	for i := 0; i < b.N; i++ {
		MemHash(buf)
	}
}

func BenchmarkSip(b *testing.B) {
	buf := make([]byte, 64)
	rand.Read(buf)
	for i := 0; i < b.N; i++ {
		SipHash(buf)
	}
}

func BenchmarkFarm(b *testing.B) {
	buf := make([]byte, 64)
	rand.Read(buf)
	for i := 0; i < b.N; i++ {
		farm.Fingerprint64(buf)
	}
}

func BenchmarkFnv(b *testing.B) {
	buf := make([]byte, 64)
	rand.Read(buf)
	f := fnv.New64a()
	for i := 0; i < b.N; i++ {
		f.Write(buf)
		f.Sum64()
		f.Reset()
	}
}

// SipHash is a test-local SipHash implementation with hard-coded keys
// (2 compression rounds per block, 4 finalization rounds, per the round
// comments below). The single 64-bit result is split into two halves l, h.
func SipHash(p []byte) (l, h uint64) {
	// Initialization.
	v0 := uint64(8317987320269560794) // k0 ^ 0x736f6d6570736575
	v1 := uint64(7237128889637516672) // k1 ^ 0x646f72616e646f6d
	v2 := uint64(7816392314733513934) // k0 ^ 0x6c7967656e657261
	v3 := uint64(8387220255325274014) // k1 ^ 0x7465646279746573
	t := uint64(len(p)) << 56

	// Compression.
	for len(p) >= 8 {
		// Fold the next little-endian 8-byte block into the state.
		m := uint64(p[0]) | uint64(p[1])<<8 | uint64(p[2])<<16 | uint64(p[3])<<24 |
			uint64(p[4])<<32 | uint64(p[5])<<40 | uint64(p[6])<<48 | uint64(p[7])<<56

		v3 ^= m

		// Round 1.
		v0 += v1
		v1 = v1<<13 | v1>>51
		v1 ^= v0
		v0 = v0<<32 | v0>>32

		v2 += v3
		v3 = v3<<16 | v3>>48
		v3 ^= v2

		v0 += v3
		v3 = v3<<21 | v3>>43
		v3 ^= v0

		v2 += v1
		v1 = v1<<17 | v1>>47
		v1 ^= v2
		v2 = v2<<32 | v2>>32

		// Round 2.
		v0 += v1
		v1 = v1<<13 | v1>>51
		v1 ^= v0
		v0 = v0<<32 | v0>>32

		v2 += v3
		v3 = v3<<16 | v3>>48
		v3 ^= v2

		v0 += v3
		v3 = v3<<21 | v3>>43
		v3 ^= v0

		v2 += v1
		v1 = v1<<17 | v1>>47
		v1 ^= v2
		v2 = v2<<32 | v2>>32

		v0 ^= m
		p = p[8:]
	}

	// Compress last block: pack the trailing <8 bytes under the length byte.
	switch len(p) {
	case 7:
		t |= uint64(p[6]) << 48
		fallthrough
	case 6:
		t |= uint64(p[5]) << 40
		fallthrough
	case 5:
		t |= uint64(p[4]) << 32
		fallthrough
	case 4:
		t |= uint64(p[3]) << 24
		fallthrough
	case 3:
		t |= uint64(p[2]) << 16
		fallthrough
	case 2:
		t |= uint64(p[1]) << 8
		fallthrough
	case 1:
		t |= uint64(p[0])
	}

	v3 ^= t

	// Round 1.
	v0 += v1
	v1 = v1<<13 | v1>>51
	v1 ^= v0
	v0 = v0<<32 | v0>>32

	v2 += v3
	v3 = v3<<16 | v3>>48
	v3 ^= v2

	v0 += v3
	v3 = v3<<21 | v3>>43
	v3 ^= v0

	v2 += v1
	v1 = v1<<17 | v1>>47
	v1 ^= v2
	v2 = v2<<32 | v2>>32

	// Round 2.
	v0 += v1
	v1 = v1<<13 | v1>>51
	v1 ^= v0
	v0 = v0<<32 | v0>>32

	v2 += v3
	v3 = v3<<16 | v3>>48
	v3 ^= v2

	v0 += v3
	v3 = v3<<21 | v3>>43
	v3 ^= v0

	v2 += v1
	v1 = v1<<17 | v1>>47
	v1 ^= v2
	v2 = v2<<32 | v2>>32

	v0 ^= t

	// Finalization.
	v2 ^= 0xff

	// Round 1.
	v0 += v1
	v1 = v1<<13 | v1>>51
	v1 ^= v0
	v0 = v0<<32 | v0>>32

	v2 += v3
	v3 = v3<<16 | v3>>48
	v3 ^= v2

	v0 += v3
	v3 = v3<<21 | v3>>43
	v3 ^= v0

	v2 += v1
	v1 = v1<<17 | v1>>47
	v1 ^= v2
	v2 = v2<<32 | v2>>32

	// Round 2.
	v0 += v1
	v1 = v1<<13 | v1>>51
	v1 ^= v0
	v0 = v0<<32 | v0>>32

	v2 += v3
	v3 = v3<<16 | v3>>48
	v3 ^= v2

	v0 += v3
	v3 = v3<<21 | v3>>43
	v3 ^= v0

	v2 += v1
	v1 = v1<<17 | v1>>47
	v1 ^= v2
	v2 = v2<<32 | v2>>32

	// Round 3.
	v0 += v1
	v1 = v1<<13 | v1>>51
	v1 ^= v0
	v0 = v0<<32 | v0>>32

	v2 += v3
	v3 = v3<<16 | v3>>48
	v3 ^= v2

	v0 += v3
	v3 = v3<<21 | v3>>43
	v3 ^= v0

	v2 += v1
	v1 = v1<<17 | v1>>47
	v1 ^= v2
	v2 = v2<<32 | v2>>32

	// Round 4.
	v0 += v1
	v1 = v1<<13 | v1>>51
	v1 ^= v0
	v0 = v0<<32 | v0>>32

	v2 += v3
	v3 = v3<<16 | v3>>48
	v3 ^= v2

	v0 += v3
	v3 = v3<<21 | v3>>43
	v3 ^= v0

	v2 += v1
	v1 = v1<<17 | v1>>47
	v1 ^= v2
	v2 = v2<<32 | v2>>32

	// return v0 ^ v1 ^ v2 ^ v3
	hash := v0 ^ v1 ^ v2 ^ v3
	h = hash >> 1
	l = hash << 1 >> 1
	return l, h
}

func BenchmarkNanoTime(b *testing.B) {
	for i := 0; i < b.N; i++ {
		NanoTime()
	}
}

func BenchmarkCPUTicks(b *testing.B) {
	for i := 0; i < b.N; i++ {
		CPUTicks()
	}
}

func BenchmarkFastRand(b *testing.B) {
	for i := 0; i < b.N; i++ {
		FastRand()
	}
}
ristretto-0.0.1/z/z.go000066400000000000000000000031141356330504700146250ustar00rootroot00000000000000/*
 * Copyright 2019 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package z

import (
	"github.com/cespare/xxhash"
)

// TODO: Figure out a way to re-use memhash for the second uint64 hash, we
//       already know that appending bytes isn't reliable for generating a
//       second hash (see Ristretto PR #88).
//
//       We also know that while the Go runtime has a runtime memhash128
//       function, it's not possible to use it to generate [2]uint64 or
//       anything resembling a 128bit hash, even though that's exactly what
//       we need in this situation.
func KeyToHash(key interface{}) (uint64, uint64) { if key == nil { return 0, 0 } switch k := key.(type) { case uint64: return k, 0 case string: raw := []byte(k) return MemHash(raw), xxhash.Sum64(raw) case []byte: return MemHash(k), xxhash.Sum64(k) case byte: return uint64(k), 0 case int: return uint64(k), 0 case int32: return uint64(k), 0 case uint32: return uint64(k), 0 case int64: return uint64(k), 0 default: panic("Key type not supported") } } ristretto-0.0.1/z/z_test.go000066400000000000000000000030301356330504700156610ustar00rootroot00000000000000/* * Copyright 2019 Dgraph Labs, Inc. and Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package z import ( "math" "testing" ) func verifyHashProduct(t *testing.T, wantKey, wantConflict, key, conflict uint64) { if wantKey != key || wantConflict != conflict { t.Errorf("expected (%d, %d) but got (%d, %d)\n", wantKey, wantConflict, key, conflict) } } func TestKeyToHash(t *testing.T) { var key uint64 var conflict uint64 key, conflict = KeyToHash(uint64(1)) verifyHashProduct(t, 1, 0, key, conflict) key, conflict = KeyToHash(1) verifyHashProduct(t, 1, 0, key, conflict) key, conflict = KeyToHash(int32(2)) verifyHashProduct(t, 2, 0, key, conflict) key, conflict = KeyToHash(int32(-2)) verifyHashProduct(t, math.MaxUint64-1, 0, key, conflict) key, conflict = KeyToHash(int64(-2)) verifyHashProduct(t, math.MaxUint64-1, 0, key, conflict) key, conflict = KeyToHash(uint32(3)) verifyHashProduct(t, 3, 0, key, conflict) key, conflict = KeyToHash(int64(3)) verifyHashProduct(t, 3, 0, key, conflict) }