ristretto-0.0.1/.deepsource.toml:
version = 1
test_patterns = [
'**/*_test.go'
]
exclude_patterns = [
]
[[analyzers]]
name = 'go'
enabled = true
[analyzers.meta]
import_path = 'github.com/dgraph-io/ristretto'
ristretto-0.0.1/.github/CODEOWNERS:
# CODEOWNERS info: https://help.github.com/en/articles/about-code-owners
# Owners are automatically requested for review on PRs that change code
# that they own.
* @manishrjain @jarifibrahim @karlmcguire
ristretto-0.0.1/.github/workflows/ci.yml:
name: tests
on: [push, pull_request]
jobs:
  ci:
    strategy:
      matrix:
        go-version: [1.12.x, 1.13.x]
        platform: [ubuntu-latest]
    name: CI
    runs-on: ${{ matrix.platform }}
    steps:
      - uses: actions/checkout@v1
      - uses: actions/setup-go@v1
        with:
          go-version: ${{ matrix.go-version }}
      - run: go fmt ./...
      - run: go test -race ./...
      - run: go test -v ./...
ristretto-0.0.1/LICENSE:
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
ristretto-0.0.1/README.md:
# Ristretto
[GoDoc](http://godoc.org/github.com/dgraph-io/ristretto)
[Go Report Card](https://goreportcard.com/report/github.com/dgraph-io/ristretto)
[Coverage](https://gocover.io/github.com/dgraph-io/ristretto)

Ristretto is a fast, concurrent cache library built with a focus on performance and correctness.
The motivation to build Ristretto comes from the need for a contention-free
cache in [Dgraph][].
[Dgraph]: https://github.com/dgraph-io/dgraph
## Features
* **High Hit Ratios** - with our unique admission/eviction policy pairing, Ristretto's performance is best in class.
* **Eviction: SampledLFU** - on par with exact LRU and better performance on Search and Database traces.
* **Admission: TinyLFU** - extra performance with little memory overhead (12 bits per counter).
* **Fast Throughput** - we use a variety of techniques for managing contention and the result is excellent throughput.
* **Cost-Based Eviction** - any large new item deemed valuable can evict multiple smaller items (cost could be anything).
* **Fully Concurrent** - you can use as many goroutines as you want with little throughput degradation.
* **Metrics** - optional performance metrics for throughput, hit ratios, and other stats.
* **Simple API** - just figure out your ideal `Config` values and you're off and running.
## Status
Ristretto is usable but still under active development. We expect it to be production ready in the near future.
## Table of Contents
* [Usage](#Usage)
* [Example](#Example)
* [Config](#Config)
* [NumCounters](#Config)
* [MaxCost](#Config)
* [BufferItems](#Config)
* [Metrics](#Config)
* [OnEvict](#Config)
* [KeyToHash](#Config)
* [Cost](#Config)
* [Benchmarks](#Benchmarks)
* [Hit Ratios](#Hit-Ratios)
* [Search](#Search)
* [Database](#Database)
* [Looping](#Looping)
* [CODASYL](#CODASYL)
* [Throughput](#Throughput)
* [Mixed](#Mixed)
* [Read](#Read)
* [Write](#Write)
* [FAQ](#FAQ)
## Usage
### Example
```go
package main

import (
    "fmt"
    "time"

    "github.com/dgraph-io/ristretto"
)

func main() {
    cache, err := ristretto.NewCache(&ristretto.Config{
        NumCounters: 1e7,     // number of keys to track frequency of (10M).
        MaxCost:     1 << 30, // maximum cost of cache (1GB).
        BufferItems: 64,      // number of keys per Get buffer.
    })
    if err != nil {
        panic(err)
    }

    // set a value with a cost of 1
    cache.Set("key", "value", 1)

    // wait for value to pass through buffers
    time.Sleep(10 * time.Millisecond)

    value, found := cache.Get("key")
    if !found {
        panic("missing value")
    }
    fmt.Println(value)
    cache.Del("key")
}
```
### Config
The `Config` struct is passed to `NewCache` when creating Ristretto instances (see the example above).
**NumCounters** `int64`
NumCounters is the number of 4-bit access counters to keep for admission and eviction. We've seen good performance in setting this to 10x the number of items you expect to keep in the cache when full.
For example, if you expect each item to have a cost of 1 and MaxCost is 100, set NumCounters to 1,000. Or, if you use variable cost values but expect the cache to hold around 10,000 items when full, set NumCounters to 100,000. The important thing is the *number of unique items* in the full cache, not necessarily the MaxCost value.
**MaxCost** `int64`
MaxCost is the total cost budget the cache is allowed to hold; eviction decisions are made against it. For example, if MaxCost is 100 and a new item with a cost of 1 increases total cache cost to 101, 1 item will be evicted.
MaxCost can also be used to denote the max size in bytes. For example, if MaxCost is 1,000,000 (1MB) and the cache is full with 1,000 1KB items, an accepted new item with a cost of 5,000 (5KB) would cause five of the 1KB items to be evicted.
MaxCost could be anything as long as it matches how you're using the cost values when calling Set.
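As a concrete sketch of the byte-based setup described above (the sizes and keys here are illustrative, not recommendations), the cost passed to Set is simply the value's length in bytes:

```go
package main

import (
    "fmt"
    "time"

    "github.com/dgraph-io/ristretto"
)

func main() {
    cache, err := ristretto.NewCache(&ristretto.Config{
        NumCounters: 1e5,      // ~10x the number of 1KB items a 10MB cache can hold
        MaxCost:     10 << 20, // 10MB budget, expressed in bytes
        BufferItems: 64,
    })
    if err != nil {
        panic(err)
    }
    val := []byte("some value")
    cache.Set("key", val, int64(len(val))) // cost = size of the value in bytes
    time.Sleep(10 * time.Millisecond)      // let the Set pass through the buffers
    if v, found := cache.Get("key"); found {
        fmt.Printf("cached %d bytes\n", len(v.([]byte)))
    }
}
```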
**BufferItems** `int64`
BufferItems is the size of the Get buffers. The best value we've found for this is 64.
If for some reason you see Get performance decreasing with lots of contention (you shouldn't), try increasing this value in increments of 64. This is a fine-tuning mechanism and you probably won't have to touch this.
**Metrics** `bool`
Metrics should be true when you want real-time logging of a variety of stats. This is a Config flag (rather than always being on) because keeping metrics costs roughly 10% of throughput.
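A minimal sketch of enabling metrics and reading them back (the traffic shown is purely illustrative):

```go
package main

import (
    "fmt"
    "time"

    "github.com/dgraph-io/ristretto"
)

func main() {
    cache, err := ristretto.NewCache(&ristretto.Config{
        NumCounters: 1e7,
        MaxCost:     1 << 30,
        BufferItems: 64,
        Metrics:     true, // enables the cache.Metrics field
    })
    if err != nil {
        panic(err)
    }
    cache.Set("key", "value", 1)
    time.Sleep(10 * time.Millisecond) // let the Set pass through the buffers
    cache.Get("key")     // hit (once the Set has been processed)
    cache.Get("missing") // miss
    fmt.Println(cache.Metrics.Ratio())  // hits / (hits + misses)
    fmt.Println(cache.Metrics.String()) // all counters on one line
}
```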
**OnEvict** `func(key, conflict uint64, value interface{}, cost int64)`
OnEvict is called for every eviction and receives the hashed key, the conflict hash, the value, and the cost of the evicted item.
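A minimal sketch of wiring up an OnEvict callback (the tiny MaxCost is only there to force evictions quickly; note the callback receives the two key hashes, not the original key):

```go
package main

import (
    "fmt"
    "time"

    "github.com/dgraph-io/ristretto"
)

func main() {
    cache, err := ristretto.NewCache(&ristretto.Config{
        NumCounters: 100,
        MaxCost:     5, // deliberately tiny so the loop below forces evictions
        BufferItems: 64,
        OnEvict: func(key, conflict uint64, value interface{}, cost int64) {
            // key and conflict are the two hashes of the evicted item's key.
            fmt.Printf("evicted hash=%d value=%v cost=%d\n", key, value, cost)
        },
    })
    if err != nil {
        panic(err)
    }
    for i := 0; i < 20; i++ {
        cache.Set(i, i, 1)
        time.Sleep(time.Millisecond) // give the policy time to process each Set
    }
}
```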
**KeyToHash** `func(key interface{}) (uint64, uint64)`
KeyToHash is the hashing algorithm used for every key. If this is nil, Ristretto has a variety of [defaults depending on the underlying interface type](https://github.com/dgraph-io/ristretto/blob/master/z/z.go#L19-L41).
Note that if you want 128-bit hashes you should fill both returned `uint64`
values; otherwise just fill the first and leave the second at `0`, and it will
behave like any 64-bit hash.
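A minimal sketch of a custom KeyToHash, assuming keys are already `uint64` values (a deliberately trivial hash for illustration; the second return value is left at `0` so it behaves like a plain 64-bit hash):

```go
package main

import (
    "fmt"
    "time"

    "github.com/dgraph-io/ristretto"
)

func main() {
    cache, err := ristretto.NewCache(&ristretto.Config{
        NumCounters: 1e4,
        MaxCost:     1e3,
        BufferItems: 64,
        KeyToHash: func(key interface{}) (uint64, uint64) {
            // Keys in this sketch are always uint64, so the key itself is used
            // as the hash; the second value stays 0 (no conflict checking).
            return key.(uint64), 0
        },
    })
    if err != nil {
        panic(err)
    }
    cache.Set(uint64(42), "value", 1)
    time.Sleep(10 * time.Millisecond)
    fmt.Println(cache.Get(uint64(42)))
}
```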
**Cost** `func(value interface{}) int64`
Cost is an optional function you can pass to the Config in order to evaluate
item cost at runtime. It is only called for Set calls that aren't dropped, which
is useful if calculating item cost is particularly expensive and you don't want
to waste time on items that will be dropped anyway.
To signal to Ristretto that you'd like to use this Cost function:
1. Set the Cost field to a non-nil function.
2. When calling Set for new items or item updates, use a `cost` of 0.
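A minimal sketch of both steps together (the byte-length cost function is illustrative):

```go
package main

import (
    "fmt"
    "time"

    "github.com/dgraph-io/ristretto"
)

func main() {
    cache, err := ristretto.NewCache(&ristretto.Config{
        NumCounters: 1e5,
        MaxCost:     1 << 20, // 1MB budget in bytes
        BufferItems: 64,
        // Step 1: a non-nil Cost function. It only runs for Sets that survive
        // the buffers, so expensive sizing work isn't wasted on dropped items.
        Cost: func(value interface{}) int64 {
            return int64(len(value.([]byte)))
        },
    })
    if err != nil {
        panic(err)
    }
    // Step 2: pass a cost of 0 so the Cost function is used to size the item.
    cache.Set("key", []byte("a fairly large value"), 0)
    time.Sleep(10 * time.Millisecond)
    if v, found := cache.Get("key"); found {
        fmt.Println(string(v.([]byte)))
    }
}
```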
## Benchmarks
The benchmarks can be found in https://github.com/dgraph-io/benchmarks/tree/master/cachebench/ristretto.
### Hit Ratios
#### Search
This trace is described as "disk read accesses initiated by a large commercial
search engine in response to various web search requests."
#### Database
This trace is described as "a database server running at a commercial site
running an ERP application on top of a commercial database."
#### Looping
This trace demonstrates a looping access pattern.
#### CODASYL
This trace is described as "references to a CODASYL database for a one hour
period."
### Throughput
All throughput benchmarks were run on an Intel Core i7-8700K (3.7GHz) with 16GB
of RAM.
#### Mixed
#### Read
#### Write
## FAQ
### How are you achieving this performance? What shortcuts are you taking?
We go into detail in the [Ristretto blog post](https://blog.dgraph.io/post/introducing-ristretto-high-perf-go-cache/), but in short: our throughput performance can be attributed to a mix of batching and eventual consistency. Our hit ratio performance is mostly due to an excellent [admission policy](https://arxiv.org/abs/1512.00727) and SampledLFU eviction policy.
As for "shortcuts," the only thing Ristretto does that could be construed as one is dropping some Set calls. That means a Set call for a new item (updates are guaranteed) isn't guaranteed to make it into the cache. The new item could be dropped at two points: when passing through the Set buffer or when passing through the admission policy. However, this doesn't affect hit ratios much at all as we expect the most popular items to be Set multiple times and eventually make it in the cache.
### Is Ristretto distributed?
No, it's just like any other Go library that you can import into your project and use in a single process.
ristretto-0.0.1/cache.go:
/*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Ristretto is a fast, fixed size, in-memory cache with a dual focus on
// throughput and hit ratio performance. You can easily add Ristretto to an
// existing system and keep the most valuable data where you need it.
package ristretto
import (
"bytes"
"errors"
"fmt"
"sync/atomic"
"github.com/dgraph-io/ristretto/z"
)
const (
// TODO: find the optimal value for this or make it configurable
setBufSize = 32 * 1024
)
// Cache is a thread-safe implementation of a hashmap with a TinyLFU admission
// policy and a Sampled LFU eviction policy. You can use the same Cache instance
// from as many goroutines as you want.
type Cache struct {
// store is the central concurrent hashmap where key-value items are stored
store store
// policy determines what gets let in to the cache and what gets kicked out
policy policy
// getBuf is a custom ring buffer implementation that gets pushed to when
// keys are read
getBuf *ringBuffer
// setBuf is a buffer allowing us to batch/drop Sets during times of high
// contention
setBuf chan *item
// onEvict is called for item evictions
onEvict func(uint64, uint64, interface{}, int64)
// KeyToHash function is used to customize the key hashing algorithm.
// Each key will be hashed using the provided function. If keyToHash value
// is not set, the default keyToHash function is used.
keyToHash func(interface{}) (uint64, uint64)
// stop is used to stop the processItems goroutine
stop chan struct{}
// cost calculates cost from a value
cost func(value interface{}) int64
// Metrics contains a running log of important statistics like hits, misses,
// and dropped items
Metrics *Metrics
}
// Config is passed to NewCache for creating new Cache instances.
type Config struct {
// NumCounters determines the number of counters (keys) to keep that hold
// access frequency information. It's generally a good idea to have more
// counters than the max cache capacity, as this will improve eviction
// accuracy and subsequent hit ratios.
//
// For example, if you expect your cache to hold 1,000,000 items when full,
// NumCounters should be 10,000,000 (10x). Each counter takes up 4 bits, so
// keeping 10,000,000 counters would require 5MB of memory.
NumCounters int64
// MaxCost can be considered as the cache capacity, in whatever units you
// choose to use.
//
// For example, if you want the cache to have a max capacity of 100MB, you
// would set MaxCost to 100,000,000 and pass an item's number of bytes as
// the `cost` parameter for calls to Set. If new items are accepted, the
// eviction process will take care of making room for the new item and not
// overflowing the MaxCost value.
MaxCost int64
// BufferItems determines the size of Get buffers.
//
// Unless you have a rare use case, using `64` as the BufferItems value
// results in good performance.
BufferItems int64
// Metrics determines whether cache statistics are kept during the cache's
// lifetime. There *is* some overhead to keeping statistics, so you should
// only set this flag to true when testing or throughput performance isn't a
// major factor.
Metrics bool
// OnEvict is called for every eviction and passes the hashed key, value,
// and cost to the function.
OnEvict func(key, conflict uint64, value interface{}, cost int64)
// KeyToHash function is used to customize the key hashing algorithm.
// Each key will be hashed using the provided function. If keyToHash value
// is not set, the default keyToHash function is used.
KeyToHash func(key interface{}) (uint64, uint64)
// Cost evaluates a value and outputs a corresponding cost. This function
// is run after Set is called for a new item or an item update with a cost
// param of 0.
Cost func(value interface{}) int64
}
type itemFlag byte
const (
itemNew itemFlag = iota
itemDelete
itemUpdate
)
// item is passed to setBuf so items can eventually be added to the cache
type item struct {
flag itemFlag
key uint64
conflict uint64
value interface{}
cost int64
}
// NewCache returns a new Cache instance along with any configuration error encountered.
func NewCache(config *Config) (*Cache, error) {
switch {
case config.NumCounters == 0:
return nil, errors.New("NumCounters can't be zero.")
case config.MaxCost == 0:
return nil, errors.New("MaxCost can't be zero.")
case config.BufferItems == 0:
return nil, errors.New("BufferItems can't be zero.")
}
policy := newPolicy(config.NumCounters, config.MaxCost)
cache := &Cache{
store: newStore(),
policy: policy,
getBuf: newRingBuffer(policy, config.BufferItems),
setBuf: make(chan *item, setBufSize),
onEvict: config.OnEvict,
keyToHash: config.KeyToHash,
stop: make(chan struct{}),
cost: config.Cost,
}
if cache.keyToHash == nil {
cache.keyToHash = z.KeyToHash
}
if config.Metrics {
cache.collectMetrics()
}
// NOTE: benchmarks seem to show that performance decreases the more
// goroutines we have running cache.processItems(), so 1 should
// usually be sufficient
go cache.processItems()
return cache, nil
}
// Get returns the value (if any) and a boolean representing whether the
// value was found or not. The value can be nil and the boolean can be true at
// the same time.
func (c *Cache) Get(key interface{}) (interface{}, bool) {
if c == nil || key == nil {
return nil, false
}
keyHash, conflictHash := c.keyToHash(key)
c.getBuf.Push(keyHash)
value, ok := c.store.Get(keyHash, conflictHash)
if ok {
c.Metrics.add(hit, keyHash, 1)
} else {
c.Metrics.add(miss, keyHash, 1)
}
return value, ok
}
// Set attempts to add the key-value item to the cache. If it returns false,
// then the Set was dropped and the key-value item isn't added to the cache. If
// it returns true, there's still a chance it could be dropped by the policy if
// it's determined that the key-value item isn't worth keeping, but otherwise the
// item will be added and other items will be evicted in order to make room.
//
// To dynamically evaluate the item's cost using the Config.Cost function, set
// the cost parameter to 0 and Cost will be run when needed in order to find
// the item's true cost.
func (c *Cache) Set(key, value interface{}, cost int64) bool {
if c == nil || key == nil {
return false
}
keyHash, conflictHash := c.keyToHash(key)
i := &item{
flag: itemNew,
key: keyHash,
conflict: conflictHash,
value: value,
cost: cost,
}
// attempt to immediately update hashmap value and set flag to update so the
// cost is eventually updated
if c.store.Update(keyHash, conflictHash, i.value) {
i.flag = itemUpdate
}
// attempt to send item to policy
select {
case c.setBuf <- i:
return true
default:
c.Metrics.add(dropSets, keyHash, 1)
return false
}
}
// Del deletes the key-value item from the cache if it exists.
func (c *Cache) Del(key interface{}) {
if c == nil || key == nil {
return
}
keyHash, conflictHash := c.keyToHash(key)
c.setBuf <- &item{
flag: itemDelete,
key: keyHash,
conflict: conflictHash,
}
}
// Close stops all goroutines and closes all channels.
func (c *Cache) Close() {
// block until processItems goroutine is returned
c.stop <- struct{}{}
close(c.stop)
close(c.setBuf)
c.policy.Close()
}
// Clear empties the hashmap and zeroes all policy counters. Note that this is
// not an atomic operation (but that shouldn't be a problem as it's assumed that
// Set/Get calls won't be occurring until after this).
func (c *Cache) Clear() {
// block until processItems goroutine is returned
c.stop <- struct{}{}
// swap out the setBuf channel
c.setBuf = make(chan *item, setBufSize)
// clear value hashmap and policy data
c.policy.Clear()
c.store.Clear()
// only reset metrics if they're enabled
if c.Metrics != nil {
c.Metrics.Clear()
}
// restart processItems goroutine
go c.processItems()
}
// processItems is run by goroutines processing the Set buffer.
func (c *Cache) processItems() {
for {
select {
case i := <-c.setBuf:
// calculate item cost value if new or update
if i.cost == 0 && c.cost != nil && i.flag != itemDelete {
i.cost = c.cost(i.value)
}
switch i.flag {
case itemNew:
victims, added := c.policy.Add(i.key, i.cost)
if added {
c.store.Set(i.key, i.conflict, i.value)
c.Metrics.add(keyAdd, i.key, 1)
c.Metrics.add(costAdd, i.key, uint64(i.cost))
}
for _, victim := range victims {
victim.conflict, victim.value = c.store.Del(victim.key, 0)
if c.onEvict != nil {
c.onEvict(victim.key, victim.conflict, victim.value, victim.cost)
}
c.Metrics.add(keyEvict, victim.key, 1)
c.Metrics.add(costEvict, victim.key, uint64(victim.cost))
}
case itemUpdate:
c.policy.Update(i.key, i.cost)
case itemDelete:
c.policy.Del(i.key)
c.store.Del(i.key, i.conflict)
}
case <-c.stop:
return
}
}
}
// collectMetrics just creates a new *Metrics instance and adds the pointers
// to the cache and policy instances.
func (c *Cache) collectMetrics() {
c.Metrics = newMetrics()
c.policy.CollectMetrics(c.Metrics)
}
type metricType int
const (
// The following 2 keep track of hits and misses.
hit = iota
miss
// The following 3 keep track of number of keys added, updated and evicted.
keyAdd
keyUpdate
keyEvict
// The following 2 keep track of cost of keys added and evicted.
costAdd
costEvict
// The following keep track of how many sets were dropped or rejected later.
dropSets
rejectSets
// The following 2 keep track of how many gets were kept and dropped on the
// floor.
dropGets
keepGets
// This should be the final enum. Other enums should be set before this.
doNotUse
)
func stringFor(t metricType) string {
switch t {
case hit:
return "hit"
case miss:
return "miss"
case keyAdd:
return "keys-added"
case keyUpdate:
return "keys-updated"
case keyEvict:
return "keys-evicted"
case costAdd:
return "cost-added"
case costEvict:
return "cost-evicted"
case dropSets:
return "sets-dropped"
case rejectSets:
return "sets-rejected" // by policy.
case dropGets:
return "gets-dropped"
case keepGets:
return "gets-kept"
default:
return "unidentified"
}
}
// Metrics is a snapshot of performance statistics for the lifetime of a cache
// instance.
type Metrics struct {
all [doNotUse][]*uint64
}
func newMetrics() *Metrics {
s := &Metrics{}
for i := 0; i < doNotUse; i++ {
s.all[i] = make([]*uint64, 256)
slice := s.all[i]
for j := range slice {
slice[j] = new(uint64)
}
}
return s
}
func (p *Metrics) add(t metricType, hash, delta uint64) {
if p == nil {
return
}
valp := p.all[t]
// Avoid false sharing by padding at least 64 bytes of space between two
// atomic counters which would be incremented.
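// (Each metric has 256 slots; a stride of 10 uint64s is 80 bytes, wider than
// a typical 64-byte cache line, so the 25 counters in use never share a line.)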
idx := (hash % 25) * 10
atomic.AddUint64(valp[idx], delta)
}
func (p *Metrics) get(t metricType) uint64 {
if p == nil {
return 0
}
valp := p.all[t]
var total uint64
for i := range valp {
total += atomic.LoadUint64(valp[i])
}
return total
}
// Hits is the number of Get calls where a value was found for the corresponding
// key.
func (p *Metrics) Hits() uint64 {
return p.get(hit)
}
// Misses is the number of Get calls where a value was not found for the
// corresponding key.
func (p *Metrics) Misses() uint64 {
return p.get(miss)
}
// KeysAdded is the total number of Set calls where a new key-value item was
// added.
func (p *Metrics) KeysAdded() uint64 {
return p.get(keyAdd)
}
// KeysUpdated is the total number of Set calls where the value was updated.
func (p *Metrics) KeysUpdated() uint64 {
return p.get(keyUpdate)
}
// KeysEvicted is the total number of keys evicted.
func (p *Metrics) KeysEvicted() uint64 {
return p.get(keyEvict)
}
// CostAdded is the sum of costs that have been added (successful Set calls).
func (p *Metrics) CostAdded() uint64 {
return p.get(costAdd)
}
// CostEvicted is the sum of all costs that have been evicted.
func (p *Metrics) CostEvicted() uint64 {
return p.get(costEvict)
}
// SetsDropped is the number of Set calls that don't make it into internal
// buffers (due to contention or some other reason).
func (p *Metrics) SetsDropped() uint64 {
return p.get(dropSets)
}
// SetsRejected is the number of Set calls rejected by the policy (TinyLFU).
func (p *Metrics) SetsRejected() uint64 {
return p.get(rejectSets)
}
// GetsDropped is the number of Get counter increments that are dropped
// internally.
func (p *Metrics) GetsDropped() uint64 {
return p.get(dropGets)
}
// GetsKept is the number of Get counter increments that are kept.
func (p *Metrics) GetsKept() uint64 {
return p.get(keepGets)
}
// Ratio is the number of Hits over all accesses (Hits + Misses). This is the
// percentage of successful Get calls.
func (p *Metrics) Ratio() float64 {
if p == nil {
return 0.0
}
hits, misses := p.get(hit), p.get(miss)
if hits == 0 && misses == 0 {
return 0.0
}
return float64(hits) / float64(hits+misses)
}
func (p *Metrics) Clear() {
if p == nil {
return
}
for i := 0; i < doNotUse; i++ {
for j := range p.all[i] {
atomic.StoreUint64(p.all[i][j], 0)
}
}
}
func (p *Metrics) String() string {
if p == nil {
return ""
}
var buf bytes.Buffer
for i := 0; i < doNotUse; i++ {
t := metricType(i)
fmt.Fprintf(&buf, "%s: %d ", stringFor(t), p.get(t))
}
fmt.Fprintf(&buf, "gets-total: %d ", p.get(hit)+p.get(miss))
fmt.Fprintf(&buf, "hit-ratio: %.2f", p.Ratio())
return buf.String()
}
ristretto-0.0.1/cache_test.go:
package ristretto
import (
"math/rand"
"strings"
"sync"
"testing"
"time"
"github.com/dgraph-io/ristretto/z"
)
var wait time.Duration = time.Millisecond * 10
func TestCacheKeyToHash(t *testing.T) {
keyToHashCount := 0
c, err := NewCache(&Config{
NumCounters: 100,
MaxCost: 10,
BufferItems: 64,
KeyToHash: func(key interface{}) (uint64, uint64) {
keyToHashCount++
return z.KeyToHash(key)
},
})
if err != nil {
panic(err)
}
if c.Set(1, 1, 1) {
time.Sleep(wait)
if val, ok := c.Get(1); val == nil || !ok {
t.Fatal("get should be successful")
} else {
c.Del(1)
}
}
if keyToHashCount != 3 {
t.Fatal("custom KeyToHash function should be called three times")
}
}
func TestCacheMaxCost(t *testing.T) {
charset := "abcdefghijklmnopqrstuvwxyz0123456789"
key := func() []byte {
k := make([]byte, 2)
for i := range k {
k[i] = charset[rand.Intn(len(charset))]
}
return k
}
c, err := NewCache(&Config{
NumCounters: 12960, // 36^2 * 10
MaxCost: 1e6, // 1mb
BufferItems: 64,
Metrics: true,
})
if err != nil {
panic(err)
}
stop := make(chan struct{}, 8)
for i := 0; i < 8; i++ {
go func() {
for {
select {
case <-stop:
return
default:
time.Sleep(time.Millisecond)
k := key()
if _, ok := c.Get(k); !ok {
val := ""
if rand.Intn(100) < 10 {
val = "test"
} else {
val = strings.Repeat("a", 1000)
}
c.Set(key(), val, int64(2+len(val)))
}
}
}
}()
}
for i := 0; i < 20; i++ {
time.Sleep(time.Second)
cacheCost := c.Metrics.CostAdded() - c.Metrics.CostEvicted()
t.Logf("total cache cost: %d\n", cacheCost)
if float64(cacheCost) > float64(1e6*1.05) {
t.Fatal("cache cost exceeding MaxCost")
}
}
for i := 0; i < 8; i++ {
stop <- struct{}{}
}
}
func TestCache(t *testing.T) {
if _, err := NewCache(&Config{
NumCounters: 0,
}); err == nil {
t.Fatal("numCounters can't be 0")
}
if _, err := NewCache(&Config{
NumCounters: 100,
MaxCost: 0,
}); err == nil {
t.Fatal("maxCost can't be 0")
}
if _, err := NewCache(&Config{
NumCounters: 100,
MaxCost: 10,
BufferItems: 0,
}); err == nil {
t.Fatal("bufferItems can't be 0")
}
if c, err := NewCache(&Config{
NumCounters: 100,
MaxCost: 10,
BufferItems: 64,
Metrics: true,
}); c == nil || err != nil {
t.Fatal("config should be good")
}
}
func TestCacheProcessItems(t *testing.T) {
m := &sync.Mutex{}
evicted := make(map[uint64]struct{})
c, err := NewCache(&Config{
NumCounters: 100,
MaxCost: 10,
BufferItems: 64,
Cost: func(value interface{}) int64 {
return int64(value.(int))
},
OnEvict: func(key, conflict uint64, value interface{}, cost int64) {
m.Lock()
defer m.Unlock()
evicted[key] = struct{}{}
},
})
if err != nil {
panic(err)
}
var key uint64
var conflict uint64
key, conflict = z.KeyToHash(1)
c.setBuf <- &item{
flag: itemNew,
key: key,
conflict: conflict,
value: 1,
cost: 0,
}
time.Sleep(wait)
if !c.policy.Has(1) || c.policy.Cost(1) != 1 {
t.Fatal("cache processItems didn't add new item")
}
key, conflict = z.KeyToHash(1)
c.setBuf <- &item{
flag: itemUpdate,
key: key,
conflict: conflict,
value: 2,
cost: 0,
}
time.Sleep(wait)
if c.policy.Cost(1) != 2 {
t.Fatal("cache processItems didn't update item cost")
}
key, conflict = z.KeyToHash(1)
c.setBuf <- &item{
flag: itemDelete,
key: key,
conflict: conflict,
}
time.Sleep(wait)
key, conflict = z.KeyToHash(1)
if val, ok := c.store.Get(key, conflict); val != nil || ok {
t.Fatal("cache processItems didn't delete item")
}
if c.policy.Has(1) {
t.Fatal("cache processItems didn't delete item")
}
key, conflict = z.KeyToHash(2)
c.setBuf <- &item{
flag: itemNew,
key: key,
conflict: conflict,
value: 2,
cost: 3,
}
key, conflict = z.KeyToHash(3)
c.setBuf <- &item{
flag: itemNew,
key: key,
conflict: conflict,
value: 3,
cost: 3,
}
key, conflict = z.KeyToHash(4)
c.setBuf <- &item{
flag: itemNew,
key: key,
conflict: conflict,
value: 3,
cost: 3,
}
key, conflict = z.KeyToHash(5)
c.setBuf <- &item{
flag: itemNew,
key: key,
conflict: conflict,
value: 3,
cost: 5,
}
time.Sleep(wait)
m.Lock()
if len(evicted) == 0 {
m.Unlock()
t.Fatal("cache processItems not evicting or calling OnEvict")
}
m.Unlock()
defer func() {
if r := recover(); r == nil {
t.Fatal("cache processItems didn't stop")
}
}()
c.Close()
c.setBuf <- &item{flag: itemNew}
}
func TestCacheGet(t *testing.T) {
c, err := NewCache(&Config{
NumCounters: 100,
MaxCost: 10,
BufferItems: 64,
Metrics: true,
})
if err != nil {
panic(err)
}
key, conflict := z.KeyToHash(1)
c.store.Set(key, conflict, 1)
if val, ok := c.Get(1); val == nil || !ok {
t.Fatal("get should be successful")
}
if val, ok := c.Get(2); val != nil || ok {
t.Fatal("get should not be successful")
}
// 0.5 and not 1.0 because one of the two Gets is for a key that was never set
if c.Metrics.Ratio() != 0.5 {
t.Fatal("get should record metrics")
}
c = nil
if val, ok := c.Get(0); val != nil || ok {
t.Fatal("get should not be successful with nil cache")
}
}
func TestCacheSet(t *testing.T) {
c, err := NewCache(&Config{
NumCounters: 100,
MaxCost: 10,
BufferItems: 64,
Metrics: true,
})
if err != nil {
panic(err)
}
if c.Set(1, 1, 1) {
time.Sleep(wait)
if val, ok := c.Get(1); val == nil || val.(int) != 1 || !ok {
t.Fatal("set/get returned wrong value")
}
} else {
if val, ok := c.Get(1); val != nil || ok {
t.Fatal("set was dropped but value still added")
}
}
c.Set(1, 2, 2)
val, ok := c.store.Get(z.KeyToHash(1))
if val == nil || val.(int) != 2 || !ok {
t.Fatal("set/update was unsuccessful")
}
c.stop <- struct{}{}
for i := 0; i < setBufSize; i++ {
key, conflict := z.KeyToHash(1)
c.setBuf <- &item{
flag: itemUpdate,
key: key,
conflict: conflict,
value: 1,
cost: 1,
}
}
if c.Set(2, 2, 1) {
t.Fatal("set should be dropped with full setBuf")
}
if c.Metrics.SetsDropped() != 1 {
t.Fatal("set should track dropSets")
}
close(c.setBuf)
close(c.stop)
c = nil
if c.Set(1, 1, 1) {
t.Fatal("set shouldn't be successful with nil cache")
}
}
func TestCacheDel(t *testing.T) {
c, err := NewCache(&Config{
NumCounters: 100,
MaxCost: 10,
BufferItems: 64,
})
if err != nil {
panic(err)
}
c.Set(1, 1, 1)
c.Del(1)
time.Sleep(wait)
if val, ok := c.Get(1); val != nil || ok {
t.Fatal("del didn't delete")
}
c = nil
defer func() {
if r := recover(); r != nil {
t.Fatal("del panic with nil cache")
}
}()
c.Del(1)
}
func TestCacheClear(t *testing.T) {
c, err := NewCache(&Config{
NumCounters: 100,
MaxCost: 10,
BufferItems: 64,
Metrics: true,
})
if err != nil {
panic(err)
}
for i := 0; i < 10; i++ {
c.Set(i, i, 1)
}
time.Sleep(wait)
if c.Metrics.KeysAdded() != 10 {
t.Fatal("range of sets not being processed")
}
c.Clear()
if c.Metrics.KeysAdded() != 0 {
t.Fatal("clear didn't reset metrics")
}
for i := 0; i < 10; i++ {
if val, ok := c.Get(i); val != nil || ok {
t.Fatal("clear didn't delete values")
}
}
}
func TestCacheMetrics(t *testing.T) {
c, err := NewCache(&Config{
NumCounters: 100,
MaxCost: 10,
BufferItems: 64,
Metrics: true,
})
if err != nil {
panic(err)
}
for i := 0; i < 10; i++ {
c.Set(i, i, 1)
}
time.Sleep(wait)
m := c.Metrics
if m.KeysAdded() != 10 {
t.Fatal("metrics exporting incorrect fields")
}
}
func TestMetrics(t *testing.T) {
newMetrics()
}
func TestMetricsAddGet(t *testing.T) {
m := newMetrics()
m.add(hit, 1, 1)
m.add(hit, 2, 2)
m.add(hit, 3, 3)
if m.Hits() != 6 {
t.Fatal("add/get error")
}
m = nil
m.add(hit, 1, 1)
if m.Hits() != 0 {
t.Fatal("get with nil struct should return 0")
}
}
func TestMetricsRatio(t *testing.T) {
m := newMetrics()
if m.Ratio() != 0 {
t.Fatal("ratio with no hits or misses should be 0")
}
m.add(hit, 1, 1)
m.add(hit, 2, 2)
m.add(miss, 1, 1)
m.add(miss, 2, 2)
if m.Ratio() != 0.5 {
t.Fatal("ratio incorrect")
}
m = nil
if m.Ratio() != 0.0 {
t.Fatal("ratio with a nil struct should return 0")
}
}
func TestMetricsString(t *testing.T) {
m := newMetrics()
m.add(hit, 1, 1)
m.add(miss, 1, 1)
m.add(keyAdd, 1, 1)
m.add(keyUpdate, 1, 1)
m.add(keyEvict, 1, 1)
m.add(costAdd, 1, 1)
m.add(costEvict, 1, 1)
m.add(dropSets, 1, 1)
m.add(rejectSets, 1, 1)
m.add(dropGets, 1, 1)
m.add(keepGets, 1, 1)
if m.Hits() != 1 || m.Misses() != 1 || m.Ratio() != 0.5 || m.KeysAdded() != 1 ||
m.KeysUpdated() != 1 || m.KeysEvicted() != 1 || m.CostAdded() != 1 ||
m.CostEvicted() != 1 || m.SetsDropped() != 1 || m.SetsRejected() != 1 ||
m.GetsDropped() != 1 || m.GetsKept() != 1 {
t.Fatal("Metrics wrong value(s)")
}
if len(m.String()) == 0 {
t.Fatal("Metrics.String() empty")
}
m = nil
if len(m.String()) != 0 {
t.Fatal("Metrics.String() should be empty with nil struct")
}
if stringFor(doNotUse) != "unidentified" {
t.Fatal("stringFor() not handling doNotUse type")
}
}
func TestCacheMetricsClear(t *testing.T) {
c, err := NewCache(&Config{
NumCounters: 100,
MaxCost: 10,
BufferItems: 64,
Metrics: true,
})
if err != nil {
panic(err)
}
c.Set(1, 1, 1)
stop := make(chan struct{})
go func() {
for {
select {
case <-stop:
return
default:
c.Get(1)
}
}
}()
time.Sleep(wait)
c.Clear()
stop <- struct{}{}
c.Metrics = nil
c.Metrics.Clear()
}
ristretto-0.0.1/go.mod:
module github.com/dgraph-io/ristretto
go 1.12
require (
github.com/cespare/xxhash v1.1.0
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2
)
ristretto-0.0.1/go.sum:
github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
ristretto-0.0.1/policy.go:
/*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ristretto
import (
"math"
"sync"
"github.com/dgraph-io/ristretto/z"
)
const (
// lfuSample is the number of items to sample when looking at eviction
// candidates. 5 seems to be the most optimal number [citation needed].
lfuSample = 5
)
// policy is the interface encapsulating eviction/admission behavior.
//
// TODO: remove this interface and just rename defaultPolicy to policy, as we
// are probably only going to use/implement/maintain one policy.
type policy interface {
ringConsumer
// Add attempts to Add the key-cost pair to the Policy. It returns a slice
// of evicted keys and a bool denoting whether or not the key-cost pair
// was added. If it returns true, the key should be stored in cache.
Add(uint64, int64) ([]*item, bool)
// Has returns true if the key exists in the Policy.
Has(uint64) bool
// Del deletes the key from the Policy.
Del(uint64)
// Cap returns the available capacity.
Cap() int64
// Close stops all goroutines and closes all channels.
Close()
// Update updates the cost value for the key.
Update(uint64, int64)
// Cost returns the cost value of a key or -1 if missing.
Cost(uint64) int64
// Optionally, set stats object to track how policy is performing.
CollectMetrics(*Metrics)
// Clear zeroes out all counters and clears hashmaps.
Clear()
}
func newPolicy(numCounters, maxCost int64) policy {
return newDefaultPolicy(numCounters, maxCost)
}
type defaultPolicy struct {
sync.Mutex
admit *tinyLFU
evict *sampledLFU
itemsCh chan []uint64
stop chan struct{}
metrics *Metrics
}
func newDefaultPolicy(numCounters, maxCost int64) *defaultPolicy {
p := &defaultPolicy{
admit: newTinyLFU(numCounters),
evict: newSampledLFU(maxCost),
itemsCh: make(chan []uint64, 3),
stop: make(chan struct{}),
}
go p.processItems()
return p
}
func (p *defaultPolicy) CollectMetrics(metrics *Metrics) {
p.metrics = metrics
p.evict.metrics = metrics
}
type policyPair struct {
key uint64
cost int64
}
func (p *defaultPolicy) processItems() {
for {
select {
case items := <-p.itemsCh:
p.Lock()
p.admit.Push(items)
p.Unlock()
case <-p.stop:
return
}
}
}
func (p *defaultPolicy) Push(keys []uint64) bool {
if len(keys) == 0 {
return true
}
select {
case p.itemsCh <- keys:
p.metrics.add(keepGets, keys[0], uint64(len(keys)))
return true
default:
p.metrics.add(dropGets, keys[0], uint64(len(keys)))
return false
}
}
func (p *defaultPolicy) Add(key uint64, cost int64) ([]*item, bool) {
p.Lock()
defer p.Unlock()
// can't add an item bigger than entire cache
if cost > p.evict.maxCost {
return nil, false
}
// we don't need to go any further if the item is already in the cache
if has := p.evict.updateIfHas(key, cost); has {
return nil, true
}
// if we got this far, this key doesn't exist in the cache
//
// calculate the remaining room in the cache (usually bytes)
room := p.evict.roomLeft(cost)
if room >= 0 {
// there's enough room in the cache to store the new item without
// overflowing, so we can do that now and stop here
p.evict.add(key, cost)
return nil, true
}
// incHits is the hit count for the incoming item
incHits := p.admit.Estimate(key)
// sample is the eviction candidate pool to be filled via random sampling
//
// TODO: perhaps we should use a min heap here. Right now our time
// complexity is N for finding the min. Min heap should bring it down to
// O(lg N).
sample := make([]*policyPair, 0, lfuSample)
// as items are evicted they will be appended to victims
victims := make([]*item, 0)
// delete victims until there's enough space or a minKey is found that has
// more hits than incoming item.
for ; room < 0; room = p.evict.roomLeft(cost) {
// fill up empty slots in sample
sample = p.evict.fillSample(sample)
// find minimally used item in sample
minKey, minHits, minId, minCost := uint64(0), int64(math.MaxInt64), 0, int64(0)
for i, pair := range sample {
// look up hit count for sample key
if hits := p.admit.Estimate(pair.key); hits < minHits {
minKey, minHits, minId, minCost = pair.key, hits, i, pair.cost
}
}
// if the incoming item isn't worth keeping in the policy, reject.
if incHits < minHits {
p.metrics.add(rejectSets, key, 1)
return victims, false
}
// delete the victim from metadata
p.evict.del(minKey)
// delete the victim from sample
sample[minId] = sample[len(sample)-1]
sample = sample[:len(sample)-1]
// store victim in evicted victims slice
victims = append(victims, &item{
key: minKey,
conflict: 0,
cost: minCost,
})
}
p.evict.add(key, cost)
return victims, true
}
func (p *defaultPolicy) Has(key uint64) bool {
p.Lock()
_, exists := p.evict.keyCosts[key]
p.Unlock()
return exists
}
func (p *defaultPolicy) Del(key uint64) {
p.Lock()
p.evict.del(key)
p.Unlock()
}
func (p *defaultPolicy) Cap() int64 {
p.Lock()
capacity := int64(p.evict.maxCost - p.evict.used)
p.Unlock()
return capacity
}
func (p *defaultPolicy) Update(key uint64, cost int64) {
p.Lock()
p.evict.updateIfHas(key, cost)
p.Unlock()
}
func (p *defaultPolicy) Cost(key uint64) int64 {
p.Lock()
if cost, found := p.evict.keyCosts[key]; found {
p.Unlock()
return cost
}
p.Unlock()
return -1
}
func (p *defaultPolicy) Clear() {
p.Lock()
p.admit.clear()
p.evict.clear()
p.Unlock()
}
func (p *defaultPolicy) Close() {
// block until p.processItems goroutine is returned
p.stop <- struct{}{}
close(p.stop)
close(p.itemsCh)
}
// sampledLFU is an eviction helper storing key-cost pairs.
type sampledLFU struct {
keyCosts map[uint64]int64
maxCost int64
used int64
metrics *Metrics
}
func newSampledLFU(maxCost int64) *sampledLFU {
return &sampledLFU{
keyCosts: make(map[uint64]int64),
maxCost: maxCost,
}
}
func (p *sampledLFU) roomLeft(cost int64) int64 {
return p.maxCost - (p.used + cost)
}
func (p *sampledLFU) fillSample(in []*policyPair) []*policyPair {
if len(in) >= lfuSample {
return in
}
for key, cost := range p.keyCosts {
in = append(in, &policyPair{key, cost})
if len(in) >= lfuSample {
return in
}
}
return in
}
func (p *sampledLFU) del(key uint64) {
cost, ok := p.keyCosts[key]
if !ok {
return
}
p.used -= cost
delete(p.keyCosts, key)
}
func (p *sampledLFU) add(key uint64, cost int64) {
p.keyCosts[key] = cost
p.used += cost
}
func (p *sampledLFU) updateIfHas(key uint64, cost int64) bool {
if prev, found := p.keyCosts[key]; found {
// update the cost of an existing key, but don't worry about evicting,
// evictions will be handled the next time a new item is added
p.metrics.add(keyUpdate, key, 1)
p.used += cost - prev
p.keyCosts[key] = cost
return true
}
return false
}
func (p *sampledLFU) clear() {
p.used = 0
p.keyCosts = make(map[uint64]int64)
}
// tinyLFU is an admission helper that keeps track of access frequency using
// tiny (4-bit) counters in the form of a count-min sketch.
// tinyLFU is NOT thread safe.
type tinyLFU struct {
freq *cmSketch
door *z.Bloom
incrs int64
resetAt int64
}
func newTinyLFU(numCounters int64) *tinyLFU {
return &tinyLFU{
freq: newCmSketch(numCounters),
door: z.NewBloomFilter(float64(numCounters), 0.01),
resetAt: numCounters,
}
}
func (p *tinyLFU) Push(keys []uint64) {
for _, key := range keys {
p.Increment(key)
}
}
func (p *tinyLFU) Estimate(key uint64) int64 {
hits := p.freq.Estimate(key)
if p.door.Has(key) {
hits += 1
}
return hits
}
func (p *tinyLFU) Increment(key uint64) {
// flip doorkeeper bit if not already
if added := p.door.AddIfNotHas(key); !added {
// increment count-min counter if doorkeeper bit is already set.
p.freq.Increment(key)
}
p.incrs++
if p.incrs >= p.resetAt {
p.reset()
}
}
func (p *tinyLFU) reset() {
// Zero out incrs.
p.incrs = 0
// clears doorkeeper bits
p.door.Clear()
// halves count-min counters
p.freq.Reset()
}
func (p *tinyLFU) clear() {
p.incrs = 0
p.door.Clear()
p.freq.Clear()
}
ristretto-0.0.1/policy_test.go:
package ristretto
import (
"testing"
"time"
)
func TestPolicy(t *testing.T) {
defer func() {
if r := recover(); r != nil {
t.Fatal("newPolicy failed")
}
}()
newPolicy(100, 10)
}
func TestPolicyMetrics(t *testing.T) {
p := newDefaultPolicy(100, 10)
p.CollectMetrics(newMetrics())
if p.metrics == nil || p.evict.metrics == nil {
t.Fatal("policy metrics initialization error")
}
}
func TestPolicyProcessItems(t *testing.T) {
p := newDefaultPolicy(100, 10)
p.itemsCh <- []uint64{1, 2, 2}
time.Sleep(wait)
p.Lock()
if p.admit.Estimate(2) != 2 || p.admit.Estimate(1) != 1 {
p.Unlock()
t.Fatal("policy processItems not pushing to tinylfu counters")
}
p.Unlock()
p.stop <- struct{}{}
p.itemsCh <- []uint64{3, 3, 3}
time.Sleep(wait)
p.Lock()
if p.admit.Estimate(3) != 0 {
p.Unlock()
t.Fatal("policy processItems not stopping")
}
p.Unlock()
}
func TestPolicyPush(t *testing.T) {
p := newDefaultPolicy(100, 10)
if !p.Push([]uint64{}) {
t.Fatal("push empty slice should be good")
}
keepCount := 0
for i := 0; i < 10; i++ {
if p.Push([]uint64{1, 2, 3, 4, 5}) {
keepCount++
}
}
if keepCount == 0 {
t.Fatal("push dropped everything")
}
}
func TestPolicyAdd(t *testing.T) {
p := newDefaultPolicy(1000, 100)
if victims, added := p.Add(1, 101); victims != nil || added {
t.Fatal("can't add an item bigger than entire cache")
}
p.Lock()
p.evict.add(1, 1)
p.admit.Increment(1)
p.admit.Increment(2)
p.admit.Increment(3)
p.Unlock()
if victims, added := p.Add(1, 1); victims != nil || !added {
t.Fatal("item should already exist")
}
if victims, added := p.Add(2, 20); victims != nil || !added {
t.Fatal("item should be added with no eviction")
}
if victims, added := p.Add(3, 90); victims == nil || !added {
t.Fatal("item should be added with eviction")
}
if victims, added := p.Add(4, 20); victims == nil || added {
t.Fatal("item should not be added")
}
}
func TestPolicyHas(t *testing.T) {
p := newDefaultPolicy(100, 10)
p.Add(1, 1)
if !p.Has(1) {
t.Fatal("policy should have key")
}
if p.Has(2) {
t.Fatal("policy shouldn't have key")
}
}
func TestPolicyDel(t *testing.T) {
p := newDefaultPolicy(100, 10)
p.Add(1, 1)
p.Del(1)
p.Del(2)
if p.Has(1) {
t.Fatal("del didn't delete")
}
if p.Has(2) {
t.Fatal("policy shouldn't have key")
}
}
func TestPolicyCap(t *testing.T) {
p := newDefaultPolicy(100, 10)
p.Add(1, 1)
if p.Cap() != 9 {
t.Fatal("cap returned wrong value")
}
}
func TestPolicyUpdate(t *testing.T) {
p := newDefaultPolicy(100, 10)
p.Add(1, 1)
p.Update(1, 2)
p.Lock()
if p.evict.keyCosts[1] != 2 {
p.Unlock()
t.Fatal("update failed")
}
p.Unlock()
}
func TestPolicyCost(t *testing.T) {
p := newDefaultPolicy(100, 10)
p.Add(1, 2)
if p.Cost(1) != 2 {
t.Fatal("cost for existing key returned wrong value")
}
if p.Cost(2) != -1 {
t.Fatal("cost for missing key returned wrong value")
}
}
func TestPolicyClear(t *testing.T) {
p := newDefaultPolicy(100, 10)
p.Add(1, 1)
p.Add(2, 2)
p.Add(3, 3)
p.Clear()
if p.Cap() != 10 || p.Has(1) || p.Has(2) || p.Has(3) {
t.Fatal("clear didn't clear properly")
}
}
func TestPolicyClose(t *testing.T) {
defer func() {
if r := recover(); r == nil {
t.Fatal("close didn't close channels")
}
}()
p := newDefaultPolicy(100, 10)
p.Add(1, 1)
p.Close()
p.itemsCh <- []uint64{1}
}
func TestSampledLFUAdd(t *testing.T) {
e := newSampledLFU(4)
e.add(1, 1)
e.add(2, 2)
e.add(3, 1)
if e.used != 4 {
t.Fatal("used not being incremented")
}
if e.keyCosts[2] != 2 {
t.Fatal("keyCosts not being updated")
}
}
func TestSampledLFUDel(t *testing.T) {
e := newSampledLFU(4)
e.add(1, 1)
e.add(2, 2)
e.del(2)
if e.used != 1 {
t.Fatal("del not updating used field")
}
if _, ok := e.keyCosts[2]; ok {
t.Fatal("del not deleting value from keyCosts")
}
e.del(4)
}
func TestSampledLFUUpdate(t *testing.T) {
e := newSampledLFU(4)
e.add(1, 1)
if !e.updateIfHas(1, 2) {
t.Fatal("update should be possible")
}
if e.used != 2 {
t.Fatal("update not changing used field")
}
if e.updateIfHas(2, 2) {
t.Fatal("update shouldn't be possible")
}
}
func TestSampledLFUClear(t *testing.T) {
e := newSampledLFU(4)
e.add(1, 1)
e.add(2, 2)
e.add(3, 1)
e.clear()
if len(e.keyCosts) != 0 || e.used != 0 {
t.Fatal("clear not deleting keyCosts or zeroing used field")
}
}
func TestSampledLFURoom(t *testing.T) {
e := newSampledLFU(16)
e.add(1, 1)
e.add(2, 2)
e.add(3, 3)
if e.roomLeft(4) != 6 {
t.Fatal("roomLeft returning wrong value")
}
}
func TestSampledLFUSample(t *testing.T) {
e := newSampledLFU(16)
e.add(4, 4)
e.add(5, 5)
sample := e.fillSample([]*policyPair{
{1, 1},
{2, 2},
{3, 3},
})
k := sample[len(sample)-1].key
if len(sample) != 5 || k == 1 || k == 2 || k == 3 {
t.Fatal("fillSample not filling properly")
}
if len(sample) != len(e.fillSample(sample)) {
t.Fatal("fillSample mutating full sample")
}
e.del(5)
if sample = e.fillSample(sample[:len(sample)-2]); len(sample) != 4 {
t.Fatal("fillSample not returning sample properly")
}
}
func TestTinyLFUIncrement(t *testing.T) {
a := newTinyLFU(4)
a.Increment(1)
a.Increment(1)
a.Increment(1)
if !a.door.Has(1) {
t.Fatal("doorkeeper bit not set")
}
if a.freq.Estimate(1) != 2 {
t.Fatal("incorrect counter value")
}
a.Increment(1)
if a.door.Has(1) {
t.Fatal("doorkeeper bit set after reset")
}
if a.freq.Estimate(1) != 1 {
t.Fatal("counter value not halved after reset")
}
}
func TestTinyLFUEstimate(t *testing.T) {
a := newTinyLFU(8)
a.Increment(1)
a.Increment(1)
a.Increment(1)
if a.Estimate(1) != 3 {
t.Fatal("estimate value incorrect")
}
if a.Estimate(2) != 0 {
t.Fatal("estimate value should be 0")
}
}
func TestTinyLFUPush(t *testing.T) {
a := newTinyLFU(16)
a.Push([]uint64{1, 2, 2, 3, 3, 3})
if a.Estimate(1) != 1 || a.Estimate(2) != 2 || a.Estimate(3) != 3 {
t.Fatal("push didn't increment counters properly")
}
if a.incrs != 6 {
t.Fatal("incrs not being incremented")
}
}
func TestTinyLFUClear(t *testing.T) {
a := newTinyLFU(16)
a.Push([]uint64{1, 3, 3, 3})
a.clear()
if a.incrs != 0 || a.Estimate(3) != 0 {
t.Fatal("clear not clearing")
}
}
ristretto-0.0.1/ring.go:
/*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ristretto
import (
"sync"
)
// ringConsumer is the user-defined object responsible for receiving and
// processing items in batches when buffers are drained.
type ringConsumer interface {
Push([]uint64) bool
}
// ringStripe is a single ring buffer that is not safe for concurrent use.
type ringStripe struct {
cons ringConsumer
data []uint64
capa int
}
func newRingStripe(cons ringConsumer, capa int64) *ringStripe {
return &ringStripe{
cons: cons,
data: make([]uint64, 0, capa),
capa: int(capa),
}
}
// Push appends an item to the ring buffer and drains it (copies the items and
// sends them to the consumer) when full.
func (s *ringStripe) Push(item uint64) {
s.data = append(s.data, item)
// if we should drain
if len(s.data) >= s.capa {
// Send elements to consumer. Create a new one.
if s.cons.Push(s.data) {
s.data = make([]uint64, 0, s.capa)
} else {
s.data = s.data[:0]
}
}
}
// ringBuffer stores multiple buffers (stripes) and distributes Pushed items
// between them to lower contention.
//
// This implements the "batching" process described in the BP-Wrapper paper
// (section III part A).
type ringBuffer struct {
pool *sync.Pool
}
// newRingBuffer returns a striped ring buffer. The consumer will be called
// when individual stripes are full and need to drain their elements.
func newRingBuffer(cons ringConsumer, capa int64) *ringBuffer {
// LOSSY buffers use a very simple sync.Pool for concurrently reusing
// stripes. We do lose some stripes due to GC (unheld items in sync.Pool
// are cleared), but the performance gains generally outweigh the small
// percentage of elements lost. The performance primarily comes from
// low-level runtime functions used in the standard library that aren't
// available to us (such as runtime_procPin()).
return &ringBuffer{
pool: &sync.Pool{
New: func() interface{} { return newRingStripe(cons, capa) },
},
}
}
// Push adds an element to one of the internal stripes and possibly drains if
// the stripe becomes full.
func (b *ringBuffer) Push(item uint64) {
// reuse or create a new stripe
stripe := b.pool.Get().(*ringStripe)
stripe.Push(item)
b.pool.Put(stripe)
}
ristretto-0.0.1/ring_test.go:
package ristretto
import (
"sync"
"testing"
)
type testConsumer struct {
push func([]uint64)
save bool
}
func (c *testConsumer) Push(items []uint64) bool {
if c.save {
c.push(items)
return true
}
return false
}
func TestRingDrain(t *testing.T) {
drains := 0
r := newRingBuffer(&testConsumer{
push: func(items []uint64) {
drains++
},
save: true,
}, 1)
for i := 0; i < 100; i++ {
r.Push(uint64(i))
}
if drains != 100 {
t.Fatal("buffers shouldn't be dropped with BufferItems == 1")
}
}
func TestRingReset(t *testing.T) {
drains := 0
r := newRingBuffer(&testConsumer{
push: func(items []uint64) {
drains++
},
save: false,
}, 4)
for i := 0; i < 100; i++ {
r.Push(uint64(i))
}
if drains != 0 {
t.Fatal("testConsumer shouldn't be draining")
}
}
func TestRingConsumer(t *testing.T) {
mu := &sync.Mutex{}
drainItems := make(map[uint64]struct{})
r := newRingBuffer(&testConsumer{
push: func(items []uint64) {
mu.Lock()
defer mu.Unlock()
for i := range items {
drainItems[items[i]] = struct{}{}
}
},
save: true,
}, 4)
for i := 0; i < 100; i++ {
r.Push(uint64(i))
}
l := len(drainItems)
if l == 0 || l > 100 {
t.Fatal("drains not being processed correctly")
}
}
ristretto-0.0.1/sim/gli.lirs.gz:
(binary gzip-compressed trace data omitted)